diff --git a/.clang-format b/.clang-format
index dcd3860f6268d6f47ebf528641f36a8b31eef848..915697a4a6bf5b2bb3bd74e0593e02915a31d450 100644
--- a/.clang-format
+++ b/.clang-format
@@ -16,7 +16,7 @@ AllowShortBlocksOnASingleLine: Never
 AllowShortCaseLabelsOnASingleLine: false
 AllowShortFunctionsOnASingleLine: None
 AllowShortLambdasOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: Never
+AllowShortIfStatementsOnASingleLine: WithoutElse
 AllowShortLoopsOnASingleLine: false
 AlwaysBreakAfterDefinitionReturnType: None
 AlwaysBreakAfterReturnType: None
@@ -50,7 +50,7 @@ BreakConstructorInitializersBeforeComma: false
 BreakConstructorInitializers: BeforeColon
 BreakAfterJavaFieldAnnotations: false
 BreakStringLiterals: true
-ColumnLimit:     120
+ColumnLimit:     300
 CommentPragmas:  '^ IWYU pragma:'
 CompactNamespaces: false
 ConstructorInitializerAllOnOneLineOrOnePerLine: false
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 33f72ecb941c410f7541f32541a34878ff987b3f..e69b3794f5854cd6c9446b120eb6d64603f8d143 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -10,8 +10,9 @@
         "notskm.clang-tidy",
         "streetsidesoftware.code-spell-checker"
     ],
+    "containerEnv": {"TZ": "${localEnv:TZ:Europe/Berlin}"},
     "runArgs": ["--gpus","all",                     // remove this line in case you have no gpus available
                 "--hostname=${localEnv:HOSTNAME}"], // HOSTNAME needs to be known by the vscode environment. It is probably necessary to add "export HOSTNAME=<hostname>" to the config file of your host machine's bash.
 
-    "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.4"
+    "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.5"
 }
diff --git a/.gitignore b/.gitignore
index 63bd843b606911d303ce1476fc002c009b8ebc9a..e3c73e0775977555049ebcbd27dd12fb783733a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ build/
 bin/
 cmake-build-debug/
 run/
+buildGCC
 
 # Python
 _skbuild/
@@ -25,6 +26,8 @@ pythonbindings/pymuparser/bindings*
 # simulation results
 output/
 logs/
+reference_data/
+generated/
 
 # grid
 .grid/
@@ -39,4 +42,7 @@ stl/
 .DS_Store
 
 # Settings
-.gitconfig
\ No newline at end of file
+.gitconfig
+
+# User Settings
+CMakeUserPresets.json
\ No newline at end of file
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
old mode 100644
new mode 100755
index 4294ee79222a9df0785ade41a6e58a8b39d26a62..69c4381d0f9230ccf5a416bbf02c4b452b841a8c
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,7 +1,7 @@
 ###############################################################################
 ##                       VirtualFluids CI Pipeline                           ##
 ###############################################################################
-image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.4
+image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.5
 
 stages:
   - build
@@ -49,9 +49,10 @@ stages:
     - cd $CI_PROJECT_DIR/$BUILD_FOLDER
     - rm -r -f ./*
     - cmake .. -LAH
-      --preset=all_make
+      --preset=make_all
       -DBUILD_WARNINGS_AS_ERRORS=ON
       -DCMAKE_CUDA_ARCHITECTURES=60
+      -DBUILD_VF_ALL_SAMPLES=ON
     - make -j4
     - ccache --show-stats
 
@@ -75,7 +76,7 @@ clang_10:
     - export CXX=clang++
 
 ###############################################################################
-msvc_16:
+msvc_17:
   stage: build
 
   tags:
@@ -92,14 +93,14 @@ msvc_16:
     - git --version
     - $env:Path += ";C:\Program Files\CMake\bin\"
     - cmake --version
-    - $env:Path += ";C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Current\Bin"
+    - $env:Path += ";C:\Program Files\Microsoft Visual Studio\2022\Community\MSBuild\Current\Bin"
     - MSBuild.exe -version
 
   script:
     - cd $CI_PROJECT_DIR
     - md -force $env:BUILD_FOLDER
     - cd $env:BUILD_FOLDER
-    - cmake .. --preset=all_msvc -DCMAKE_CUDA_ARCHITECTURES=61 -DBUILD_WARNINGS_AS_ERRORS=ON
+    - cmake .. --preset=msvc_all -DCMAKE_CUDA_ARCHITECTURES=61 -DBUILD_WARNINGS_AS_ERRORS=ON -DBUILD_VF_ALL_SAMPLES=ON
     - MSBuild.exe VirtualFluids.sln /property:Configuration=$env:BUILD_CONFIGURATION /verbosity:minimal /maxcpucount:4
 
   artifacts:
@@ -177,17 +178,17 @@ gcc_9_unit_tests:
     - cd $CI_PROJECT_DIR/build
 
   script:
-    - ctest
+    - ctest --output-on-failure
 
 ###############################################################################
-msvc_16_unit_tests:
+msvc_17_unit_tests:
   stage: test
 
   tags:
     - win
     - gpu
 
-  needs: ["msvc_16"]
+  needs: ["msvc_17"]
 
   before_script:
     - $env:Path += ";C:\Program Files\CMake\bin\"
@@ -212,7 +213,6 @@ gcc_9_python_bindings_test:
   script:
     - python3 -m unittest discover -s Python -v
 
-
 ###############################################################################
 gcc_9_python_hpc_test:
   image: python:latest
@@ -234,6 +234,95 @@ gcc_9_python_hpc_test:
   script:
     - hpc-rocket launch --watch Python/SlurmTests/poiseuille/rocket.yml
 
+###############################################################################
+build-regression-tests-ci:  # runs generate-ci.py and publishes generated/ as a job artifact
+  image: python:3.10
+  stage: test
+
+  before_script:
+    - pip install -r utilities/ci-regression-tests/requirements.txt
+
+  script:
+    - python3 utilities/ci-regression-tests/generate-ci.py
+
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - generated/  # trigger-regression-tests includes generated/regression-tests-ci.yml from here; presumably written by generate-ci.py
+
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "schedule"  # scheduled pipelines: always run
+      when: always
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"  # merge request pipelines: job is not added
+      when: never
+    - when: manual  # every other pipeline: started by hand, and allowed to fail
+      allow_failure: true
+
+  needs: []  # empty needs list: the job does not wait for jobs of earlier stages
+
+trigger-regression-tests:  # starts a child pipeline defined by the YAML artifact of build-regression-tests-ci
+  stage: test
+  needs:
+    - build-regression-tests-ci
+  trigger:
+    include:
+      - artifact: generated/regression-tests-ci.yml
+        job: build-regression-tests-ci
+    strategy: depend  # this job's status mirrors the status of the triggered pipeline
+  variables:
+    PARENT_PIPELINE_ID: $CI_PIPELINE_ID  # hands the id of this pipeline to the triggered pipeline
+
+###############################################################################
+regression_test_4gpu:  # launches the 4-GPU run via hpc-rocket, then compares output/4GPU with reference data
+  image: python:latest
+  stage: test
+
+  rules:
+    - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule"  # scheduled pipelines with all three variables set (presumably the remote login used by hpc-rocket - confirm)
+      when: always
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"  # merge request pipelines: job is not added
+      when: never
+    - when: manual  # every other case: started by hand, and allowed to fail
+      allow_failure: true
+
+  before_script:
+    - pip install hpc-rocket
+    - pip install "fieldcompare[all]"
+
+  script:
+    - hpc-rocket launch --watch regression-tests/multigpu_test/rocket4GPU.yml
+    - git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data  # shallow, blobless, sparse clone of the reference data repository
+    - cd test_data
+    - git sparse-checkout set regression_tests/gpu/DrivenCavity_4GPU_2Levels regression_tests/gpu/SphereScaling_4GPU_2Levels  # check out only the two 4-GPU reference directories
+    - cd ..
+    - fieldcompare dir output/4GPU test_data/regression_tests/gpu/DrivenCavity_4GPU_2Levels --include-files "DrivenCavityMultiGPU*.vtu"
+    - fieldcompare dir output/4GPU test_data/regression_tests/gpu/SphereScaling_4GPU_2Levels --include-files "SphereScaling*.vtu"
+
+###############################################################################
+regression_test_8gpu:  # launches the 8-GPU run via hpc-rocket, then compares output/8GPU with reference data
+  image: python:latest
+  stage: test
+
+  rules:
+    - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule"  # scheduled pipelines with all three variables set (presumably the remote login used by hpc-rocket - confirm)
+      when: always
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"  # merge request pipelines: job is not added
+      when: never
+    - when: manual  # every other case: started by hand, and allowed to fail
+      allow_failure: true
+
+  before_script:
+    - pip install hpc-rocket
+    - pip install "fieldcompare[all]"
+
+  script:
+    - hpc-rocket launch --watch regression-tests/multigpu_test/rocket8GPU.yml
+    - git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data  # shallow, blobless, sparse clone of the reference data repository
+    - cd test_data
+    - git sparse-checkout set regression_tests/gpu/DrivenCavity_8GPU_2Levels regression_tests/gpu/SphereScaling_8GPU_2Levels  # check out only the two 8-GPU reference directories
+    - cd ..
+    - fieldcompare dir output/8GPU test_data/regression_tests/gpu/DrivenCavity_8GPU_2Levels --include-files "DrivenCavityMultiGPU*.vtu"
+    - fieldcompare dir output/8GPU test_data/regression_tests/gpu/SphereScaling_8GPU_2Levels --include-files "SphereScaling*.vtu"
 
 ###############################################################################
 ##                            Benchmark                                      ##
@@ -273,24 +362,23 @@ gpu_numerical_tests:
 
   before_script:
     - cd /tmp
-    - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@git.rz.tu-bs.de/irmb/test_data.git
+    - git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data  # shallow, blobless, sparse clone of the test data repository
+    - cd test_data
+    - git sparse-checkout set numerical_tests_gpu/grids  # only the grids directory is checked out (path was listed twice before)
     - export CCACHE_BASEDIR=$CI_PROJECT_DIR
     - export CCACHE_DIR=$CI_PROJECT_DIR/cache
     - ccache -s
     - mkdir -p $CI_PROJECT_DIR/build
     - cd $CI_PROJECT_DIR/build
-    - rm -r -f ./*
+    - rm -rf ./*
     - cmake ..
-      --preset=gpu_numerical_tests_make
+      --preset=make_numerical_tests_gpu
       -DCMAKE_CUDA_ARCHITECTURES=60
-      -DPATH_NUMERICAL_TESTS=/tmp/test_data/numerical_tests_gpu
     - make -j4
     - ccache -s
 
   script:
-    - cd $CI_PROJECT_DIR
-    # - ./build/bin/NumericalTests $CI_PROJECT_DIR/apps/gpu/tests/NumericalTests/configK15_nu10tm2.txt 2>&1 | tee -a numerical_tests_gpu_results.txt
-    - ./build/bin/NumericalTests $CI_PROJECT_DIR/apps/gpu/tests/NumericalTests/configK17chim_nu10tm3.txt 2>&1 | tee -a numerical_tests_gpu_results.txt
+    - $CI_PROJECT_DIR/build/bin/NumericalTests $CI_PROJECT_DIR/apps/gpu/tests/NumericalTests/configK17chim_nu10tm3.txt /tmp/test_data/numerical_tests_gpu/ 2>&1 | tee -a $CI_PROJECT_DIR/numerical_tests_gpu_results.txt  # absolute log path: the shell is still in build/ after before_script, but the artifact is collected from $CI_PROJECT_DIR
 
   cache:
     key: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
@@ -302,31 +390,6 @@ gpu_numerical_tests:
     paths:
       - $CI_PROJECT_DIR/numerical_tests_gpu_results.txt
 
-###############################################################################
-run-regression-tests:
-  stage: test
-
-  rules:
-    - if: $CI_PIPELINE_SOURCE == "schedule"
-      when: always
-    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
-      when: never
-    - when: manual
-      allow_failure: true
-
-  needs: []
-
-  tags:
-    - gpu
-    - linux
-
-  before_script:
-    - apt-get update && apt-get -y install python3.8-venv
-    - chmod +x ./regression-tests/*
-
-  script:
-    - ./regression-tests/regression-tests.sh
-
 ###############################################################################
 ##                        Code analysis                                      ##
 ###############################################################################
@@ -352,7 +415,7 @@ clang_build_analyzer_clang_10:
     - mkdir -p $CI_PROJECT_DIR/build
     - cd $CI_PROJECT_DIR/build
     - cmake ..
-      --preset=all_make
+      --preset=make_all
       -DCMAKE_CUDA_ARCHITECTURES=60
       -DCMAKE_CXX_FLAGS=-ftime-trace
     - ClangBuildAnalyzer --start .
@@ -384,7 +447,7 @@ include_what_you_use_clang_10:
     - mkdir -p $CI_PROJECT_DIR/build
     - cd $CI_PROJECT_DIR/build
     - cmake ..
-      --preset=all_make
+      --preset=make_all
       -DCMAKE_CUDA_ARCHITECTURES=60
       -DBUILD_VF_INCLUDE_WHAT_YOU_USE=ON
     - make
@@ -461,7 +524,7 @@ gcov_gcc_9:
     - mkdir -p $CI_PROJECT_DIR/build
     - cd $CI_PROJECT_DIR/build
     - cmake ..
-      --preset=all_make
+      --preset=make_all
       -DCMAKE_CUDA_ARCHITECTURES=60
       -DBUILD_VF_COVERAGE=ON
     - make -j4
@@ -546,8 +609,6 @@ pages:
 .deploy_template:
   stage: deploy
 
-
-
   before_script:
     - 'command -v ssh-agent >/dev/null || ( apt-get update -y && apt-get install openssh-client -y )'
     - apt-get install -y rsync
diff --git a/3rdParty/WebDemo/LBMDemoCopy.htm b/3rdParty/WebDemo/LBMDemoCopy.htm
deleted file mode 100644
index c0a2834eaf2ad463f301bd3593972337219fb813..0000000000000000000000000000000000000000
--- a/3rdParty/WebDemo/LBMDemoCopy.htm
+++ /dev/null
@@ -1,1364 +0,0 @@
-
-<!DOCTYPE HTML>
-<!--
-	A lattice-Boltzmann fluid simulation in JavaScript, using HTML5 canvas for graphics
-	
-	Copyright 2013, Daniel V. Schroeder
-
-	Permission is hereby granted, free of charge, to any person obtaining a copy of 
-	this software and associated data and documentation (the "Software"), to deal in 
-	the Software without restriction, including without limitation the rights to 
-	use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
-	of the Software, and to permit persons to whom the Software is furnished to do 
-	so, subject to the following conditions:
-
-	The above copyright notice and this permission notice shall be included in all 
-	copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
-	INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
-	PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 
-	ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-	OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-	OTHER DEALINGS IN THE SOFTWARE.
-
-	Except as contained in this notice, the name of the author shall not be used in 
-	advertising or otherwise to promote the sale, use or other dealings in this 
-	Software without prior written authorization.
-
-	Credits:
-	The "wind tunnel" entry/exit conditions are inspired by Graham Pullan's code
-	(http://www.many-core.group.cam.ac.uk/projects/LBdemo.shtml).  Additional inspiration from 
-	Thomas Pohl's applet (http://thomas-pohl.info/work/lba.html).  Other portions of code are based 
-	on Wagner (http://www.ndsu.edu/physics/people/faculty/wagner/lattice_boltzmann_codes/) and
-	Gonsalves (http://www.physics.buffalo.edu/phy411-506-2004/index.html; code adapted from Succi,
-	http://global.oup.com/academic/product/the-lattice-boltzmann-equation-9780199679249).
-
-	Revision history:
-	First version, with only start/stop, speed, and viscosity controls, February 2013
-	Added resolution control, mouse interaction, plot options, etc., March 2013
-	Added sensor, contrast slider, improved tracer placement, Fy period readout, May 2013
-	Added option to animate using setTimeout instead of requestAnimationFrame, July 2013
-	Added "Flowline" plotting (actually just line segments), August 2013
-	
-	Still to do:
-	* Fix the apparent bug in the force calculation that gives inconsistent results depending
-		on initial conditions.  Perhaps bounce-backs between adjacent barrier sites don't cancel?
-	* Grabbing the sensor while "drag fluid" selected causes a momentary drag at previous mouse location.
-	* Try to pass two-fingered touch events on to the browser, so it's still possible to zoom in and out.
-	* Work on GUI control layout, especially for smaller screens.
-	* Treat ends symmetrically when flow speed is zero.
-	* Try some other visualization techniques.
--->
-<html>
-
-<head>
-<title>Fluid Dynamics Simulation</title>
-<meta charset="utf-8">
-<meta name="viewport" content="width=620">
-<style>
-	body {background-color:#ffffff; font-family:Arial, sans-serif; font-size:14px;
-					text-align:center;}						/* gray background, center everything */
-	p {margin-left:auto; margin-right:auto; width:600px;}	/* keep paragraphs narrow and centered */
-	input {font-size:115%;}									/* make buttons bigger */
-	input[type="range"] {width:90px;}						/* make sliders shorter */
-	select {font-size:115%;}								/* make selectors bigger too */
-	li {text-align:left;}
-</style>
-</head>
-
-<body>
-
-<h2>Fluid Dynamics Simulation</h2>
-
-<p>By <a href="http://physics.weber.edu/schroeder/">Dan Schroeder</a>, 
-<a href="http://physics.weber.edu">Physics Department</a>, 
-<a href="http://weber.edu">Weber State University</a></p>
-
-<canvas id="theCanvas" width="600" height="240">This application runs only in modern
-browsers. For best results, use Google Chrome.</canvas>
-
-<div>
-	<select id="sizeSelect" onchange="resize()">
-		<option value="10">60 x 24</option>
-		<option value="8">75 x 30</option>
-		<option value="6">100 x 40</option>
-		<option value="5">120 x 48</option>
-		<option value="4">150 x 60</option>
-		<option value="3">200 x 80</option>
-		<option value="2">300 x 120</option>
-		<option value="1">600 x 240</option>
-	</select>
-	<input id="resetFluidButton" type="button" onclick="initFluid()" value="Reset fluid">
-	<input id="stepButton" type="button" onclick="simulate()" value="Step">
-	<input id="startButton" type="button" onclick="startStop()" value="Start">
-</div>
-<div>
-	Flow speed = <span id="speedValue">0.100</span> 
-	<input id="speedSlider" type="range" min="0" max="0.12" step="0.005" value="0.1" onchange="adjustSpeed()">
-	&nbsp;&nbsp;Viscosity = <span id="viscValue">0.020</span>
-	<input id="viscSlider" type="range" min="0.000000001" max="0.02" step="0.000005" value="0.02" onchange="adjustViscosity()">
-</div>
-<div style="margin-top:3px">
-	<select id="mouseSelect">
-		<option value="draw">Draw barriers</option>
-		<option value="erase">Erase barriers</option>
-		<option value="push">Drag fluid</option>
-	</select>
-	<select id="barrierSelect" onchange="placePresetBarrier()">
-		<option>Barrier shapes</option>
-	</select>
-	<input id="clearButton" type="button" onclick="clearBarriers()" value="Clear barriers">
-</div>
-<div>
-	<select id="plotSelect" onchange="paintCanvas()">
-		<option>Plot density</option>
-		<option>Plot x velocity</option>
-		<option>Plot y velocity</option>
-		<option>Plot speed</option>
-		<option selected>Plot curl</option>
-	</select>
-	&nbsp;&nbsp;Contrast:
-	<input id="contrastSlider" type="range" min="-10" max="10" step="1" value="0" onchange="paintCanvas()">
-</div>
-<div>
-	Animation speed:
-	<input id="stepsSlider" type="range" min="1" max="40" step="1" value="20" onchange="resetTimer()">
-	&nbsp;&nbsp;Steps per second: <span id="speedReadout">0</span>
-	&nbsp;&nbsp;<input id="rafCheck" type="checkbox" checked onchange="resetTimer()">Faster?
-</div>
-<div style="margin-top:4px">
-	<!--<input id="pixelCheck" type="checkbox" checked onchange="resetTimer()">Use pixel graphics-->
-	Show:
-	<input id="tracerCheck" type="checkbox" onchange="initTracers()">Tracers
-	<input id="flowlineCheck" type="checkbox" onchange="paintCanvas()">Flowlines
-	<input id="forceCheck" type="checkbox" onchange="paintCanvas()">Force on barriers
-	<input id="sensorCheck" type="checkbox" onchange="paintCanvas()">Sensor
-	<input id="dataCheck" type="checkbox" onchange="showData()">Data
-</div>
-<div id="dataSection" style="display:none">
-	<textarea id="dataArea" rows="8" cols="50" disabled readonly></textarea>
-	<div>
-		<input id="dataButton" type="button" value="Start data collection" onclick="startOrStopData()">
-		<input id="periodButton" type="button" value="Show F_y period" onclick="showPeriod()">
-		<input id="barrierDataButton" type="button" value="Show barrier locations" onclick="showBarrierLocations()">
-		<input id="debugButton" type="button" value="Debug" onclick="debug()" style="display:none">
-	</div>
-</div>
-<p style="text-align:left">This is a simulation of a two-dimensional fluid. Initially the fluid
-is flowing from left to right, and a linear barrier (shown in black) diverts the fluid and creates 
-vortices. The colors indicate the curl, or local rotational motion, of the fluid.  
-Use the controls to adjust the flow speed and viscosity, draw different barriers, drag the
-fluid around, plot other quantities besides the curl, show the force exerted by the fluid
-on the barriers, and measure the fluid's density and velocity at any point. Enjoy!</p>
-
-<p style="text-align:left">The simulation uses a fairly simple
-<a href="http://en.wikipedia.org/wiki/Lattice_Boltzmann_methods">lattice-Boltzmann algorithm</a>, 
-which you can see by viewing the JavaScript source code. As of mid-2013, the simulation
-runs fastest under Chrome on either MacOS or Windows.  Firefox is somewhat slower and Safari slower still,
-while Opera and Internet Explorer are much slower. You can adjust the resolution to increase or
-decrease the simulation speed.</p>
-
-<p style="text-align:left">If you don't see the slider controls above, try updating your browser.
-As of August 2013, the most recent versions of all major browsers should show the sliders.</p>
-
-<p style="text-align:left">This HTML5-canvas-JavaScript web app is a work in progress. It still
-has a few bugs and awkward features, which I hope to address some day.</p>
-
-<p style="text-align:left">
-	Related materials:
-</p>
-<div style="margin-left:auto; margin-right:auto; width:600px;">
-	<ul>
-	<li><a href="LatticeBoltzmannDemo.java.txt">A similar simulation in Java</a></li>
-	<li><a href="LatticeBoltzmannDemo.py.txt">A similar simulation in Python</a></li>
-	<li><a href="FluidSimulationsForUndergrads.pdf">Poster presentation</a> 
-	given at the AAPT summer meeting, 2013 (pdf, 2.6 MB)</li>
-	<li><a href="http://physics.weber.edu/schroeder/javacourse/LatticeBoltzmann.pdf">Instructions</a> 
-	for a lattice-Boltzmann project in a computational physics course</li>
-	<li>A more detailed explanation of the lattice-Boltzmann algorithm (coming soon)</li>
-	</ul>
-</div>
-
-<script src="barrierdata.js"></script>
-<script>
-	// Global variables:	
-	var mobile = navigator.userAgent.match(/iPhone|iPad|iPod|Android|BlackBerry|Opera Mini|IEMobile/i)
-	var canvas = document.getElementById('theCanvas');
-	var context = canvas.getContext('2d');
-	var image = context.createImageData(canvas.width, canvas.height);		// for direct pixel manipulation (faster than fillRect)
-	for (var i=3; i<image.data.length; i+=4) image.data[i] = 255;			// set all alpha values to opaque
-	var sizeSelect = document.getElementById('sizeSelect');
-	sizeSelect.selectedIndex = 5;
-	if (mobile) sizeSelect.selectedIndex = 1;		// smaller works better on mobile platforms
-	var pxPerSquare = Number(sizeSelect.options[sizeSelect.selectedIndex].value);
-													// width of plotted grid site in pixels
-	var xdim = canvas.width / pxPerSquare;			// grid dimensions for simulation
-	var ydim = canvas.height / pxPerSquare;
-	var stepsSlider = document.getElementById('stepsSlider');
-	var startButton = document.getElementById('startButton');
-	var speedSlider = document.getElementById('speedSlider');
-	var speedValue = document.getElementById('speedValue');
-	var viscSlider = document.getElementById('viscSlider');
-	var viscValue = document.getElementById('viscValue');
-	var mouseSelect = document.getElementById('mouseSelect');
-	var barrierSelect = document.getElementById('barrierSelect');
-	for (var barrierIndex=0; barrierIndex<barrierList.length; barrierIndex++) {
-		var shape = document.createElement("option");
-		shape.text = barrierList[barrierIndex].name;
-		barrierSelect.add(shape, null);
-	}
-	var plotSelect = document.getElementById('plotSelect');
-	var contrastSlider = document.getElementById('contrastSlider');
-	//var pixelCheck = document.getElementById('pixelCheck');
-	var tracerCheck = document.getElementById('tracerCheck');
-	var flowlineCheck = document.getElementById('flowlineCheck');
-	var forceCheck = document.getElementById('forceCheck');
-	var sensorCheck = document.getElementById('sensorCheck');
-	var dataCheck = document.getElementById('dataCheck');
-	var rafCheck = document.getElementById('rafCheck');
-	var speedReadout = document.getElementById('speedReadout');
-	var dataSection = document.getElementById('dataSection');
-	var dataArea = document.getElementById('dataArea');
-	var dataButton = document.getElementById('dataButton');
-	var running = false;						// will be true when running
-	var stepCount = 0;
-	var startTime = 0;
-	var four9ths = 4.0 / 9.0;					// abbreviations
-	var one9th = 1.0 / 9.0;
-	var one36th = 1.0 / 36.0;
-	var barrierCount = 0;
-	var barrierxSum = 0;
-	var barrierySum = 0;
-	var barrierFx = 0.0;						// total force on all barrier sites
-	var barrierFy = 0.0;
-	var sensorX = xdim / 2;						// coordinates of "sensor" to measure local fluid properties	
-	var sensorY = ydim / 2;
-	var draggingSensor = false;
-	var mouseIsDown = false;
-	var mouseX, mouseY;							// mouse location in canvas coordinates
-	var oldMouseX = -1, oldMouseY = -1;			// mouse coordinates from previous simulation frame
-	var collectingData = false;
-	var time = 0;								// time (in simulation step units) since data collection started
-	var showingPeriod = false;
-	var lastBarrierFy = 1;						// for determining when F_y oscillation begins
-	var lastFyOscTime = 0;						// for calculating F_y oscillation period
-
-	canvas.addEventListener('mousedown', mouseDown, false);
-	canvas.addEventListener('mousemove', mouseMove, false);
-	document.body.addEventListener('mouseup', mouseUp, false);	// button release could occur outside canvas
-	canvas.addEventListener('touchstart', mouseDown, false);
-	canvas.addEventListener('touchmove', mouseMove, false);
-	document.body.addEventListener('touchend', mouseUp, false);
-
-	// Create the arrays of fluid particle densities, etc. (using 1D arrays for speed):
-	// To index into these arrays, use x + y*xdim, traversing rows first and then columns.
-	var n0 = new Array(xdim*ydim);			// microscopic densities along each lattice direction
-	var nN = new Array(xdim*ydim);
-	var nS = new Array(xdim*ydim);
-	var nE = new Array(xdim*ydim);
-	var nW = new Array(xdim*ydim);
-	var nNE = new Array(xdim*ydim);
-	var nSE = new Array(xdim*ydim);
-	var nNW = new Array(xdim*ydim);
-	var nSW = new Array(xdim*ydim);
-	var rho = new Array(xdim*ydim);			// macroscopic density
-	var ux = new Array(xdim*ydim);			// macroscopic velocity
-	var uy = new Array(xdim*ydim);
-	var curl = new Array(xdim*ydim);
-	var barrier = new Array(xdim * ydim); 	// boolean array of barrier locations
-	var odd = 1;
-
-	// Initialize to a steady rightward flow with no barriers:
-	for (var y=0; y<ydim; y++) {
-		for (var x=0; x<xdim; x++) {
-			barrier[x+y*xdim] = false;
-		}
-	}
-
-	// Create a simple linear "wall" barrier (intentionally a little offset from center):
-	var barrierSize = 8;
-	if (mobile) barrierSize = 4;
-	for (var y=(ydim/2)-barrierSize; y<=(ydim/2)+barrierSize; y++) {
-		var x = Math.round(ydim/3);
-		barrier[x+y*xdim] = true;
-	}
-
-	// Set up the array of colors for plotting (mimicks matplotlib "jet" colormap):
-	// (Kludge: Index nColors+1 labels the color used for drawing barriers.)
-	var nColors = 400;							// there are actually nColors+2 colors
-	var hexColorList = new Array(nColors+2);
-	var redList = new Array(nColors+2);
-	var greenList = new Array(nColors+2);
-	var blueList = new Array(nColors+2);
-	for (var c=0; c<=nColors; c++) {
-		var r, g, b;
-		if (c < nColors/8) {
-			r = 0; g = 0; b = Math.round(255 * (c + nColors/8) / (nColors/4));
-		} else if (c < 3*nColors/8) {
-			r = 0; g = Math.round(255 * (c - nColors/8) / (nColors/4)); b = 255;
-		} else if (c < 5*nColors/8) {
-			r = Math.round(255 * (c - 3*nColors/8) / (nColors/4)); g = 255; b = 255 - r;
-		} else if (c < 7*nColors/8) {
-			r = 255; g = Math.round(255 * (7*nColors/8 - c) / (nColors/4)); b = 0;
-		} else {
-			r = Math.round(255 * (9*nColors/8 - c) / (nColors/4)); g = 0; b = 0;
-		}
-		redList[c] = r; greenList[c] = g; blueList[c] = b;
-		hexColorList[c] = rgbToHex(r, g, b);
-	}
-	redList[nColors+1] = 0; greenList[nColors+1] = 0; blueList[nColors+1] = 0;	// barriers are black
-	hexColorList[nColors+1] = rgbToHex(0, 0, 0);
-
-	// Functions to convert rgb to hex color string (from stackoverflow):
-	function componentToHex(c) {
-		var hex = c.toString(16);
-		return hex.length == 1 ? "0" + hex : hex;
-	}
-	function rgbToHex(r, g, b) {
-		return "#" + componentToHex(r) + componentToHex(g) + componentToHex(b);
-	}
-
-	// Initialize array of partially transparant blacks, for drawing flow lines:
-	var transBlackArraySize = 50;
-	var transBlackArray = new Array(transBlackArraySize);
-	for (var i=0; i<transBlackArraySize; i++) {
-		transBlackArray[i] = "rgba(0,0,0," + Number(i/transBlackArraySize).toFixed(2) + ")";
-	}
-
-	// Initialize tracers (but don't place them yet):
-	var nTracers = 144;
-	var tracerX = new Array(nTracers);
-	var tracerY = new Array(nTracers);
-	for (var t=0; t<nTracers; t++) {
-		tracerX[t] = 0.0; tracerY[t] = 0.0;
-	}
-
-	initFluid();		// initialize to steady rightward flow
-
-	// Mysterious gymnastics that are apparently useful for better cross-browser animation timing:
-	window.requestAnimFrame = (function(callback) {
-		return 	window.requestAnimationFrame || 
-			window.webkitRequestAnimationFrame || 
-			window.mozRequestAnimationFrame || 
-			window.oRequestAnimationFrame || 
-			window.msRequestAnimationFrame ||
-			function(callback) {
-				window.setTimeout(callback, 1);		// second parameter is time in ms
-			};
-	})();
-
-	// Simulate function executes a bunch of steps and then schedules another call to itself:
-	function simulate() {
-		var stepsPerFrame = Number(stepsSlider.value);			// number of simulation steps per animation frame
-		setBoundaries();
-		// Test to see if we're dragging the fluid:
-		var pushing = false;
-		var pushX, pushY, pushUX, pushUY;
-		if (mouseIsDown && mouseSelect.selectedIndex==2) {
-			if (oldMouseX >= 0) {
-				var gridLoc = canvasToGrid(mouseX, mouseY);
-				pushX = gridLoc.x;
-				pushY = gridLoc.y;
-				pushUX = (mouseX - oldMouseX) / pxPerSquare / stepsPerFrame;
-				pushUY = -(mouseY - oldMouseY) / pxPerSquare / stepsPerFrame;	// y axis is flipped
-				if (Math.abs(pushUX) > 0.1) pushUX = 0.1 * Math.abs(pushUX) / pushUX;
-				if (Math.abs(pushUY) > 0.1) pushUY = 0.1 * Math.abs(pushUY) / pushUY;
-				pushing = true;
-			}
-			oldMouseX = mouseX; oldMouseY = mouseY;
-		} else {
-			oldMouseX = -1; oldMouseY = -1;
-		}
-		// Execute a bunch of time steps:
-		for (var step = 0; step < stepsPerFrame; step++) {
-		    setBoundaries();
-			collide();
-			//stream();
-			if (odd == 1) { odd = 0; }
-			else { odd = 1; }
-			if (tracerCheck.checked) moveTracers();
-			if (pushing) push(pushX, pushY, pushUX, pushUY);
-			time++;
-			if (showingPeriod && (barrierFy > 0) && (lastBarrierFy <=0)) {
-				var thisFyOscTime = time - barrierFy/(barrierFy-lastBarrierFy);	// interpolate when Fy changed sign
-				if (lastFyOscTime > 0) {
-					var period = thisFyOscTime - lastFyOscTime;
-					dataArea.innerHTML += Number(period).toFixed(2) + "\n";
-					dataArea.scrollTop = dataArea.scrollHeight;
-				}
-				lastFyOscTime = thisFyOscTime;
-			}
-			lastBarrierFy = barrierFy;
-		}
-		paintCanvas();
-		if (collectingData) {
-			writeData();
-			if (time >= 10000) startOrStopData();
-		}
-		if (running) {
-			stepCount += stepsPerFrame;
-			var elapsedTime = ((new Date()).getTime() - startTime) / 1000;	// time in seconds
-			speedReadout.innerHTML = Number(stepCount/elapsedTime).toFixed(0);
-		}
-		var stable = true;
-		for (var x=0; x<xdim; x++) {
-			var index = x + (ydim/2)*xdim;	// look at middle row only
-			if (rho[index] <= 0) stable = false;
-		}
-		if (!stable) {
-			window.alert("The simulation has become unstable due to excessive fluid speeds.");
-			startStop();
-			initFluid();
-		}
-		if (running) {
-			if (rafCheck.checked) {
-				requestAnimFrame(function() { simulate(); });	// let browser schedule next frame
-			} else {
-				window.setTimeout(simulate, 1);	// schedule next frame asap (nominally 1 ms but always more)
-			}
-		}
-	}
-
-	// Set the fluid variables at the boundaries, according to the current slider value:
-	function setBoundaries() {
-		var u0 = Number(speedSlider.value);
-		for (var x=0; x<xdim; x++) {
-			setEquil(x, 0, u0, 0, 1);
-			setEquil(x, ydim-1, u0, 0, 1);
-		}
-		for (var y=1; y<ydim-1; y++) {
-			setEquil(0, y, u0, 0, 1);
-			setEquil(xdim-1, y, u0, 0, 1);
-		}
-	}
-
-	// Collide particles within each cell (here's the physics!):
-	function collideOLD() {
-		var viscosity = Number(viscSlider.value);	// kinematic viscosity coefficient in natural units
-		var omega = 1 / (3*viscosity + 0.5);		// reciprocal of relaxation time
-		for (var y=1; y<ydim-1; y++) {
-			for (var x=1; x<xdim-1; x++) {
-				var i = x + y*xdim;		// array index for this lattice site
-				var thisrho = n0[i] + nN[i] + nS[i] + nE[i] + nW[i] + nNW[i] + nNE[i] + nSW[i] + nSE[i];
-				rho[i] = thisrho;
-				var thisux = (nE[i] + nNE[i] + nSE[i] - nW[i] - nNW[i] - nSW[i]) / thisrho;
-				ux[i] = thisux;
-				var thisuy = (nN[i] + nNE[i] + nNW[i] - nS[i] - nSE[i] - nSW[i]) / thisrho;
-				uy[i] = thisuy
-				var one9thrho = one9th * thisrho;		// pre-compute a bunch of stuff for optimization
-				var one36thrho = one36th * thisrho;
-				var ux3 = 3 * thisux;
-				var uy3 = 3 * thisuy;
-				var ux2 = thisux * thisux;
-				var uy2 = thisuy * thisuy;
-				var uxuy2 = 2 * thisux * thisuy;
-				var u2 = ux2 + uy2;
-				var u215 = 1.5 * u2;
-				n0[i]  += omega * (four9ths*thisrho * (1                        - u215) - n0[i]);
-				nE[i]  += omega * (   one9thrho * (1 + ux3       + 4.5*ux2        - u215) - nE[i]);
-				nW[i]  += omega * (   one9thrho * (1 - ux3       + 4.5*ux2        - u215) - nW[i]);
-				nN[i]  += omega * (   one9thrho * (1 + uy3       + 4.5*uy2        - u215) - nN[i]);
-				nS[i]  += omega * (   one9thrho * (1 - uy3       + 4.5*uy2        - u215) - nS[i]);
-				nNE[i] += omega * (  one36thrho * (1 + ux3 + uy3 + 4.5*(u2+uxuy2) - u215) - nNE[i]);
-				nSE[i] += omega * (  one36thrho * (1 + ux3 - uy3 + 4.5*(u2-uxuy2) - u215) - nSE[i]);
-				nNW[i] += omega * (  one36thrho * (1 - ux3 + uy3 + 4.5*(u2-uxuy2) - u215) - nNW[i]);
-				nSW[i] += omega * (  one36thrho * (1 - ux3 - uy3 + 4.5*(u2+uxuy2) - u215) - nSW[i]);
-			}
-		}
-		for (var y=1; y<ydim-2; y++) {
-			nW[xdim-1+y*xdim] = nW[xdim-2+y*xdim];		// at right end, copy left-flowing densities from next row to the left
-			nNW[xdim-1+y*xdim] = nNW[xdim-2+y*xdim];
-			nSW[xdim-1+y*xdim] = nSW[xdim-2+y*xdim];
-		}
-	}
-
-	///----------------------------Cumulants
-	function collide() {
-	    var viscosity = Number(viscSlider.value); // kinematic viscosity coefficient in natural units
-	    var omega = 1 / (3 * viscosity + 0.5); 	// reciprocal of relaxation time
-	    //var om3 = 9.0 * (8.0 - 6.0 * omega + omega * omega) / (36.0 - 18 * omega + 2 * omega * omega); //
-	    var om3 = 3.0 * (omega - 2.0) / (omega - 3.0);
-	    for (var y = 1; y < ydim - 1; y++) {
-	        for (var x = 1; x < xdim - 1; x++) {
-	            if (x > xdim - 5) { omega = 1; }
-	            //if (true)
-	          //  {//(!barrier[x+y*xdim]){
-	            else { omega = 1 / (3 * viscosity + 0.5); }
-	            if (!barrier[x+y*xdim]){
-	            var i = x + y * xdim; 	// array index for this lattice site
-	            var ix=(x+1)+ y*xdim;
-	            var iy=x+(y+1)*xdim;
-	            var ixy=(x+1)+(y+1)*xdim;
-var maa; 
-var mab; 
-var mac; 
-var mba; 
-var mbb; 
-var mbc; 
-var mca; 
-var mcb; 
-var mcc; 
-	            
-	            if(odd==1){
-	             maa = nSW[ixy];
-	             mab = nW[ix];
-	             mac = nNW[ix];
-	             mba = nS[iy];
-	             mbb = n0[i];
-	             mbc = nN[i];
-	             mca = nSE[iy];
-	             mcb = nE[i];
-	             mcc = nNE[i];
-	            }
-	            else
-	            {
-	             maa = nNE[ixy];
-	             mab = nE[ix];
-	             mac = nSE[ix];
-	             mba = nN[iy];
-	             mbb = n0[i];
-	             mbc = nS[i];
-	             mca = nNW[iy];
-	             mcb = nW[i];
-	             mcc = nSW[i];
-                }
-	            
-	            var thisrho = maa + mab + mac + mba + mbb + mbc + mca + mcb + mcc;
-	            rho[i] = thisrho;
-	            var thisux = (mca+mcb+mcc-maa-mab-mac) / thisrho;
-	            ux[i] = thisux;
-	            var thisuy = (mac+mbc+mcc-maa-mba-mca) / thisrho;
-	            uy[i] = thisuy;
-
-
-	            
-	            var n1=maa+mab+mac;
-                var n2=(-1-thisuy)*maa-thisuy*mab+(1-thisuy)*mac;
-                     mac=(-1-thisuy)*(-1-thisuy)*maa+thisuy*thisuy*mab+(1-thisuy)*(1-thisuy)*mac;
-                     maa=n1;
-                     mab=n2;
-
-
-                     n1=mba+mbb+mbc;
-                     n2=(-1-thisuy)*mba-thisuy*mbb+(1-thisuy)*mbc;
-                     mbc=(-1-thisuy)*(-1-thisuy)*mba+thisuy*thisuy*mbb+(1-thisuy)*(1-thisuy)*mbc;
-                     mba=n1;
-                     mbb=n2;
-
-
-                     n1=mca+mcb+mcc;
-                     n2=(-1-thisuy)*mca-thisuy*mcb+(1-thisuy)*mcc;
-                     mcc=(-1-thisuy)*(-1-thisuy)*mca+thisuy*thisuy*mcb+(1-thisuy)*(1-thisuy)*mcc;
-                     mca=n1;
-                     mcb=n2;
-///// y
-                     n1=maa+mba+mca;
-                     n2=(-1-thisux)*maa-thisux*mba+(1-thisux)*mca;
-                     mca=(-1-thisux)*(-1-thisux)*maa+thisux*thisux*mba+(1-thisux)*(1-thisux)*mca;
-                     maa=n1;
-                     mba=n2;
-
-                     n1=mab+mbb+mcb;
-                     n2=(-1-thisux)*mab-thisux*mbb+(1-thisux)*mcb;
-                     mcb=(-1-thisux)*(-1-thisux)*mab+thisux*thisux*mbb+(1-thisux)*(1-thisux)*mcb;
-                     mab=n1;
-                     mbb=n2;
-
-                     n1=mac+mbc+mcc;
-                     n2=(-1-thisux)*mac-thisux*mbc+(1-thisux)*mcc;
-                     mcc=(-1-thisux)*(-1-thisux)*mac+thisux*thisux*mbc+(1-thisux)*(1-thisux)*mcc;
-                     mac=n1;
-                     mbc=n2;
-	            //
-
-	            //----fast transform
-	            
-//	            var n1 = mcc + maa - mac - mca - thisux * thisuy / thisrho;
-//	            var n2;
-//	            //var pp = 2 * (mca + mac + maa + mcc) + mba + mbc + mab + mcb - (thisux * thisux + thisuy * thisuy) / thisrho;
-//	            var pxx = -mba - mbc + mab + mcb - (thisux * thisux - thisuy * thisuy) / thisrho;
-//	            maa = thisrho;
-//	            mba = 0;
-//	            mab = 0;
-//	            mbb = n1;
-
-	            //-----!FastTransform
-
-                     //now the collision:
-                     
-                     var pp=mac+mca;
-                     var pxx = mca - mac;
-                     var CUMcc = mcc - (mca * mac + 2 * mbb * mbb) / thisrho;
-
-                     //----
-                     var dxUx = (-0.5 * omega * (2 * mca - mac) - 0.5 * (mca + mac - maa)) / thisrho;
-                     var dyUy = (-0.5 * omega * (2 * mac - mca) - 0.5 * (mca + mac - maa)) / thisrho;
-                     //----
-
-                    // pp = thisrho/3.0*(dxUx*dxUx+dyUy*dyUy)+ 2.0 / 3.0 * thisrho; 
-                    // CUMcc = 0.0;
-                    // pxx =thisrho/3.0*(dxUx*dxUx-dyUy*dyUy)*omega-3*thisrho*(1.0-omega*0.5)*(thisux*thisux*dxUx-thisuy*thisuy*dyUy)+ pxx * (1.0 - omega);
-                    var om2=omega;//1.99;
-var quadLim = 0.01; //0.001/(0.01+uu+1.0e-9);			
-var limit1 = om2 + (1.0 - om2) * Math.abs(pp) / (Math.abs(pp) + 0.5); //4 Konstantin
-			pp=2.0/3.0*thisrho*limit1+(1.0-limit1)*pp;
-		     //pp =  2.0 / 3.0 * thisrho;
-                     pxx = -3*thisrho*(1.0-omega*0.5)*(thisux*thisux*dxUx-thisuy*thisuy*dyUy)+ pxx * (1.0 - omega);
-                     mbb = mbb * (1.0 - omega);
-                     mca = 0.5 * (pp + pxx);
-                     mac = 0.5 * (pp - pxx);
-                    //-----without limiter
-                    // mcb = 0.0; //mcb * (1.0 - om3);
-
-                    // mbc = 0.0; //mbc * (1.0 - om3);
-                     //---with limiter
-                     var uu = Math.sqrt(thisux * thisux + thisuy * thisuy);
-                     
-                     var limIT = om3 + (1.0 - om3) * Math.abs(mcb) / (Math.abs(mcb) + quadLim);
-                     mcb = mcb * (1.0 - limIT);
-                     limIT = om3 + (1.0 - om3) * Math.abs(mbc) / (Math.abs(mbc) + quadLim);
-                     mbc = mbc * (1.0 - limIT);
-                     //---!limiter
-                     
-                     mcc = /*CUMcc+ */  (mca * mac + 2 * mbb * mbb) / thisrho;
-
-
-                     ////x-Richtung
-
-                     n1 = (mcc + mac * (-1 + thisux) * thisux + mbc * (-1 + 2 * thisux)) * 0.5;
-                     n2 = mac - mcc - 2 * mbc * thisux - mac * thisux * thisux;
-                     mcc = (mbc + mcc + 2 * mbc * thisux + mac * thisux * (1 + thisux)) * 0.5;
-                     mac = n1;
-                     mbc = n2;
-
-                     n1 = (mcb + mab * (-1 + thisux) * thisux + mbb * (-1 + 2 * thisux)) * 0.5;
-                     n2 = mab - mcb - 2 * mbb * thisux - mab * thisux * thisux;
-                     mcb = (mbb + mcb + 2 * mbb * thisux + mab * thisux * (1 + thisux)) * 0.5;
-                     mab = n1;
-                     mbb = n2;
-
-                     n1 = (mca + maa * (-1 + thisux) * thisux + mba * (-1 + 2 * thisux)) * 0.5;
-                     n2 = maa - mca - 2 * mba * thisux - maa * thisux * thisux;
-                     mca = (mba + mca + 2 * mba * thisux + maa * thisux * (1 + thisux)) * 0.5;
-                     maa = n1;
-                     mba = n2;
-
-                     ////y-Richtung
-                     n1 = (mcc + mca * (-1 + thisuy) * thisuy + mcb * (-1 + 2 * thisuy)) * 0.5;
-                     n2 = mca - mcc - 2 * mcb * thisuy - mca * thisuy * thisuy;
-                     mcc = (mcb + mcc + 2 * mcb * thisuy + mca * thisuy * (1 + thisuy)) * 0.5;
-                     mca = n1;
-                     mcb = n2;
-
-                     n1 = (mbc + mba * (-1 + thisuy) * thisuy + mbb * (-1 + 2 * thisuy)) * 0.5;
-                     n2 = mba - mbc - 2 * mbb * thisuy - mba * thisuy * thisuy;
-                     mbc = (mbb + mbc + 2 * mbb * thisuy + mba * thisuy * (1 + thisuy)) * 0.5;
-                     mba = n1;
-                     mbb = n2;
-
-                     n1 = (mac + maa * (-1 + thisuy) * thisuy + mab * (-1 + 2 * thisuy)) * 0.5;
-                     n2 = maa - mac - 2 * mab * thisuy - maa * thisuy * thisuy;
-                     mac = (mab + mac + 2 * mab * thisuy + maa * thisuy * (1 + thisuy)) * 0.5;
-                     maa = n1;
-                     mab = n2;
-     
-	            
-	            if (odd==1){
-	            n0[i] =mbb;
-	            nW[ix] =mcb;
-	            nE[i] =mab;
-	            nS[iy] =mbc;
-	            nN[i] =mba;
-	            nSW[ixy]=mcc;
-	            nNW[ix]=mca;
-	            nSE[iy]=mac;
-	            nNE[i] = maa;
-	            
-	            }
-	            else{
-	            n0[i] =mbb;
-	            nE[ix] =mcb;
-	            nW[i] =mab;
-	            nN[iy] =mbc;
-	            nS[i] =mba;
-	            nNE[ixy]=mcc;
-	            nSE[ix]=mca;
-	            nNW[iy]=mac;
-	            nSW[i] = maa;
-	            
-	            }	            
-	        }
-	    }
-	    }
-	   // if (odd == 0) {
-	   //     for (var y = 1; y < ydim - 2; y++) {
-	          //     nE[xdim - 1 + y * xdim] = nE[xdim - 2 + y * xdim]; 	// at right end, copy left-flowing densities from next row to the left
-	          //     nNE[xdim - 1 + y * xdim] = nNE[xdim - 2 + y * xdim];
-	          //     nSE[xdim - 1 + y * xdim] = nSE[xdim - 2 + y * xdim];
-
-	             //  nW[1+y * xdim] += 0.0001 * 3;
-	             //  nWE[1+y * xdim] += 0.0001;
-	             //  nWS[1+y * xdim] += 0.0001; 
-	     //   }
-	    //}
-	    //else {
-	      //  for (var y = 1; y < ydim - 2; y++) {
-	            //   nW[xdim - 1 + y * xdim] = nW[xdim - 2 + y * xdim]; 	// at right end, copy left-flowing densities from next row to the left
-	            //   nNW[xdim - 1 + y * xdim] = nNW[xdim - 2 + y * xdim];
-	            //   nSW[xdim - 1 + y * xdim] = nSW[xdim - 2 + y * xdim];
-	        //}
-	    //}
-	    
-	}
-
-////------------------------------!Cumulants
-
-
-
-	// Move particles along their directions of motion:
-	function stream() {
-		barrierCount = 0; barrierxSum = 0; barrierySum = 0;
-		barrierFx = 0.0; barrierFy = 0.0;
-		for (var y=ydim-2; y>0; y--) {			// first start in NW corner...
-			for (var x=1; x<xdim-1; x++) {
-				nN[x+y*xdim] = nN[x+(y-1)*xdim];			// move the north-moving particles
-				nNW[x+y*xdim] = nNW[x+1+(y-1)*xdim];		// and the northwest-moving particles
-			}
-		}
-		for (var y=ydim-2; y>0; y--) {			// now start in NE corner...
-			for (var x=xdim-2; x>0; x--) {
-				nE[x+y*xdim] = nE[x-1+y*xdim];			// move the east-moving particles
-				nNE[x+y*xdim] = nNE[x-1+(y-1)*xdim];		// and the northeast-moving particles
-			}
-		}
-		for (var y=1; y<ydim-1; y++) {			// now start in SE corner...
-			for (var x=xdim-2; x>0; x--) {
-				nS[x+y*xdim] = nS[x+(y+1)*xdim];			// move the south-moving particles
-				nSE[x+y*xdim] = nSE[x-1+(y+1)*xdim];		// and the southeast-moving particles
-			}
-		}
-		for (var y=1; y<ydim-1; y++) {				// now start in the SW corner...
-			for (var x=1; x<xdim-1; x++) {
-				nW[x+y*xdim] = nW[x+1+y*xdim];			// move the west-moving particles
-				nSW[x+y*xdim] = nSW[x+1+(y+1)*xdim];		// and the southwest-moving particles
-			}
-		}
-		for (var y=1; y<ydim-1; y++) {				// Now handle bounce-back from barriers
-			for (var x=1; x<xdim-1; x++) {
-				if (barrier[x+y*xdim]) {
-					var index = x + y*xdim;
-					nE[x+1+y*xdim] = nW[index];
-					nW[x-1+y*xdim] = nE[index];
-					nN[x+(y+1)*xdim] = nS[index];
-					nS[x+(y-1)*xdim] = nN[index];
-					nNE[x+1+(y+1)*xdim] = nSW[index];
-					nNW[x-1+(y+1)*xdim] = nSE[index];
-					nSE[x+1+(y-1)*xdim] = nNW[index];
-					nSW[x-1+(y-1)*xdim] = nNE[index];
-					// Keep track of stuff needed to plot force vector:
-					barrierCount++;
-					barrierxSum += x;
-					barrierySum += y;
-					barrierFx += nE[index] + nNE[index] + nSE[index] - nW[index] - nNW[index] - nSW[index];
-					barrierFy += nN[index] + nNE[index] + nNW[index] - nS[index] - nSE[index] - nSW[index];
-				}
-			}
-		}
-	}
-
-	// Move the tracer particles:
-	function moveTracers() {
-		for (var t=0; t<nTracers; t++) {
-			var roundedX = Math.round(tracerX[t]);
-			var roundedY = Math.round(tracerY[t]);
-			var index = roundedX + roundedY*xdim;
-			tracerX[t] += ux[index];
-			tracerY[t] += uy[index];
-			if (tracerX[t] > xdim-1) {
-				tracerX[t] = 0;
-				tracerY[t] = Math.random() * ydim;
-			}
-		}
-	}
-
-	// "Drag" the fluid in a direction determined by the mouse (or touch) motion:
-	// (The drag affects a "circle", 5 px in diameter, centered on the given coordinates.)
-	function push(pushX, pushY, pushUX, pushUY) {
-		// First make sure we're not too close to edge:
-		var margin = 3;
-		if ((pushX > margin) && (pushX < xdim-1-margin) && (pushY > margin) && (pushY < ydim-1-margin)) {
-			for (var dx=-1; dx<=1; dx++) {
-				setEquil(pushX+dx, pushY+2, pushUX, pushUY);
-				setEquil(pushX+dx, pushY-2, pushUX, pushUY);
-			}
-			for (var dx=-2; dx<=2; dx++) {
-				for (var dy=-1; dy<=1; dy++) {
-					setEquil(pushX+dx, pushY+dy, pushUX, pushUY);
-				}
-			}
-		}
-	}
-
-	// Set all densities in a cell to their equilibrium values for a given velocity and density:
-	// (If density is omitted, it's left unchanged.)
-	function setEquil(x, y, newux, newuy, newrho) {
-	    var i = x + y * xdim;
-	    var ix = (x + 1) + y * xdim;
-	    var iy = x + (y + 1) * xdim;
-	    var ixy = (x + 1) + (y + 1) * xdim;
-
-		if (typeof newrho == 'undefined') {
-			newrho = rho[i];
-		}
-		var ux3 = 3 * newux;
-		var uy3 = 3 * newuy;
-		var ux2 = newux * newux;
-		var uy2 = newuy * newuy;
-		var uxuy2 = 2 * newux * newuy;
-		var u2 = ux2 + uy2;
-		var u215 = 1.5 * u2;
-		if (odd == 0) {
-		    n0[i] = four9ths * newrho * (1 - u215);
-		    nW[ix] = one9th * newrho * (1 + ux3 + 4.5 * ux2 - u215);
-		    nE[i] = one9th * newrho * (1 - ux3 + 4.5 * ux2 - u215);
-		    nS[iy] = one9th * newrho * (1 + uy3 + 4.5 * uy2 - u215);
-		    nN[i] = one9th * newrho * (1 - uy3 + 4.5 * uy2 - u215);
-		    nSW[ixy] = one36th * newrho * (1 + ux3 + uy3 + 4.5 * (u2 + uxuy2) - u215);
-		    nNW[ix] = one36th * newrho * (1 + ux3 - uy3 + 4.5 * (u2 - uxuy2) - u215);
-		    nSE[iy] = one36th * newrho * (1 - ux3 + uy3 + 4.5 * (u2 - uxuy2) - u215);
-		    nNE[i] = one36th * newrho * (1 - ux3 - uy3 + 4.5 * (u2 + uxuy2) - u215);
-		    rho[i] = newrho;
-		    ux[i] = newux;
-		    uy[i] = newuy;
-		}
-		else {
-		    n0[i] = four9ths * newrho * (1 - u215);
-		    nE[ix] = one9th * newrho * (1 + ux3 + 4.5 * ux2 - u215);
-		    nW[i] = one9th * newrho * (1 - ux3 + 4.5 * ux2 - u215);
-		    nN[iy] = one9th * newrho * (1 + uy3 + 4.5 * uy2 - u215);
-		    nS[i] = one9th * newrho * (1 - uy3 + 4.5 * uy2 - u215);
-		    nNE[ixy] = one36th * newrho * (1 + ux3 + uy3 + 4.5 * (u2 + uxuy2) - u215);
-		    nSE[ix] = one36th * newrho * (1 + ux3 - uy3 + 4.5 * (u2 - uxuy2) - u215);
-		    nNW[iy] = one36th * newrho * (1 - ux3 + uy3 + 4.5 * (u2 - uxuy2) - u215);
-		    nSW[i] = one36th * newrho * (1 - ux3 - uy3 + 4.5 * (u2 + uxuy2) - u215);
-		    rho[i] = newrho;
-		    ux[i] = newux;
-		    uy[i] = newuy;
-		
-		}
-	}
-
-	// Initialize the tracer particles:
-	function initTracers() {
-		if (tracerCheck.checked) {
-			var nRows = Math.ceil(Math.sqrt(nTracers));
-			var dx = xdim / nRows;
-			var dy = ydim / nRows;
-			var nextX = dx / 2;
-			var nextY = dy / 2;
-			for (var t=0; t<nTracers; t++) {
-				tracerX[t] = nextX;
-				tracerY[t] = nextY;
-				nextX += dx;
-				if (nextX > xdim) {
-					nextX = dx / 2;
-					nextY += dy;
-				}
-			}
-		}
-		paintCanvas();
-	}
-
-	// Paint the canvas:
-	function paintCanvas() {
-		var cIndex=0;
-		var contrast = Math.pow(1.2,Number(contrastSlider.value));
-		var plotType = plotSelect.selectedIndex;
-		//var pixelGraphics = pixelCheck.checked;
-		if (plotType == 4) computeCurl();
-		for (var y=0; y<ydim; y++) {
-			for (var x=0; x<xdim; x++) {
-				if (barrier[x+y*xdim]) {
-					cIndex = nColors + 1;	// kludge for barrier color which isn't really part of color map
-				} else {
-					if (plotType == 0) {
-						cIndex = Math.round(nColors * ((rho[x+y*xdim]-1)*6*contrast + 0.5));
-					} else if (plotType == 1) {
-						cIndex = Math.round(nColors * (ux[x+y*xdim]*2*contrast + 0.5));
-					} else if (plotType == 2) {
-						cIndex = Math.round(nColors * (uy[x+y*xdim]*2*contrast + 0.5));
-					} else if (plotType == 3) {
-						var speed = Math.sqrt(ux[x+y*xdim]*ux[x+y*xdim] + uy[x+y*xdim]*uy[x+y*xdim]);
-						cIndex = Math.round(nColors * (speed*4*contrast));
-					} else {
-						cIndex = Math.round(nColors * (curl[x+y*xdim]*5*contrast + 0.5));
-					}
-					if (cIndex < 0) cIndex = 0;
-					if (cIndex > nColors) cIndex = nColors;
-				}
-				//if (pixelGraphics) {
-					//colorSquare(x, y, cIndex);
-				colorSquare(x, y, redList[cIndex], greenList[cIndex], blueList[cIndex]);
-				//} else {
-				//	context.fillStyle = hexColorList[cIndex];
-				//	context.fillRect(x*pxPerSquare, (ydim-y-1)*pxPerSquare, pxPerSquare, pxPerSquare);
-				//}
-			}
-		}
-		//if (pixelGraphics) 
-		context.putImageData(image, 0, 0);		// blast image to the screen
-		// Draw tracers, force vector, and/or sensor if appropriate:
-		if (tracerCheck.checked) drawTracers();
-		if (flowlineCheck.checked) drawFlowlines();
-		if (forceCheck.checked) drawForceArrow(barrierxSum/barrierCount, barrierySum/barrierCount, barrierFx, barrierFy);
-		if (sensorCheck.checked) drawSensor();
-	}
-
-	// Color a grid square in the image data array, one pixel at a time (rgb each in range 0 to 255):
-	function colorSquare(x, y, r, g, b) {
-	//function colorSquare(x, y, cIndex) {		// for some strange reason, this version is quite a bit slower on Chrome
-		//var r = redList[cIndex];
-		//var g = greenList[cIndex];
-		//var b = blueList[cIndex];
-		var flippedy = ydim - y - 1;			// put y=0 at the bottom
-		for (var py=flippedy*pxPerSquare; py<(flippedy+1)*pxPerSquare; py++) {
-			for (var px=x*pxPerSquare; px<(x+1)*pxPerSquare; px++) {
-				var index = (px + py*image.width) * 4;
-				image.data[index+0] = r;
-				image.data[index+1] = g;
-				image.data[index+2] = b;
-			}
-		}
-	}
-
-	// Compute the curl (actually times 2) of the macroscopic velocity field, for plotting:
-	function computeCurl() {
-		for (var y=1; y<ydim-1; y++) {			// interior sites only; leave edges set to zero
-			for (var x=1; x<xdim-1; x++) {
-				curl[x+y*xdim] = uy[x+1+y*xdim] - uy[x-1+y*xdim] - ux[x+(y+1)*xdim] + ux[x+(y-1)*xdim];
-			}
-		}
-	}
-
-	// Draw the tracer particles:
-	function drawTracers() {
-		context.fillStyle = "rgb(150,150,150)";
-		for (var t=0; t<nTracers; t++) {
-			var canvasX = (tracerX[t]+0.5) * pxPerSquare;
-			var canvasY = canvas.height - (tracerY[t]+0.5) * pxPerSquare;
-			context.fillRect(canvasX-1, canvasY-1, 2, 2);
-		}
-	}
-
-	// Draw a grid of short line segments along flow directions:
-	function drawFlowlines() {
-		var pxPerFlowline = 10;
-		if (pxPerSquare == 1) pxPerFlowline = 6;
-		if (pxPerSquare == 2) pxPerFlowline = 8;
-		if (pxPerSquare == 5) pxPerFlowline = 12;
-		if ((pxPerSquare == 6) || (pxPerSquare == 8)) pxPerFlowline = 15;
-		if (pxPerSquare == 10) pxPerFlowline = 20;
-		var sitesPerFlowline = pxPerFlowline / pxPerSquare;
-		var xLines = canvas.width / pxPerFlowline;
-		var yLines = canvas.height / pxPerFlowline;
-		for (var yCount=0; yCount<yLines; yCount++) {
-			for (var xCount=0; xCount<xLines; xCount++) {
-				var x = Math.round((xCount+0.5) * sitesPerFlowline);
-				var y = Math.round((yCount+0.5) * sitesPerFlowline);
-				var thisUx = ux[x+y*xdim];
-				var thisUy = uy[x+y*xdim];
-				var speed = Math.sqrt(thisUx*thisUx + thisUy*thisUy);
-				if (speed > 0.0001) {
-					var px = (xCount+0.5) * pxPerFlowline;
-					var py = canvas.height - ((yCount+0.5) * pxPerFlowline);
-					var scale = 0.5 * pxPerFlowline / speed;
-					context.beginPath();
-					context.moveTo(px-thisUx*scale, py+thisUy*scale);
-					context.lineTo(px+thisUx*scale, py-thisUy*scale);
-					//context.lineWidth = speed * 5;
-					var cIndex = Math.round(speed * transBlackArraySize / 0.3);
-					if (cIndex >= transBlackArraySize) cIndex = transBlackArraySize - 1;
-					context.strokeStyle = transBlackArray[cIndex];
-					//context.strokeStyle = "rgba(0,0,0,0.1)";
-					context.stroke();
-				}
-			}
-		}
-	}
-
-	// Draw an arrow to represent the total force on the barrier(s):
-	function drawForceArrow(x, y, Fx, Fy) {
-		context.fillStyle = "rgba(100,100,100,0.7)";
-		context.translate((x + 0.5) * pxPerSquare, canvas.height - (y + 0.5) * pxPerSquare);
-		var magF = Math.sqrt(Fx*Fx + Fy*Fy);
-		context.scale(4*magF, 4*magF);
-		context.rotate(Math.atan2(-Fy, Fx));
-		context.beginPath();
-		context.moveTo(0, 3);
-		context.lineTo(100, 3);
-		context.lineTo(100, 12);
-		context.lineTo(130, 0);
-		context.lineTo(100, -12);
-		context.lineTo(100, -3);
-		context.lineTo(0, -3);
-		context.lineTo(0, 3);
-		context.fill();
-		context.setTransform(1, 0, 0, 1, 0, 0);
-	}
-
-	// Draw the sensor and its associated data display:
-	function drawSensor() {
-		var canvasX = (sensorX+0.5) * pxPerSquare;
-		var canvasY = canvas.height - (sensorY+0.5) * pxPerSquare;
-		context.fillStyle = "rgba(180,180,180,0.7)";	// first draw gray filled circle
-		context.beginPath();
-		context.arc(canvasX, canvasY, 7, 0, 2*Math.PI);
-		context.fill();
-		context.strokeStyle = "#404040";				// next draw cross-hairs
-		context.linewidth = 1;
-		context.beginPath();
-		context.moveTo(canvasX, canvasY-10);
-		context.lineTo(canvasX, canvasY+10);
-		context.moveTo(canvasX-10, canvasY);
-		context.lineTo(canvasX+10, canvasY);
-		context.stroke();
-		context.fillStyle = "rgba(255,255,255,0.5)";	// draw rectangle behind text
-		canvasX += 10;
-		context.font = "12px Monospace";
-		var rectWidth = context.measureText("00000000000").width+6;
-		var rectHeight = 58;
-		if (canvasX+rectWidth > canvas.width) canvasX -= (rectWidth+20);
-		if (canvasY+rectHeight > canvas.height) canvasY = canvas.height - rectHeight;
-		context.fillRect(canvasX, canvasY, rectWidth, rectHeight);
-		context.fillStyle = "#000000";					// finally draw the text
-		canvasX += 3;
-		canvasY += 12;
-		var coordinates = "  (" + sensorX + "," + sensorY + ")";
-		context.fillText(coordinates, canvasX, canvasY);
-		canvasY += 14;
-		var rhoSymbol = String.fromCharCode(parseInt('03C1',16));
-		var index = sensorX + sensorY * xdim;
-		context.fillText(" " + rhoSymbol + " =  " + Number(rho[index]).toFixed(3), canvasX, canvasY);
-		canvasY += 14;
-		var digitString = Number(ux[index]).toFixed(3);
-		if (ux[index] >= 0) digitString = " " + digitString;
-		context.fillText("ux = " + digitString, canvasX, canvasY);
-		canvasY += 14;
-		digitString = Number(uy[index]).toFixed(3);
-		if (uy[index] >= 0) digitString = " " + digitString;
-		context.fillText("uy = " + digitString, canvasX, canvasY);
-	}
-
-	// Functions to handle mouse/touch interaction:
-	function mouseDown(e) {
-		if (sensorCheck.checked) {
-			var canvasLoc = pageToCanvas(e.pageX, e.pageY);
-			var gridLoc = canvasToGrid(canvasLoc.x, canvasLoc.y);
-			var dx = (gridLoc.x - sensorX) * pxPerSquare;
-			var dy = (gridLoc.y - sensorY) * pxPerSquare;
-			if (Math.sqrt(dx*dx + dy*dy) <= 8) {
-				draggingSensor = true;
-			}
-		}
-		mousePressDrag(e);
-	};
-	function mouseMove(e) {
-		if (mouseIsDown) {
-			mousePressDrag(e);
-		}
-	};
-	function mouseUp(e) {
-		mouseIsDown = false;
-		draggingSensor = false;
-	};
-
-	// Handle mouse press or drag:
-	function mousePressDrag(e) {
-		e.preventDefault();
-		mouseIsDown = true;
-		var canvasLoc = pageToCanvas(e.pageX, e.pageY);
-		if (draggingSensor) {
-			var gridLoc = canvasToGrid(canvasLoc.x, canvasLoc.y);
-			sensorX = gridLoc.x;
-			sensorY = gridLoc.y;
-			paintCanvas();
-			return;
-		}
-		if (mouseSelect.selectedIndex == 2) {
-			mouseX = canvasLoc.x;
-			mouseY = canvasLoc.y;
-			return;
-		}
-		var gridLoc = canvasToGrid(canvasLoc.x, canvasLoc.y);
-		if (mouseSelect.selectedIndex == 0) {
-			addBarrier(gridLoc.x, gridLoc.y);
-			paintCanvas();
-		} else {
-			removeBarrier(gridLoc.x, gridLoc.y);
-		}
-	}
-
-	// Convert page coordinates to canvas coordinates:
-	function pageToCanvas(pageX, pageY) {
-		var canvasX = pageX - canvas.offsetLeft;
-		var canvasY = pageY - canvas.offsetTop;
-		// this simple subtraction may not work when the canvas is nested in other elements
-		return { x:canvasX, y:canvasY };
-	}
-
-	// Convert canvas coordinates to grid coordinates:
-	function canvasToGrid(canvasX, canvasY) {
-		var gridX = Math.floor(canvasX / pxPerSquare);
-		var gridY = Math.floor((canvas.height - 1 - canvasY) / pxPerSquare); 	// off by 1?
-		return { x:gridX, y:gridY };
-	}
-
-	// Add a barrier at a given grid coordinate location:
-	function addBarrier(x, y) {
-		if ((x > 1) && (x < xdim-2) && (y > 1) && (y < ydim-2)) {
-			barrier[x+y*xdim] = true;
-		}
-	}
-
-	// Remove a barrier at a given grid coordinate location:
-	function removeBarrier(x, y) {
-		if (barrier[x+y*xdim]) {
-			barrier[x+y*xdim] = false;
-			paintCanvas();
-		}
-	}
-
-	// Clear all barriers:
-	function clearBarriers() {
-		for (var x=0; x<xdim; x++) {
-			for (var y=0; y<ydim; y++) {
-				barrier[x+y*xdim] = false;
-			}
-		}
-		paintCanvas();
-	}
-
-	// Resize the grid:
-	function resize() {
-		// First up-sample the macroscopic variables into temporary arrays at max resolution:
-		var tempRho = new Array(canvas.width*canvas.height);
-		var tempUx = new Array(canvas.width*canvas.height);
-		var tempUy = new Array(canvas.width*canvas.height);
-		var tempBarrier = new Array(canvas.width*canvas.height);
-		for (var y=0; y<canvas.height; y++) {
-			for (var x=0; x<canvas.width; x++) {
-				var tempIndex = x + y*canvas.width;
-				var xOld = Math.floor(x / pxPerSquare);
-				var yOld = Math.floor(y / pxPerSquare);
-				var oldIndex = xOld + yOld*xdim;
-				tempRho[tempIndex] = rho[oldIndex];
-				tempUx[tempIndex] = ux[oldIndex];
-				tempUy[tempIndex] = uy[oldIndex];
-				tempBarrier[tempIndex] = barrier[oldIndex];
-			}
-		}
-		// Get new size from GUI selector:
-		var oldPxPerSquare = pxPerSquare;
-		pxPerSquare = Number(sizeSelect.options[sizeSelect.selectedIndex].value);
-		var growRatio = oldPxPerSquare / pxPerSquare;
-		xdim = canvas.width / pxPerSquare;
-		ydim = canvas.height / pxPerSquare;
-		// Create new arrays at the desired resolution:
-		n0 = new Array(xdim*ydim);
-		nN = new Array(xdim*ydim);
-		nS = new Array(xdim*ydim);
-		nE = new Array(xdim*ydim);
-		nW = new Array(xdim*ydim);
-		nNE = new Array(xdim*ydim);
-		nSE = new Array(xdim*ydim);
-		nNW = new Array(xdim*ydim);
-		nSW = new Array(xdim*ydim);
-		rho = new Array(xdim*ydim);
-		ux = new Array(xdim*ydim);
-		uy = new Array(xdim*ydim);
-		curl = new Array(xdim*ydim);
-		barrier = new Array(xdim*ydim);
-		// Down-sample the temporary arrays into the new arrays:
-		for (var yNew=0; yNew<ydim; yNew++) {
-			for (var xNew=0; xNew<xdim; xNew++) {
-				var rhoTotal = 0;
-				var uxTotal = 0;
-				var uyTotal = 0;
-				var barrierTotal = 0;
-				for (var y=yNew*pxPerSquare; y<(yNew+1)*pxPerSquare; y++) {
-					for (var x=xNew*pxPerSquare; x<(xNew+1)*pxPerSquare; x++) {
-						var index = x + y*canvas.width;
-						rhoTotal += tempRho[index];
-						uxTotal += tempUx[index];
-						uyTotal += tempUy[index];
-						if (tempBarrier[index]) barrierTotal++;
-					}
-				}
-				setEquil(xNew, yNew, uxTotal/(pxPerSquare*pxPerSquare), uyTotal/(pxPerSquare*pxPerSquare), rhoTotal/(pxPerSquare*pxPerSquare))
-				curl[xNew+yNew*xdim] = 0.0;
-				barrier[xNew+yNew*xdim] = (barrierTotal >= pxPerSquare*pxPerSquare/2);
-			}
-		}
-		setBoundaries();
-		if (tracerCheck.checked) {
-			for (var t=0; t<nTracers; t++) {
-				tracerX[t] *= growRatio;
-				tracerY[t] *= growRatio;
-			}
-		}
-		sensorX = Math.round(sensorX * growRatio);
-		sensorY = Math.round(sensorY * growRatio);
-		//computeCurl();
-		paintCanvas();
-		resetTimer();
-	}
-
-	// Function to initialize or re-initialize the fluid, based on speed slider setting:
-	function initFluid() {
-		// Amazingly, if I nest the y loop inside the x loop, Firefox slows down by a factor of 20
-		var u0 = Number(speedSlider.value);
-		for (var y=0; y<ydim; y++) {
-			for (var x=0; x<xdim; x++) {
-				setEquil(x, y, u0, 0, 1);
-				curl[x+y*xdim] = 0.0;
-			}
-		}
-	paintCanvas();
-	}
-
-	// Function to start or pause the simulation:
-	function startStop() {
-		running = !running;
-		if (running) {
-			startButton.value = "Pause";
-			resetTimer();
-			simulate();
-		} else {
-			startButton.value = " Run ";
-		}
-	}
-
-	// Reset the timer that handles performance evaluation:
-	function resetTimer() {
-		stepCount = 0;
-		startTime = (new Date()).getTime();
-	}
-
-	// Show value of flow speed setting:
-	function adjustSpeed() {
-		speedValue.innerHTML = Number(speedSlider.value).toFixed(3);
-	}
-
-	// Show value of viscosity:
-	function adjustViscosity() {
-		viscValue.innerHTML = Number(viscSlider.value);//.toFixed(6);
-	}
-
-	// Show or hide the data area:
-	function showData() {
-		if (dataCheck.checked) {
-			dataSection.style.display="block";
-		} else {
-			dataSection.style.display="none";
-		}
-	}
-
-	// Start or stop collecting data:
-	function startOrStopData() {
-		collectingData = !collectingData;
-		if (collectingData) {
-			time = 0;
-			dataArea.innerHTML = "Time \tDensity\tVel_x \tVel_y \tForce_x\tForce_y\n";
-			writeData();
-			dataButton.value = "Stop data collection";
-			showingPeriod = false;
-			periodButton.value = "Show F_y period";
-		} else {
-			dataButton.value = "Start data collection";
-		}
-	}
-
-	// Write one line of data to the data area:
-	function writeData() {
-		var timeString = String(time);
-		while (timeString.length < 5) timeString = "0" + timeString;
-		sIndex = sensorX + sensorY*xdim;
-		dataArea.innerHTML += timeString + "\t" + Number(rho[sIndex]).toFixed(4) + "\t"
-			+ Number(ux[sIndex]).toFixed(4) + "\t" + Number(uy[sIndex]).toFixed(4) + "\t"
-			+ Number(barrierFx).toFixed(4) + "\t" + Number(barrierFy).toFixed(4) + "\n";
-		dataArea.scrollTop = dataArea.scrollHeight;
-	}
-
-	// Handle click to "show period" button
-	function showPeriod() {
-		showingPeriod = !showingPeriod;
-		if (showingPeriod) {
-			time = 0;
-			lastBarrierFy = 1.0;	// arbitrary positive value
-			lastFyOscTime = -1.0;	// arbitrary negative value
-			dataArea.innerHTML = "Period of F_y oscillation\n";
-			periodButton.value = "Stop data";
-			collectingData = false;
-			dataButton.value = "Start data collection";
-		} else {
-			periodButton.value = "Show F_y period";
-		}
-	}
-
-	// Write all the barrier locations to the data area:
-	function showBarrierLocations() {
-		dataArea.innerHTML = '{name:"Barrier locations",\n';
-		dataArea.innerHTML += 'locations:[\n';
-		for (var y=1; y<ydim-1; y++) {
-			for (var x=1; x<xdim-1; x++) {
-				if (barrier[x+y*xdim]) dataArea.innerHTML += x + ',' + y + ',\n';
-			}
-		}
-		dataArea.innerHTML = dataArea.innerHTML.substr(0, dataArea.innerHTML.length-2); // remove final comma
-		dataArea.innerHTML += '\n]},\n';
-	}
-
-	// Place a preset barrier:
-	function placePresetBarrier() {
-		var index = barrierSelect.selectedIndex;
-		if (index == 0) return;
-		clearBarriers();
-		var bCount = barrierList[index-1].locations.length/2;	// number of barrier sites
-		// To decide where to place it, find minimum x and min/max y:
-		var xMin = barrierList[index-1].locations[0];
-		var yMin = barrierList[index-1].locations[1];
-		var yMax = yMin;
-		for (var siteIndex=2; siteIndex<2*bCount; siteIndex+=2) {
-			if (barrierList[index-1].locations[siteIndex] < xMin) {
-				xMin = barrierList[index-1].locations[siteIndex];
-			}
-			if (barrierList[index-1].locations[siteIndex+1] < yMin) {
-				yMin = barrierList[index-1].locations[siteIndex+1];
-			}
-			if (barrierList[index-1].locations[siteIndex+1] > yMax) {
-				yMax = barrierList[index-1].locations[siteIndex+1];
-			}
-		}
-		var yAverage = Math.round((yMin+yMax)/2);
-		// Now place the barriers:
-		for (var siteIndex=0; siteIndex<2*bCount; siteIndex+=2) {
-			var x = barrierList[index-1].locations[siteIndex] - xMin + Math.round(ydim/3);
-			var y = barrierList[index-1].locations[siteIndex+1] - yAverage + Math.round(ydim/2);
-			addBarrier(x, y);
-		}
-		paintCanvas();
-		barrierSelect.selectedIndex = 0;	// A choice on this menu is a one-time action, not an ongoing setting
-	}
-
-	// Print debugging data:
-	function debug() {
-		dataArea.innerHTML = "Tracer locations:\n";
-		for (var t=0; t<nTracers; t++) {
-			dataArea.innerHTML += tracerX[t] + ", " + tracerY[t] + "\n";
-		}
-	}
-</script>
-    
-</body>
-</html>
\ No newline at end of file
diff --git a/3rdParty/WebDemo/barrierdata.js b/3rdParty/WebDemo/barrierdata.js
deleted file mode 100644
index c165530d7dbc4b7b07d3e7a3f6703b180c38cbce..0000000000000000000000000000000000000000
--- a/3rdParty/WebDemo/barrierdata.js
+++ /dev/null
@@ -1,655 +0,0 @@
-var barrierList = [
-{ name: "Short line",
-    locations: [
-12, 15,
-12, 16,
-12, 17,
-12, 18,
-12, 19,
-12, 20,
-12, 21,
-12, 22,
-12, 23]
-},
-{ name: "Long line",
-    locations: [
-13, 11,
-13, 12,
-13, 13,
-13, 14,
-13, 15,
-13, 16,
-13, 17,
-13, 18,
-13, 19,
-13, 20,
-13, 21,
-13, 22,
-13, 23,
-13, 24,
-13, 25,
-13, 26,
-13, 27,
-13, 28
-]
-},
-{ name: "Diagonal",
-    locations: [
-30, 14,
-29, 15,
-30, 15,
-28, 16,
-29, 16,
-27, 17,
-28, 17,
-26, 18,
-27, 18,
-25, 19,
-26, 19,
-24, 20,
-25, 20,
-23, 21,
-24, 21,
-22, 22,
-23, 22,
-21, 23,
-22, 23,
-20, 24,
-21, 24,
-19, 25,
-20, 25,
-18, 26,
-19, 26,
-17, 27,
-18, 27,
-16, 28,
-17, 28,
-15, 29,
-16, 29,
-14, 30,
-15, 30,
-13, 31,
-14, 31
-]
-},
-{ name: "Shallow diagonal",
-    locations: [
-47, 18,
-48, 18,
-49, 18,
-50, 18,
-44, 19,
-45, 19,
-46, 19,
-47, 19,
-41, 20,
-42, 20,
-43, 20,
-44, 20,
-38, 21,
-39, 21,
-40, 21,
-41, 21,
-35, 22,
-36, 22,
-37, 22,
-38, 22,
-32, 23,
-33, 23,
-34, 23,
-35, 23,
-29, 24,
-30, 24,
-31, 24,
-32, 24,
-26, 25,
-27, 25,
-28, 25,
-29, 25,
-23, 26,
-24, 26,
-25, 26,
-26, 26,
-20, 27,
-21, 27,
-22, 27,
-23, 27,
-17, 28,
-18, 28,
-19, 28,
-20, 28
-]
-},
-{ name: "Small circle",
-    locations: [
-14, 11,
-15, 11,
-16, 11,
-17, 11,
-18, 11,
-13, 12,
-14, 12,
-18, 12,
-19, 12,
-12, 13,
-13, 13,
-19, 13,
-20, 13,
-12, 14,
-20, 14,
-12, 15,
-20, 15,
-12, 16,
-20, 16,
-12, 17,
-13, 17,
-19, 17,
-20, 17,
-13, 18,
-14, 18,
-18, 18,
-19, 18,
-14, 19,
-15, 19,
-16, 19,
-17, 19,
-18, 19
-]
-},
-{ name: "Large circle",
-    locations: [
-19, 11,
-20, 11,
-21, 11,
-22, 11,
-23, 11,
-24, 11,
-17, 12,
-18, 12,
-19, 12,
-24, 12,
-25, 12,
-26, 12,
-16, 13,
-17, 13,
-26, 13,
-27, 13,
-15, 14,
-16, 14,
-27, 14,
-28, 14,
-14, 15,
-15, 15,
-28, 15,
-29, 15,
-14, 16,
-29, 16,
-13, 17,
-14, 17,
-29, 17,
-30, 17,
-13, 18,
-30, 18,
-13, 19,
-30, 19,
-13, 20,
-30, 20,
-13, 21,
-30, 21,
-13, 22,
-14, 22,
-29, 22,
-30, 22,
-14, 23,
-29, 23,
-14, 24,
-15, 24,
-28, 24,
-29, 24,
-15, 25,
-16, 25,
-27, 25,
-28, 25,
-16, 26,
-17, 26,
-26, 26,
-27, 26,
-17, 27,
-18, 27,
-19, 27,
-24, 27,
-25, 27,
-26, 27,
-19, 28,
-20, 28,
-21, 28,
-22, 28,
-23, 28,
-24, 28
-]
-},
-{ name: "Line with spoiler",
-    locations: [
-16, 20,
-16, 21,
-16, 22,
-16, 23,
-16, 24,
-17, 24,
-18, 24,
-19, 24,
-20, 24,
-21, 24,
-22, 24,
-23, 24,
-24, 24,
-25, 24,
-26, 24,
-27, 24,
-28, 24,
-29, 24,
-30, 24,
-31, 24,
-32, 24,
-33, 24,
-34, 24,
-35, 24,
-36, 24,
-37, 24,
-38, 24,
-39, 24,
-40, 24,
-41, 24,
-42, 24,
-43, 24,
-44, 24,
-45, 24,
-46, 24,
-47, 24,
-48, 24,
-49, 24,
-50, 24,
-16, 25,
-16, 26,
-16, 27,
-16, 28
-]
-},
-{ name: "Circle with spoiler",
-    locations: [
-29, 36,
-30, 36,
-31, 36,
-32, 36,
-33, 36,
-28, 37,
-29, 37,
-33, 37,
-34, 37,
-27, 38,
-28, 38,
-34, 38,
-35, 38,
-27, 39,
-35, 39,
-27, 40,
-35, 40,
-36, 40,
-37, 40,
-38, 40,
-39, 40,
-40, 40,
-41, 40,
-42, 40,
-43, 40,
-44, 40,
-45, 40,
-46, 40,
-47, 40,
-48, 40,
-49, 40,
-50, 40,
-51, 40,
-52, 40,
-53, 40,
-54, 40,
-55, 40,
-56, 40,
-57, 40,
-58, 40,
-59, 40,
-60, 40,
-61, 40,
-62, 40,
-63, 40,
-64, 40,
-65, 40,
-66, 40,
-67, 40,
-68, 40,
-69, 40,
-27, 41,
-35, 41,
-27, 42,
-28, 42,
-34, 42,
-35, 42,
-28, 43,
-29, 43,
-33, 43,
-34, 43,
-29, 44,
-30, 44,
-31, 44,
-32, 44,
-33, 44
-]
-},
-{ name: "Right angle",
-    locations: [
-27, 36,
-28, 36,
-29, 36,
-30, 36,
-31, 36,
-32, 36,
-33, 36,
-34, 36,
-35, 36,
-36, 36,
-37, 36,
-38, 36,
-39, 36,
-40, 36,
-41, 36,
-42, 36,
-43, 36,
-44, 36,
-45, 36,
-46, 36,
-47, 36,
-48, 36,
-49, 36,
-50, 36,
-51, 36,
-52, 36,
-53, 36,
-54, 36,
-55, 36,
-56, 36,
-57, 36,
-58, 36,
-59, 36,
-60, 36,
-61, 36,
-62, 36,
-63, 36,
-64, 36,
-65, 36,
-66, 36,
-67, 36,
-68, 36,
-69, 36,
-70, 36,
-71, 36,
-72, 36,
-73, 36,
-74, 36,
-75, 36,
-76, 36,
-77, 36,
-78, 36,
-79, 36,
-27, 37,
-27, 38,
-27, 39,
-27, 40,
-27, 41,
-27, 42,
-27, 43,
-27, 44
-]
-},
-{ name: "Wedge",
-    locations: [
-27, 36,
-28, 36,
-29, 36,
-30, 36,
-31, 36,
-32, 36,
-33, 36,
-34, 36,
-35, 36,
-36, 36,
-37, 36,
-38, 36,
-39, 36,
-40, 36,
-41, 36,
-42, 36,
-43, 36,
-44, 36,
-45, 36,
-46, 36,
-47, 36,
-48, 36,
-49, 36,
-50, 36,
-51, 36,
-52, 36,
-53, 36,
-54, 36,
-55, 36,
-56, 36,
-57, 36,
-58, 36,
-59, 36,
-60, 36,
-61, 36,
-62, 36,
-63, 36,
-64, 36,
-65, 36,
-66, 36,
-67, 36,
-68, 36,
-69, 36,
-70, 36,
-71, 36,
-72, 36,
-73, 36,
-74, 36,
-75, 36,
-76, 36,
-77, 36,
-78, 36,
-79, 36,
-27, 37,
-67, 37,
-68, 37,
-69, 37,
-70, 37,
-71, 37,
-72, 37,
-73, 37,
-27, 38,
-61, 38,
-62, 38,
-63, 38,
-64, 38,
-65, 38,
-66, 38,
-67, 38,
-27, 39,
-55, 39,
-56, 39,
-57, 39,
-58, 39,
-59, 39,
-60, 39,
-61, 39,
-27, 40,
-49, 40,
-50, 40,
-51, 40,
-52, 40,
-53, 40,
-54, 40,
-55, 40,
-27, 41,
-43, 41,
-44, 41,
-45, 41,
-46, 41,
-47, 41,
-48, 41,
-49, 41,
-27, 42,
-37, 42,
-38, 42,
-39, 42,
-40, 42,
-41, 42,
-42, 42,
-43, 42,
-27, 43,
-31, 43,
-32, 43,
-33, 43,
-34, 43,
-35, 43,
-36, 43,
-37, 43,
-27, 44,
-28, 44,
-29, 44,
-30, 44,
-31, 44
-]
-},
-{ name: "Airfoil",
-    locations: [
-17, 16,
-18, 16,
-19, 16,
-20, 16,
-21, 16,
-22, 16,
-23, 16,
-24, 16,
-25, 16,
-26, 16,
-27, 16,
-28, 16,
-29, 16,
-30, 16,
-31, 16,
-32, 16,
-33, 16,
-34, 16,
-35, 16,
-36, 16,
-37, 16,
-38, 16,
-39, 16,
-40, 16,
-41, 16,
-42, 16,
-43, 16,
-44, 16,
-45, 16,
-46, 16,
-47, 16,
-48, 16,
-49, 16,
-50, 16,
-51, 16,
-52, 16,
-53, 16,
-54, 16,
-55, 16,
-56, 16,
-57, 16,
-58, 16,
-59, 16,
-60, 16,
-61, 16,
-62, 16,
-63, 16,
-64, 16,
-65, 16,
-66, 16,
-67, 16,
-68, 16,
-14, 17,
-15, 17,
-16, 17,
-17, 17,
-56, 17,
-57, 17,
-58, 17,
-59, 17,
-60, 17,
-61, 17,
-62, 17,
-13, 18,
-14, 18,
-50, 18,
-51, 18,
-52, 18,
-53, 18,
-54, 18,
-55, 18,
-56, 18,
-13, 19,
-44, 19,
-45, 19,
-46, 19,
-47, 19,
-48, 19,
-49, 19,
-50, 19,
-13, 20,
-38, 20,
-39, 20,
-40, 20,
-41, 20,
-42, 20,
-43, 20,
-44, 20,
-13, 21,
-14, 21,
-32, 21,
-33, 21,
-34, 21,
-35, 21,
-36, 21,
-37, 21,
-38, 21,
-14, 22,
-15, 22,
-26, 22,
-27, 22,
-28, 22,
-29, 22,
-30, 22,
-31, 22,
-32, 22,
-15, 23,
-16, 23,
-17, 23,
-18, 23,
-21, 23,
-22, 23,
-23, 23,
-24, 23,
-25, 23,
-26, 23,
-18, 24,
-19, 24,
-20, 24,
-21, 24
-]
-}
-];
\ No newline at end of file
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000000000000000000000000000000000000..50d4989d5c269521392644515d716fa93b3cf6e3
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,40 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+type: software
+authors:
+  - family-names: Kutscher
+    given-names: Konstantin
+    orcid: https://orcid.org/0000-0002-1099-1608
+  - family-names: Schönherr
+    given-names: Martin
+    orcid: https://orcid.org/0000-0002-4774-1776
+  - family-names: Geier
+    given-names: Martin
+    orcid: https://orcid.org/0000-0002-8367-9412
+  - family-names: Krafczyk
+    given-names: Manfred
+    orcid: https://orcid.org/0000-0002-8509-0871
+  - family-names: Alihussein
+    given-names: Hussein
+    orcid: https://orcid.org/0000-0003-3656-7028
+  - family-names: Linxweiler
+    given-names: Jan
+    orcid: https://orcid.org/0000-0002-2755-5087
+  - family-names: Peters
+    given-names: Sören
+    orcid: https://orcid.org/0000-0001-5236-3776
+  - family-names: Wellmann
+    given-names: Anna
+    orcid: https://orcid.org/0000-0002-8825-2995
+  - family-names: Safari
+    given-names: Hesameddin
+    orcid: https://orcid.org/0000-0002-2755-5087  # NOTE(review): identical to the ORCID listed for Linxweiler above - looks like a copy-paste; confirm
+  - family-names: Marcus
+    given-names: Sven
+    orcid: https://orcid.org/0000-0003-3689-2162
+title: "VirtualFluids"
+version: 0.1.0
+license: GPL-3.0-or-later
+repository-code: "https://git.rz.tu-bs.de/irmb/VirtualFluids"
+date-released: "XXXXXXX"  # TODO: placeholder - CFF expects an ISO 8601 date (YYYY-MM-DD); fill in before release
+
diff --git a/CMake/FileUtilities.cmake b/CMake/FileUtilities.cmake
index 151000a681795923d4e31ed8c5f06dfd1e7af7fd..13057ef832b5aa2d7ce303fe55e95a91284f5f56 100644
--- a/CMake/FileUtilities.cmake
+++ b/CMake/FileUtilities.cmake
@@ -5,7 +5,7 @@
 ## After function call the files are stored in: MY_SRCS
 #################################################################################
 
-macro(includeAllFiles targetName file_path)
+macro(includeAllFiles folderName targetName file_path)
 	if(NOT DEFINED collectTestFiles)
 	    set(collectTestFiles ON)
 	endif()
@@ -14,11 +14,11 @@ macro(includeAllFiles targetName file_path)
         set(collectProductionFiles ON)
     endif()
 
-	includeFiles(${targetName} "${file_path}")
+	includeFiles(${folderName} ${targetName} "${file_path}")
 endmacro(includeAllFiles)
 
 
-macro(includeProductionFiles targetName file_path)
+macro(includeProductionFiles folderName targetName file_path)
 	if(NOT DEFINED collectTestFiles)
 	    set(collectTestFiles OFF)
 	endif()
@@ -27,12 +27,12 @@ macro(includeProductionFiles targetName file_path)
         set(collectProductionFiles ON)
     endif()
 
-	includeFiles(${targetName} "${file_path}")
+	includeFiles(${folderName}  ${targetName} "${file_path}")
 endmacro(includeProductionFiles)
 
 
 
-macro(includeTestFiles targetName file_paths)
+macro(includeTestFiles folderName file_paths)
 	if(NOT DEFINED collectTestFiles)
 		set(collectTestFiles ON)
 	endif()
@@ -41,13 +41,13 @@ macro(includeTestFiles targetName file_paths)
 		set(collectProductionFiles OFF)
 	endif()
 
-	includeFiles(${targetName} "${file_paths}")
+	includeFiles(${folderName} ${folderName} "${file_paths}")
 endmacro(includeTestFiles)
 
 
 
 
-macro(includeFiles targetName file_paths)
+macro(includeFiles folderName targetName file_paths)
 
 	foreach(file ${file_paths})
 
@@ -57,7 +57,7 @@ macro(includeFiles targetName file_paths)
 
 		collectFilesFrom(${file})
 		if (package_dir)
-		   setSourceGroupForFilesIn(${file} ${package_dir} ${targetName})
+		   setSourceGroupForFilesIn(${file} ${package_dir} ${targetName} ${folderName})
 		endif()
 
 	endforeach()
@@ -90,9 +90,9 @@ endmacro()
 
 
 
-macro(setSourceGroupForFilesIn file package_dir targetName)
+macro(setSourceGroupForFilesIn file package_dir targetName folderName)
 #input: target_name PACKAGE_SRCS
-	buildSourceGroup(${targetName} ${package_dir})
+	buildSourceGroup(${folderName} ${package_dir})
 
 	if(isAllTestSuite)
 		source_group(${targetName}\\${SOURCE_GROUP} FILES ${file})
@@ -105,20 +105,20 @@ endmacro(setSourceGroupForFilesIn)
 
 
 
-macro(buildSourceGroup targetName path)
-#input: targetName (e.g. lib name, exe name)
+macro(buildSourceGroup folderName path)
+#input: folderName (e.g. name of folder after src/)
 
 	unset(SOURCE_GROUP)
 	string(REPLACE "/" ";" folderListFromPath ${path})
-	set(findTargetName 0)
+	set(findFolderName 0)
 
 	foreach(folder ${folderListFromPath})
-		if(findTargetName)
+		if(findFolderName)
 			set(SOURCE_GROUP ${SOURCE_GROUP}\\${folder})
 		endif()
 
-		if(${folder} STREQUAL ${targetName})
-			SET(findTargetName 1)
+		if(${folder} STREQUAL ${folderName})
+			SET(findFolderName 1)
 		endif()
 	endforeach()
 
diff --git a/CMake/VirtualFluidsMacros.cmake b/CMake/VirtualFluidsMacros.cmake
index 63503f5f14221bb8cec7670dbdda6aa92497d327..4fd163b2cc1b53fe461ef482d906f4cb1255a76c 100644
--- a/CMake/VirtualFluidsMacros.cmake
+++ b/CMake/VirtualFluidsMacros.cmake
@@ -105,14 +105,15 @@ function(vf_add_library)
 
     set( options )
     set( oneValueArgs NAME BUILDTYPE)
-    set( multiValueArgs PUBLIC_LINK PRIVATE_LINK FILES FOLDER EXCLUDE)
+    set( multiValueArgs PUBLIC_LINK PRIVATE_LINK FILES FOLDER EXCLUDE MODULEFOLDER)
     cmake_parse_arguments( ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
 
-    if(DEFINED ARG_NAME)
+    if(DEFINED ARG_NAME) 
         set(library_name ${ARG_NAME})
     else()
         vf_get_library_name (library_name)
     endif()
+    vf_get_library_name (folder_name) # folder_name is not equal to library_name when ARG_NAME was set
 
     if(NOT DEFINED ARG_BUILDTYPE)
         if(BUILD_SHARED_LIBS)
@@ -122,12 +123,16 @@ function(vf_add_library)
         endif()
     endif()
 
+    if(DEFINED ARG_MODULEFOLDER)
+        set(folder_name ${ARG_MODULEFOLDER})
+    endif()
+
     status("Configuring the target: ${library_name} (type=${ARG_BUILDTYPE})...")
 
 
     collectFiles(sourceFiles "${ARG_FILES}" "${ARG_FOLDER}" "${ARG_EXCLUDE}")
 
-    includeProductionFiles (${library_name} "${sourceFiles}")
+    includeProductionFiles (${folder_name} ${library_name} "${sourceFiles}")
 
     #################################################################
     ###   ADD TARGET                                              ###
@@ -325,4 +330,4 @@ function(vf_load_user_apps)
     foreach(app IN LISTS USER_APPS)
       add_subdirectory(${app})
     endforeach()
-endfunction()
\ No newline at end of file
+endfunction()
diff --git a/CMake/cmake_config_files/ARAGORN.config.cmake b/CMake/cmake_config_files/ARAGORN.config.cmake
index d713f02d971024f29d3fb0fd30cfce7585d9dc55..9f33c9977924ab60a1300763f53d04bb842b2f00 100644
--- a/CMake/cmake_config_files/ARAGORN.config.cmake
+++ b/CMake/cmake_config_files/ARAGORN.config.cmake
@@ -6,10 +6,6 @@
 
 set(CMAKE_CUDA_ARCHITECTURES 86)     # Nvidia GeForce RTX 3060
 
-# numerical tests location of the grids
-# SET(PATH_NUMERICAL_TESTS "E:/temp/numericalTests/")
-# list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
-
 # add invidual apps here
 set(GPU_APP "apps/gpu/LBM/")
 list(APPEND USER_APPS 
diff --git a/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake b/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake
index 90debb1a4c270109d4dfbb455f21253b3a6754b8..3259e13acaade9b896e5e4a82dec90d3f4eb5e89 100644
--- a/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake
+++ b/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake
@@ -14,4 +14,9 @@ list(APPEND USER_APPS
     "${GPU_APP}DrivenCavityMultiGPU"
     "${GPU_APP}SphereScaling"
     # "${GPU_APP}MusselOyster"
-    )
\ No newline at end of file
+    )
+
+# add_compile_options(-fsanitize=address)
+# add_link_options(-fsanitize=address)
+# add_compile_options(-fsanitize=undefined)
+# add_link_options(-fsanitize=undefined)
diff --git a/CMake/cmake_config_files/BOMBADIL.config.cmake b/CMake/cmake_config_files/BOMBADIL.config.cmake
index 9c4bd4ecffab1e63161343ecc493eb9d9bc951a4..0534bf1c87cb54bf6d8839b881fe6b772e2b7365 100644
--- a/CMake/cmake_config_files/BOMBADIL.config.cmake
+++ b/CMake/cmake_config_files/BOMBADIL.config.cmake
@@ -48,25 +48,6 @@ set(LIGGGHTS_RELEASE_LIBRARY "d:/Tools/LIGGGHTS/build/Release/liggghts.lib")
   # SET(METIS_RELEASE_LIBRARY "/mnt/d/Tools/metis-5.1.0/build/Linux-x86_64/libmetis/libmetis.a") 
 #ENDIF()
 
-#################################################################################
-#  PE  
-#################################################################################
-IF(${USE_DEM_COUPLING})
-  SET(PE_BINARY_DIR "d:/Tools/waLBerla/walberlaGit/build" CACHE PATH "pe binary dir")
-  SET(PE_ROOT "d:/Tools/waLBerla/walberlaGit" CACHE PATH "pe root")
- 
-  SET(PE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/pe/Debug/pe.lib) 
-  SET(PE_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/pe/Release/pe.lib)
-  SET(BLOCKFOREST_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/blockforest/Debug/blockforest.lib) 
-  SET(BLOCKFOREST_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/blockforest/Release/blockforest.lib)
-  SET(DOMAIN_DECOMPOSITION_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/Debug/domain_decomposition.lib) 
-  SET(DOMAIN_DECOMPOSITION_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/Release/domain_decomposition.lib)
-  SET(GEOMETRY_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/geometry/Debug/geometry.lib) 
-  SET(GEOMETRY_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/geometry/Release/geometry.lib)
-  SET(CORE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/core/Debug/core.lib) 
-  SET(CORE_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/core/Release/core.lib)
-
- ENDIF()
 
 ##################################################################################
 #  FETOL
diff --git a/CMake/cmake_config_files/GITLAB-RUNNER03.config.cmake b/CMake/cmake_config_files/GITLAB-RUNNER03.config.cmake
index edaf7669b171518c8aa6b2ec9786147a6ffc48f5..86780712df9b315883e3a49c7ea20443e66ddfd5 100644
--- a/CMake/cmake_config_files/GITLAB-RUNNER03.config.cmake
+++ b/CMake/cmake_config_files/GITLAB-RUNNER03.config.cmake
@@ -4,7 +4,4 @@
 # OS:          Windows 10
 #################################################################################
 
-
-#SET(PATH_NUMERICAL_TESTS "E:/temp/numericalTests/")
-#LIST(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
 SET(CMAKE_CUDA_ARCHITECTURES 61)
\ No newline at end of file
diff --git a/CMake/cmake_config_files/HUSSEIN.config.cmake b/CMake/cmake_config_files/HUSSEIN.config.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..9878edaaffb28ab04646493faa58eb18c05f7df9
--- /dev/null
+++ b/CMake/cmake_config_files/HUSSEIN.config.cmake
@@ -0,0 +1,14 @@
+#################################################################################
+# VirtualFluids MACHINE FILE
+# Responsible: Hussein Alihussein
+# OS:          Windows 10
+#################################################################################
+
+#################################################################################
+#  BOOST  
+#################################################################################
+SET(BOOST_VERSION "1.76.0")  # Boost release installed on this machine
+SET(BOOST_ROOT "C:/Users/hussein/Documents/VirtualFluids/boost_1_76_0")  # root of the local Boost tree
+SET(BOOST_DIR "${BOOST_ROOT}")
+SET(BOOST_LIBRARYDIR "${BOOST_ROOT}/stageMSVC64/lib")  # whole path quoted: quotes inside an unquoted argument are kept as literal characters
+#################################################################################
diff --git a/CMake/cmake_config_files/MOLLOK.config.cmake b/CMake/cmake_config_files/MOLLOK.config.cmake
index f700f3cd7a4b5669ef6ffee9436a1528e50e9dc9..72470da1bc52a242cb8e3c341e0e7f87bb06ab26 100644
--- a/CMake/cmake_config_files/MOLLOK.config.cmake
+++ b/CMake/cmake_config_files/MOLLOK.config.cmake
@@ -12,4 +12,5 @@ set(PATH_NUMERICAL_TESTS "D:/out/numericalTests/")
 list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
 
 # add invidual apps here
-list(APPEND USER_APPS "apps/gpu/LBM/WTG_RUB")
\ No newline at end of file
+list(APPEND USER_APPS "apps/gpu/LBM/WTG_RUB")
+list(APPEND USER_APPS "apps/gpu/LBM/TGV_3D_GridRef")
diff --git a/CMake/cmake_config_files/PHOENIX.config.cmake b/CMake/cmake_config_files/PHOENIX.config.cmake
index d31d8684a53a769e48408ad5febe7d2c6b22c623..2112bd6aa50e9335bc6b23bda0f0e9fda3ef7533 100644
--- a/CMake/cmake_config_files/PHOENIX.config.cmake
+++ b/CMake/cmake_config_files/PHOENIX.config.cmake
@@ -4,24 +4,6 @@
 # OS:          CentOS 7.3
 #################################################################################
 
-#################################################################################
-#  PE (legacy)
-#################################################################################
-IF(${USE_DEM_COUPLING})
-  SET(PE_BINARY_DIR "/home/irmb/walberla-git/build" CACHE PATH "pe binary dir")
-  SET(PE_ROOT "/home/irmb/walberla-git" CACHE PATH "pe root")
-
-  SET(PE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/pe/libpe.a)
-  SET(PE_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/pe/libpe.a)
-  SET(BLOCKFOREST_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/blockforest/libblockforest.a)
-  SET(BLOCKFOREST_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/blockforest/libblockforest.a)
-  SET(DOMAIN_DECOMPOSITION_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/libdomain_decomposition.a)
-  SET(DOMAIN_DECOMPOSITION_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/libdomain_decomposition.a)
-  SET(GEOMETRY_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/geometry/libgeometry.a)
-  SET(GEOMETRY_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/geometry/libgeometry.a)
-  SET(CORE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/core/libcore.a)
-  SET(CORE_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/core/libcore.a)
-ENDIF()
 
 ## nvidia
 set(CMAKE_CUDA_ARCHITECTURES 60) # NVIDIA Tesla P100
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b3af407acd66ec3223f55de7753df879786ce561..7d0dbf22fd98c2f6ac0cb417b2fe900f775bfa55 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,7 +9,7 @@
 cmake_minimum_required(VERSION 3.15..3.20 FATAL_ERROR)
 
 project(VirtualFluids
-        VERSION 1.0.0
+        VERSION 0.1.0
         DESCRIPTION "CFD code based on the Lattice Boltzmann Method"
         HOMEPAGE_URL "https://www.tu-braunschweig.de/irmb/forschung/virtualfluids"
         LANGUAGES CXX)
@@ -52,9 +52,6 @@ option(BUILD_USE_BOOST "Build VirtualFluids with boost" OFF)
 option(BUILD_USE_MPI "include MPI library support" ON)
 
 # vf gpu
-option(BUILD_VF_GKS          "Build VirtualFluids GKS"     OFF )
-option(BUILD_VF_TRAFFIC      "Build VirtualFluids Traffic" OFF)
-option(BUILD_JSONCPP         "Builds json cpp "            OFF)
 option(BUILD_NUMERIC_TESTS   "Build numeric tests"         OFF)
 
 option(BUILD_VF_UNIT_TESTS "Build VirtualFluids unit tests" OFF)
@@ -78,7 +75,7 @@ endif()
 option(BUILD_VF_PYTHON_BINDINGS "" OFF)
 
 option(BUILD_VF_DOUBLE_ACCURACY "Use double accuracy" OFF)
-
+option(BUILD_VF_ALL_SAMPLES "Build All Samples" OFF)
 
 #################################################################################
 #  MACROS
@@ -100,7 +97,7 @@ IF( BUILD_VF_DOUBLE_ACCURACY )
 ENDIF()
 
 # set gpu features
-if(BUILD_VF_GPU OR BUILD_VF_GKS)
+if(BUILD_VF_GPU)
     include(CheckLanguage)
     check_language(CUDA)
 
@@ -137,6 +134,8 @@ if(BUILD_VF_GPU OR BUILD_VF_GKS)
     message(STATUS "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
     message(STATUS "CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}")
     set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" CACHE STRING "Cuda Architecture (compute capabilitiy)")
+
+    set(CMAKE_CUDA_FLAGS_DEBUG " -G" CACHE STRING "" FORCE)
 endif()
 
 
@@ -212,11 +211,11 @@ add_subdirectory(src/lbm)
 #  VIRTUAL FLUIDS CPU / GPU
 #################################################################################
 if (BUILD_VF_CPU)
-    include (cpu.cmake)
+    include(cpu.cmake)
 endif()
-if(BUILD_VF_GPU OR BUILD_VF_GKS)
+if(BUILD_VF_GPU)
     add_subdirectory(src/cuda)
-    include (gpu.cmake)
+    include(gpu.cmake)
 endif()
 
 if (BUILD_VF_PYTHON_BINDINGS)
diff --git a/CMakePresets.json b/CMakePresets.json
index c53482ec72109f1a672b97797763d027a6ec80bf..6e2658d148bddf55950e5849adcf10709a8b8caf 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -1,26 +1,36 @@
 {
-    "version": 2,
+    "version": 3,
     "cmakeMinimumRequired": {
         "major": 3,
-        "minor": 20,
+        "minor": 21,
         "patch": 0
     },
     "configurePresets": [
         {
             "name": "default",
-            "hidden": true,
-            "binaryDir": "${sourceDir}/build/"
+            "binaryDir": "build",
+            "hidden": true
         },
         {
             "name": "msvc",
             "hidden": true,
-            "generator": "Visual Studio 16 2019",
-            "architecture": "x64"
+            "generator": "Visual Studio 17 2022",
+            "architecture": "x64",
+            "condition": {
+                "type": "equals",
+                "lhs": "${hostSystemName}",
+                "rhs": "Windows"
+              }
         },
         {
             "name": "make",
             "hidden": true,
-            "generator": "Unix Makefiles"
+            "generator": "Unix Makefiles",
+            "condition": {
+                "type": "notEquals",
+                "lhs": "${hostSystemName}",
+                "rhs": "Windows"
+              }
         },
         {
             "name": "unit_tests",
@@ -49,34 +59,6 @@
                 "BUILD_VF_DOUBLE_ACCURACY": "OFF"
             }
         },
-        {
-            "name": "debug",
-            "hidden": true,
-            "cacheVariables": {
-                "CMAKE_BUILD_TYPE": "Debug"
-            }
-        },
-        {
-            "name": "release",
-            "hidden": true,
-            "cacheVariables": {
-                "CMAKE_BUILD_TYPE": "Release"
-            }
-        },
-        {
-            "name": "min_size_rel",
-            "hidden": true,
-            "cacheVariables": {
-                "CMAKE_BUILD_TYPE": "MinSizeRel"
-            }
-        },
-        {
-            "name": "rel_with_deb_info",
-            "hidden": true,
-            "cacheVariables": {
-                "CMAKE_BUILD_TYPE": "RelWithDebInfo"
-            }
-        },
         {
             "name": "gpu_numerical_tests",
             "inherits": [
@@ -91,7 +73,7 @@
             }
         },
         {
-            "name": "all_make",
+            "name": "make_all",
             "inherits": [
                 "cpu",
                 "gpu",
@@ -101,162 +83,66 @@
             "displayName": "all make configuration"
         },
         {
-            "name": "all_msvc",
+            "name": "make_cpu",
             "inherits": [
                 "cpu",
-                "gpu",
                 "unit_tests",
-                "msvc"
-            ],
-            "displayName": "all msvc configuration"
-        },
-        {
-            "name": "gpu_numerical_tests_make",
-            "inherits": [
-                "gpu_numerical_tests",
                 "make"
             ],
-            "displayName": "gpu numerical tests make configuration"
+            "displayName": "cpu make configuration"
         },
         {
-            "name": "gpu_numerical_tests_msvc",
-            "inherits": [
-                "msvc",
-                "gpu_numerical_tests"
-            ],
-            "displayName": "gpu numerical tests msvc configuration"
-        },
-        {
-            "name": "debug_make_gpu",
-            "displayName": "Debug GPU Make",
-            "inherits": [
-                "gpu",
-                "make",
-                "debug"
-            ]
-        },
-        {
-            "name": "release_make_gpu",
-            "displayName": "Release GPU Make",
+            "name": "make_gpu",
             "inherits": [
                 "gpu",
-                "make",
-                "release"
-            ]
-        },
-        {
-            "name": "min_size_rel_make_gpu",
-            "displayName": "MinSizeRel GPU Make",
-            "inherits": [
-                "gpu",
-                "make",
-                "min_size_rel"
-            ]
+                "unit_tests",
+                "make"
+            ],
+            "displayName": "gpu make configuration"
         },
         {
-            "name": "rel_with_deb_info_make_gpu",
-            "displayName": "RelWithDebInfo GPU Make",
+            "name": "msvc_all",
             "inherits": [
+                "cpu",
                 "gpu",
-                "make",
-                "rel_with_deb_info"
-            ]
+                "unit_tests",
+                "msvc"
+            ],
+            "displayName": "all msvc configuration"
         },
         {
-            "name": "debug_msvc_gpu",
-            "displayName": "Debug GPU MSVC",
+            "name": "msvc_cpu",
             "inherits": [
-                "gpu",
-                "msvc",
-                "debug"
-            ]
+                "cpu",
+                "unit_tests",
+                "msvc"
+            ],
+            "displayName": "cpu msvc configuration"
         },
         {
-            "name": "release_msvc_gpu",
-            "displayName": "Release GPU MSVC",
+            "name": "msvc_gpu",
             "inherits": [
                 "gpu",
-                "msvc",
-                "release"
-            ]
+                "unit_tests",
+                "msvc"
+            ],
+            "displayName": "gpu msvc configuration"
         },
         {
-            "name": "min_size_rel_msvc_gpu",
-            "displayName": "MinSizeRel GPU MSVC",
+            "name": "make_numerical_tests_gpu",
             "inherits": [
-                "gpu",
-                "msvc",
-                "min_size_rel"
-            ]
+                "gpu_numerical_tests",
+                "make"
+            ],
+            "displayName": "gpu numerical tests make configuration"
         },
         {
-            "name": "rel_with_deb_info_msvc_gpu",
-            "displayName": "RelWithDebInfo GPU MSVC",
+            "name": "msvc_numerical_tests_gpu",
             "inherits": [
-                "gpu",
                 "msvc",
-                "rel_with_deb_info"
-            ]
-        }
-    ],
-    "buildPresets": [
-        {
-            "name": "Default",
-            "hidden": true,
-            "configurePreset": "default",
-            "jobs": 4
-        },
-        {
-            "name": "GPU",
-            "hidden": true,
-            "configurePreset": "gpu",
-            "targets": [
-                "ActuatorLine",
-                "DrivenCavity",
-                "BoundaryLayer"
+                "gpu_numerical_tests"
             ],
-            "inherits": [
-                "Default"
-            ]
-        },
-        {
-            "name": "Release",
-            "hidden": true,
-            "configurePreset": "release"
-        },
-        {
-            "name": "Debug_Make_GPU",
-            "displayName": "Debug",
-            "description": "Compile GPU version with debug information",
-            "configurePreset": "debug_make_gpu",
-            "inherits": [
-                "GPU"
-            ]
-        },
-        {
-            "name": "MinSizeRel_Make_GPU",
-            "displayName": "MinSizeRel",
-            "configurePreset": "min_size_rel_make_gpu",
-            "inherits": [
-                "GPU"
-            ]
-        },
-        {
-            "name": "RelWithDebInfo_GPU",
-            "displayName": "RelWithDebInfo",
-            "configurePreset": "rel_with_deb_info_make_gpu",
-            "inherits": [
-                "GPU"
-            ]
-        },
-        {
-            "name": "Release_GPU",
-            "description": "Build release version of GPU",
-            "displayName": "Release GPU",
-            "configurePreset": "release_make_gpu",
-            "inherits": [
-                "GPU"
-            ]
+            "displayName": "gpu numerical tests msvc configuration"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/Containers/dockerfiles/Ubuntu20_04.Dockerfile b/Containers/Ubuntu20_04.Dockerfile
similarity index 73%
rename from Containers/dockerfiles/Ubuntu20_04.Dockerfile
rename to Containers/Ubuntu20_04.Dockerfile
index 21511a97adab3694540c1d13a699f4d10b3d0356..c2eea15613787d8f60b835cbb4beec9b0d3893a4 100644
--- a/Containers/dockerfiles/Ubuntu20_04.Dockerfile
+++ b/Containers/Ubuntu20_04.Dockerfile
@@ -14,12 +14,18 @@
 # clangd language server https://clangd.llvm.org/
 # python pip3 with modules: setuptools, wheels, scikit-build, pyvista, numpy, ansible, gcovr
 
+# software-properties-common for add-apt-repository
+
 FROM nvidia/cuda:11.3.1-devel-ubuntu20.04
 
+# timezone
+ARG TZ
+ENV TZ="$TZ"
+
 ARG DEBIAN_FRONTEND=noninteractive
 RUN apt-get update &&   \
     apt-get install -y  \
-    wget unzip git      \
+    wget unzip software-properties-common \
     build-essential gdb \
     ccache              \
     ninja-build         \
@@ -36,16 +42,13 @@ RUN apt-get update &&   \
     cppcheck            \
     clangd-12           \
     && update-alternatives --install /usr/bin/clangd clangd /usr/bin/clangd-12 100 \
-    && mkdir -p /usr/local/cmake/ && cd /usr/local/cmake/ \
-    && version=3.24 && build=0 \
-    && wget https://cmake.org/files/v$version/cmake-$version.$build-linux-x86_64.tar.gz \
-    && tar -xzvf cmake-$version.$build-linux-x86_64.tar.gz                              \
-    && ln -s /usr/local/cmake/cmake-$version.$build-linux-x86_64/bin/* /usr/local/bin/  \
     && pip3 install      \
+        cmake==3.26.3    \
         setuptools       \
         wheel            \
         scikit-build     \
         pyvista          \
         numpy            \
         ansible          \
-        'jinja2<3.1' gcovr==5.0
+        'jinja2<3.1' gcovr==5.0 \
+    && apt update && add-apt-repository -y ppa:git-core/ppa && apt update && apt install git -y
diff --git a/Containers/VirtualFluidsBasicsTest.def b/Containers/VirtualFluidsBasicsTest.def
deleted file mode 100644
index 930b93e5e9f71ff9b4b8afcae4c8ea47aeb82522..0000000000000000000000000000000000000000
--- a/Containers/VirtualFluidsBasicsTest.def
+++ /dev/null
@@ -1,29 +0,0 @@
-BootStrap: docker
-From: ubuntu:20.04
-
-%files
-    3rdParty 3rdParty
-    apps apps
-    CMake CMake
-    Python Python
-    src src
-    CMakeLists.txt CMakeLists.txt
-    cpu.cmake cpu.cmake
-    gpu.cmake gpu.cmake
-    setup.py setup.py
-    pyproject.toml pyproject.toml
-
-%post
-    apt-get update &&          \
-    apt-get install -y         \
-    build-essential            \
-    cmake=3.16.3-1ubuntu1      \
-    openmpi-bin=4.0.3-0ubuntu1 \
-    libomp-dev
-
-    mkdir -p build && \
-    cmake -DBUILD_VF_CPU=ON -DBUILD_VF_UNIT_TESTS=ON &&\
-    make -j4 2>&1
-
-%runscript
-    bin/basicsTest
\ No newline at end of file
diff --git a/Containers/VirtualFluidsMPICH.def b/Containers/VirtualFluidsMPICH.def
deleted file mode 100644
index 72f9ac549bd9d2bd006dafededf7b7b2f74f2600..0000000000000000000000000000000000000000
--- a/Containers/VirtualFluidsMPICH.def
+++ /dev/null
@@ -1,44 +0,0 @@
-BootStrap: docker
-From: ubuntu:20.04
-
-%files
-    3rdParty 3rdParty
-    apps apps
-    CMake CMake
-    Python Python
-    src src
-    CMakeLists.txt CMakeLists.txt
-    cpu.cmake cpu.cmake
-    gpu.cmake gpu.cmake
-    setup.py setup.py
-    pyproject.toml pyproject.toml
-
-
-%post
-    export DEBIAN_FRONTEND=noninteractive
-    apt-get update &&          \
-    apt-get install -y         \
-    build-essential            \
-    cmake=3.16.3-1ubuntu1      \
-    python3                    \
-    python3-dev                \
-    python3-pip                \
-    mpich                      \
-    libomp-dev
-    
-    pip3 install setuptools wheel
-
-    export PYTHONPATH=Python
-    python3 /setup.py install
-
-%environment
-    export PYTHONPATH=/Python
-
-%runscript
-    python3 /Python/liddrivencavity/simulation.py
-
-%appenv poiseuille
-    export PYTHONPATH=Python
-
-%apprun poisueille
-    python3 /Python/poiseuille/poiseuille_hpc.py
diff --git a/Containers/VirtualFluidsOpenMPI.def b/Containers/VirtualFluidsOpenMPI.def
deleted file mode 100644
index 3903b8769d7e652bbb12add7b815eb35de279d94..0000000000000000000000000000000000000000
--- a/Containers/VirtualFluidsOpenMPI.def
+++ /dev/null
@@ -1,25 +0,0 @@
-BootStrap: docker
-From: irmb/virtualfluids-python-deps
-
-%files
-    3rdParty 3rdParty
-    apps apps
-    CMake CMake
-    Python Python
-    src src
-    CMakeLists.txt CMakeLists.txt
-    cpu.cmake cpu.cmake
-    gpu.cmake gpu.cmake
-    setup.py setup.py
-    pyproject.toml pyproject.toml
-
-
-%post
-    export PYTHONPATH=Python
-    python3 /setup.py install
-
-%environment
-    export PYTHONPATH=/Python
-
-%runscript
-    python3 /Python/liddrivencavity/simulation.py
diff --git a/Containers/VirtualFluidsPython.def b/Containers/VirtualFluidsPython.def
deleted file mode 100644
index d54066bc634cf25f4340b1e659eae72515467fa8..0000000000000000000000000000000000000000
--- a/Containers/VirtualFluidsPython.def
+++ /dev/null
@@ -1,33 +0,0 @@
-BootStrap: docker
-From: ubuntu:20.04
-
-%files
-    Python Python
-    dist dist
-
-
-%post
-    export DEBIAN_FRONTEND=noninteractive
-    apt-get update &&          \
-    apt-get install -y         \
-    build-essential            \
-    cmake=3.16.3-1ubuntu1      \
-    python3                    \
-    python3-dev                \
-    python3-pip                \
-    mpich                      \
-    libomp-dev
-
-    pip3 install setuptools wheel $(find dist/*.whl)
-
-%environment
-    export PYTHONPATH=/Python
-
-%runscript
-    python3 /Python/liddrivencavity/simulation.py
-
-%appenv poiseuille
-    export PYTHONPATH=Python
-
-%apprun poisueille
-    python3 /Python/poiseuille/poiseuille_hpc.py
diff --git a/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def b/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
index a275c88a6a46bfe806fce68e87aab571b66cc077..d31a7b82a4e9e988f815139fb46318d231d450f8 100644
--- a/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
+++ b/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
@@ -30,10 +30,11 @@ Stage: build
     libomp-dev                 \
     libgl1
 
-    pip3 install setuptools wheel cmake numpy scipy pyvista
+    pip3 install setuptools wheel cmake numpy scipy pyvista scikit-build
 
     export PYTHONPATH=Python
-    python3 /setup.py bdist_wheel build_ext --build-temp=build
+    python3 /setup.py bdist_wheel build_ext --build-temp=_skbuild -- -DBUILD_VF_CPU=ON -DBUILD_VF_DOUBLE_ACCURACY=ON
+
     pip3 install $(find dist/*.whl)
 
 
diff --git a/Python/SlurmTests/poiseuille/rocket.yml b/Python/SlurmTests/poiseuille/rocket.yml
index da64a48cd3f6fae69ae9f06648c7c156950a71ec..b186469a4d3fd4b8edfafa4fc3f6dcd64e311d70 100644
--- a/Python/SlurmTests/poiseuille/rocket.yml
+++ b/Python/SlurmTests/poiseuille/rocket.yml
@@ -16,8 +16,8 @@ collect:
     to: POISEUILLE_TEST.out
     overwrite: true
 
-clean:
-  - poiseuille_test/*
+#clean:
+#  - poiseuille_test/PoiseuilleTestContainer.sif
 
 sbatch: poiseuille_test/slurm.job
 continue_if_job_fails: true
diff --git a/Python/actuator_line/actuator_line.py b/Python/actuator_line/actuator_line.py
index 721af737ff6ef3340c3c2f6204aa6a7824cd1d2f..d0589f402456e8ffe8320ce7f780738aef22fbe4 100644
--- a/Python/actuator_line/actuator_line.py
+++ b/Python/actuator_line/actuator_line.py
@@ -46,10 +46,7 @@ output_path.mkdir(exist_ok=True)
 
 #%%
 logger.Logger.initialize_logger()
-basics.logger.Logger.add_stdout()
-basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW)
-basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE)
-basics.logger.Logger.enable_printed_rank_numbers(True)
+
 #%%
 grid_factory = gpu.grid_generator.GridFactory.make()
 grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory)
@@ -147,7 +144,7 @@ grid_scaling_factory.set_scaling_factory(gpu.GridScaling.ScaleCompressible)
 
 grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
 grid_builder.set_periodic_boundary_condition(not read_precursor, True, False)
-grid_builder.build_grids(basics.LbmOrGks.LBM, False)
+grid_builder.build_grids(False)
 
 sampling_offset = 2
 if read_precursor:
diff --git a/Python/boundary_layer/boundary_layer.py b/Python/boundary_layer/boundary_layer.py
index 6f6c64bc072d3afbb8aa5febbec209c26af2deee..25b3cd895f8a3a80f9fd6438e00d3e924fc13779 100644
--- a/Python/boundary_layer/boundary_layer.py
+++ b/Python/boundary_layer/boundary_layer.py
@@ -46,10 +46,7 @@ output_path.mkdir(exist_ok=True)
 
 #%%
 logger.Logger.initialize_logger()
-basics.logger.Logger.add_stdout()
-basics.logger.Logger.set_debug_level(basics.logger.Level.INFO_LOW)
-basics.logger.Logger.time_stamp(basics.logger.TimeStamp.ENABLE)
-basics.logger.Logger.enable_printed_rank_numbers(True)
+
 #%%
 grid_factory = gpu.grid_generator.GridFactory.make()
 grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory)
@@ -145,7 +142,7 @@ tm_factory.read_config_file(config)
 #%%
 grid_builder.add_coarse_grid(0.0, 0.0, 0.0, *length, dx)
 grid_builder.set_periodic_boundary_condition(not read_precursor, True, False)
-grid_builder.build_grids(basics.LbmOrGks.LBM, False)
+grid_builder.build_grids(False)
 
 sampling_offset = 2
 if read_precursor:
diff --git a/README.md b/README.md
index 9e02f019a98078c19f7c6a61a029e9f4d8f97434..fe1bb5dd262d193ba3ac201fe8a7875c1bf812f7 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,6 @@ with usage of the GPU:
  - CUDA [developer.nvidia.com/cuda-zone](https://developer.nvidia.com/cuda-zone):
     * Minimum CUDA Version 9.0
     * Minimum Compute Capability 3.0, because of maximal number of Blocks in x direction
-    * Recommended Compute Capability 6.0, because of atomics for double precision floating point data (GKS only)
 
 
 ### Build VirtualFluids
diff --git a/apps/cpu/Applications.cmake b/apps/cpu/Applications.cmake
index 68dfeb3ed7687da74d49a35337b0bae92798e80d..3c71d51344030980071addc6f9831a74d0daa53c 100644
--- a/apps/cpu/Applications.cmake
+++ b/apps/cpu/Applications.cmake
@@ -83,4 +83,6 @@ ENDIF()
 #add_subdirectory(Applications/OrganPipe)
 #add_subdirectory(Applications/LidDrivenCavity)
 
-
+if(BUILD_USE_BOOST)
+    add_subdirectory(${APPS_ROOT_CPU}/TPMSRow)
+endif()
diff --git a/apps/cpu/ConvectionOfVortex/cov.cpp b/apps/cpu/ConvectionOfVortex/cov.cpp
index 627f5d03abe32f43cf3eb33649e0f209595b8b6a..45b9489397df760be5d1247f1f2961393b2c22fe 100644
--- a/apps/cpu/ConvectionOfVortex/cov.cpp
+++ b/apps/cpu/ConvectionOfVortex/cov.cpp
@@ -8,13 +8,15 @@ using namespace std;
 
 void run()
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       int    numOfThreads = 4;
-      double availMem = 5e9;
+      real availMem = 5e9;
 
       
 
@@ -24,11 +26,11 @@ void run()
 
       string  pathname = "d:/temp/ConvectionOfVortex_0.003_4th";
       int     endTime = 10000;
-      double  outTime = 10;
-      LBMReal dx =  0.003;
-      LBMReal rhoLB = 0.0;
-      LBMReal nuLB = 8.66025e-6;
-      double yFactor = 1.0;
+      real  outTime = 10;
+      real dx =  0.003;
+      real rhoLB = 0.0;
+      real nuLB = 8.66025e-6;
+      real yFactor = 1.0;
 
       //string  pathname = "d:/temp/ConvectionOfVortex_0.003_square";
       //int     endTime = 20;
@@ -79,13 +81,13 @@ void run()
       int refineLevel = 1;
 
       //bounding box
-      double g_minX1 = -0.045;
-      double g_minX2 = -0.015/yFactor;
-      double g_minX3 = -0.06;
+      real g_minX1 = -0.045;
+      real g_minX2 = -0.015/yFactor;
+      real g_minX3 = -0.06;
 
-      double g_maxX1 = 0.045;
-      double g_maxX2 = 0.015/yFactor;
-      double g_maxX3 = 0.06;
+      real g_maxX1 = 0.045;
+      real g_maxX2 = 0.015/yFactor;
+      real g_maxX3 = 0.06;
 
       vector<int>  blocknx(3);
       blocknx[0] = 10;
@@ -97,7 +99,7 @@ void run()
       if (myid == 0) GbSystem3D::writeGeoObject(gridCube.get(), pathname + "/geo/gridCube", WbWriterVtkXmlBinary::getInstance());
 
 
-      double blockLength = blocknx[0] * dx;
+      real blockLength = blocknx[0] * dx;
 
       SPtr<Grid3D> grid(new Grid3D(comm));
       grid->setDeltaX(dx);
@@ -150,7 +152,7 @@ void run()
       if (myid==0) GbSystem3D::writeGeoObject(geoOutflow4.get(), pathname+"/geo/geoOutflow4", WbWriterVtkXmlASCII::getInstance());
       SPtr<D3Q27Interactor> outflowIntr4 = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflow4, grid, outflowBCAdapter, Interactor3D::SOLID));
 
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
       InteractorsHelper intHelper(grid, metisVisitor);
       //intHelper.addInteractor(outflowIntr1);
       //intHelper.addInteractor(outflowIntr2);
@@ -183,8 +185,8 @@ void run()
       unsigned long long numberOfNodesPerBlock = (unsigned long long)(blocknx[0])* (unsigned long long)(blocknx[1])* (unsigned long long)(blocknx[2]);
       unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
       unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-      double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-      double needMem = needMemAll / double(comm->getNumberOfProcesses());
+      real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+      real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
       if (myid == 0)
       {
@@ -224,7 +226,7 @@ void run()
 
       intHelper.setBC();
 
-      double Ma = 0.005;
+      real Ma = 0.005;
 
       mu::Parser initRho, initVx1, initVx2; 
       initRho.SetExpr("rhoLB + (-(rho0*epsilon^2)/2) * exp(1-(scaleFactor*(x1^2+x3^2))/R^2) + (1/(2*gamma*rho0)) * ((-(rho0*epsilon^2)/2) * exp(1-(scaleFactor*(x1^2+x3^2))/R^2))^2");
diff --git a/apps/cpu/CouetteFlow/cflow.cpp b/apps/cpu/CouetteFlow/cflow.cpp
index 3de4a3b36f7453eaafca24648d0aa770fb954d63..a60031096a0197e129a6c01c1dd9d5881dc2699f 100644
--- a/apps/cpu/CouetteFlow/cflow.cpp
+++ b/apps/cpu/CouetteFlow/cflow.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -16,24 +18,24 @@ void bflow(string configname)
       string          pathname = config.getValue<string>("pathname");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      vector<double>  boundingBox = config.getVector<double>("boundingBox");
+      vector<real>  boundingBox = config.getVector<real>("boundingBox");
       //double          nuLB = config.getValue<double>("nuLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       //int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
       //double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
+      real          deltax = config.getValue<real>("deltax");
       //double          cpStep = config.getValue<double>("cpStep");
       //double          cpStepStart = config.getValue<double>("cpStepStart");
       //bool            newStart = config.getValue<bool>("newStart");
-      double          forcing = config.getValue<double>("forcing");
+      real          forcing = config.getValue<real>("forcing");
       //double          n = config.getValue<double>("n");
       //double          k = config.getValue<double>("k");
       //double          tau0 = config.getValue<double>("tau0");
-      double          velocity = config.getValue<double>("velocity");
-      double          n = config.getValue<double>("n");
+      real          velocity = config.getValue<real>("velocity");
+      real          n = config.getValue<real>("n");
 //      double          Re = config.getValue<double>("Re");
 //      double          Bn = config.getValue<double>("Bn");
 
@@ -58,7 +60,7 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
@@ -71,15 +73,15 @@ void bflow(string configname)
       //double g_maxX2 = boundingBox[1];
       //double g_maxX3 = boundingBox[2];
 
-      double g_minX1 = 0.0;
-      double g_minX2 = -boundingBox[1]/2.0;
-      double g_minX3 = -boundingBox[2]/2.0;
+      real g_minX1 = 0.0;
+      real g_minX2 = -boundingBox[1]/2.0;
+      real g_minX3 = -boundingBox[2]/2.0;
 
-      double g_maxX1 = boundingBox[0];
-      double g_maxX2 = boundingBox[1]/2.0;
-      double g_maxX3 = boundingBox[2]/2.0;
+      real g_maxX1 = boundingBox[0];
+      real g_maxX2 = boundingBox[1]/2.0;
+      real g_maxX3 = boundingBox[2]/2.0;
 
-      double blockLength = 3.0 * deltax;
+      real blockLength = 3.0 * deltax;
 
 //      double h = (g_maxX2) / 2.0;
 //      double dex = g_maxX1;
@@ -89,16 +91,16 @@ void bflow(string configname)
       //LBMReal n = 0.4;
 
 
-      double d = boundingBox[1];
-      double U = velocity;
-      double Gamma = U / d;
+      real d = boundingBox[1];
+      real U = velocity;
+      real Gamma = U / d;
 
-      double k = 0.05; // (U * d) / (Re * std::pow(Gamma, n - 1));
-      double tau0 = 1e-6;// Bn* k* std::pow(Gamma, n);
+      real k = 0.05; // (U * d) / (Re * std::pow(Gamma, n - 1));
+      real tau0 = 1e-6;// Bn* k* std::pow(Gamma, n);
 
-      double beta = 14;
-      double c = 10; // 1.0 / 6.0;
-      double mu0 = 1e-4;
+      real beta = 14;
+      real c = 10; // 1.0 / 6.0;
+      real mu0 = 1e-4;
 
       SPtr<Rheology> thix = Rheology::getInstance();
       //Herschel-Bulkley
@@ -184,7 +186,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
       ////////////////////////////////////////////
       /////delete solid blocks
       if (myid == 0) UBLOG(logINFO, "deleteSolidBlocks - start");
@@ -205,8 +207,8 @@ void bflow(string configname)
       unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
       unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
       unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-      double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-      double needMem = needMemAll / double(comm->getNumberOfProcesses());
+      real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+      real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
       if (myid == 0)
       {
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp b/apps/cpu/FlowAroundCylinder/cylinder.cpp
index 5578ecb56b37e3b489e4c60d9a26adfa05e9b3d3..d66222495986cc6eaa26c078d7eaf225834ffaab 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp
@@ -9,6 +9,8 @@ using namespace std;
 //////////////////////////////////////////////////////////////////////////
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       //DEBUG///////////////////////////////////////
@@ -18,20 +20,20 @@ void run(string configname)
       config.load(configname);
 
       string          pathOut = config.getValue<string>("pathOut");
-      double          uLB = config.getValue<double>("uLB");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          cpStart = config.getValue<double>("cpStart");
-      double          cpStep = config.getValue<double>("cpStep");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          uLB = config.getValue<real>("uLB");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          cpStart = config.getValue<real>("cpStart");
+      real          cpStep = config.getValue<real>("cpStep");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
-      vector<double>  nupsStep = config.getVector<double>("nupsStep");
+      vector<real>  nupsStep = config.getVector<real>("nupsStep");
       bool            newStart = config.getValue<bool>("newStart");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blockNx = config.getVector<int>("blockNx");
-      double          dx = config.getValue<double>("dx");
+      real          dx = config.getValue<real>("dx");
 
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
@@ -58,18 +60,18 @@ void run(string configname)
 
       
 
-      double L1 = 2.5;
-      double L2, L3, H;
+      real L1 = 2.5;
+      real L2, L3, H;
       L2 = L3 = H = 0.41;
 
-      LBMReal Re = 20.0;
-      LBMReal radius = 0.05;
-      LBMReal rhoReal = 1.0; //kg/m^3
-      LBMReal uReal = 0.45;//m/s
-      LBMReal nueReal = (uReal*radius*2.0)/Re;
+      real Re = 20.0;
+      real radius = 0.05;
+      real rhoReal = 1.0; //kg/m^3
+      real uReal = 0.45;//m/s
+      real nueReal = (uReal*radius*2.0)/Re;
       
-      LBMReal rhoLB = 0.0;
-      LBMReal nueLB = (((4.0/9.0)*uLB)*2.0*(radius/dx))/Re;
+      real rhoLB = 0.0;
+      real nueLB = (((4.0/9.0)*uLB)*2.0*(radius/dx))/Re;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
@@ -135,13 +137,13 @@ void run(string configname)
          GbSystem3D::writeGeoObject(refCylinder.get(), pathOut+"/geo/refCylinder", WbWriterVtkXmlBinary::getInstance());
 
          //bounding box
-         double g_minX1 = 0.0;
-         double g_minX2 = 0.0;
-         double g_minX3 = 0.0;
+         real g_minX1 = 0.0;
+         real g_minX2 = 0.0;
+         real g_minX3 = 0.0;
 
-         double g_maxX1 = L1;
-         double g_maxX2 = L2;
-         double g_maxX3 = L3;
+         real g_maxX1 = L1;
+         real g_maxX2 = L2;
+         real g_maxX3 = L3;
 
          SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
          if (myid==0) GbSystem3D::writeGeoObject(gridCube.get(), pathOut+"/geo/gridCube", WbWriterVtkXmlBinary::getInstance());
@@ -150,7 +152,7 @@ void run(string configname)
          const int blocknx2 = blockNx[1];
          const int blocknx3 = blockNx[2];
 
-         double blockLength = blocknx1*dx;
+         real blockLength = blocknx1*dx;
 
          grid->setDeltaX(dx);
          grid->setBlockNX(blocknx1, blocknx2, blocknx3);
@@ -203,7 +205,7 @@ void run(string configname)
          SPtr<D3Q27Interactor> outflowInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflow, grid, denBCAdapter, Interactor3D::SOLID));
 
          
-         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
          InteractorsHelper intHelper(grid, metisVisitor);
          intHelper.addInteractor(cylinderInt);
          intHelper.addInteractor(addWallYminInt);
@@ -223,8 +225,8 @@ void run(string configname)
          unsigned long long numberOfNodesPerBlock = (unsigned long long)(blockNx[0])* (unsigned long long)(blockNx[1])* (unsigned long long)(blockNx[2]);
          unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
          unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blockNx[0]+ghostLayer) * (blockNx[1]+ghostLayer) * (blockNx[2]+ghostLayer);
-         double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27*sizeof(double)+sizeof(int)+sizeof(float)*4));
-         double needMem = needMemAll/double(comm->getNumberOfProcesses());
+         real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27*sizeof(real)+sizeof(int)+sizeof(float)*4));
+         real needMem = needMemAll/real(comm->getNumberOfProcesses());
 
          if (myid==0)
          {
@@ -302,8 +304,8 @@ void run(string configname)
 
 	  SPtr<CoProcessor> writeMQCoProcessor(new WriteMacroscopicQuantitiesCoProcessor(grid, stepSch, pathOut, WbWriterVtkXmlBinary::getInstance(), conv, comm));
 
-      double area = (2.0*radius*H)/(dx*dx);
-      double v    = 4.0*uLB/9.0;
+      real area = (2.0*radius*H)/(dx*dx);
+      real v    = 4.0*uLB/9.0;
       SPtr<UbScheduler> forceSch(new UbScheduler(100));
       SPtr<CalculateForcesCoProcessor> fp = make_shared<CalculateForcesCoProcessor>(grid, forceSch, pathOut + "/results/forces.txt", comm, v, area);
       fp->addInteractor(cylinderInt);
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp1 b/apps/cpu/FlowAroundCylinder/cylinder.cpp1
index 5321a23d1c03fe85270cf8e382b8d50dc5df4351..f4001248da3e7fec9921da00d6932c376ea6dc66 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp1
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp1
@@ -248,7 +248,7 @@ void run(const char *cstr)
          unsigned long nod = nob * (blocknx1+gl) * (blocknx2+gl) * (blocknx3+gl);
 
          double needMemAll  = double(nod*(27*sizeof(double) + sizeof(int) + sizeof(float)*4));
-         double needMem  = needMemAll / double(comm->getNummberOfProcesses());
+         double needMem  = needMemAll / double(comm->getNumberOfProcesses());
 
          if(myid == 0)
          {
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp2 b/apps/cpu/FlowAroundCylinder/cylinder.cpp2
index 4dc7285b37131250607166cca3de70db53935156..107f4882f38dbada406a106ce6bfa2a8122f7379 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp2
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp2
@@ -262,7 +262,7 @@ void run(const char *cstr)
          unsigned long nod = nob * (blocknx1+gl) * (blocknx2+gl) * (blocknx3+gl);
 
          double needMemAll  = double(nod*(27*sizeof(double) + sizeof(int) + sizeof(float)*4));
-         double needMem  = needMemAll / double(comm->getNummberOfProcesses());
+         double needMem  = needMemAll / double(comm->getNumberOfProcesses());
 
          if(myid == 0)
          {
diff --git a/apps/cpu/HerschelBulkleyModel/hbflow.cpp b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
index b97942a1cd78c4ea9a5c73b4f24ddf4f6ae2edf6..567fd661cd2e131e3f4f311285bd636f471dccb6 100644
--- a/apps/cpu/HerschelBulkleyModel/hbflow.cpp
+++ b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -16,27 +18,27 @@ void bflow(string configname)
       string          pathname = config.getValue<string>("pathname");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      vector<double>  boundingBox = config.getVector<double>("boundingBox");
-      double          nuLB = config.getValue<double>("nuLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      vector<real>  boundingBox = config.getVector<real>("boundingBox");
+      real          nuLB = config.getValue<real>("nuLB");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       //int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
       //double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
+      real          deltax = config.getValue<real>("deltax");
       //double          cpStep = config.getValue<double>("cpStep");
       //double          cpStepStart = config.getValue<double>("cpStepStart");
       //bool            newStart = config.getValue<bool>("newStart");
-      double          forcing = config.getValue<double>("forcing");
+      real          forcing = config.getValue<real>("forcing");
       //double          n = config.getValue<double>("n");
       //double          k = config.getValue<double>("k");
-      double          tau0 = config.getValue<double>("tau0");
-      double          velocity = config.getValue<double>("velocity");
-      double          n = config.getValue<double>("n");
+      real          tau0 = config.getValue<real>("tau0");
+      real          velocity = config.getValue<real>("velocity");
+      real          n = config.getValue<real>("n");
 //      double          Re = config.getValue<double>("Re");
 //      double          Bn = config.getValue<double>("Bn");
-      double          scaleFactor = config.getValue<double>("scaleFactor");
+      real          scaleFactor = config.getValue<real>("scaleFactor");
 
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
@@ -59,7 +61,7 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
@@ -72,17 +74,17 @@ void bflow(string configname)
       //double g_maxX2 = boundingBox[1];
       //double g_maxX3 = boundingBox[2]+1.0;
 
-      double g_minX1 = 0.0;
-      double g_minX2 = -boundingBox[1]/2.0;
-      double g_minX3 = -boundingBox[2]/2.0;
+      real g_minX1 = 0.0;
+      real g_minX2 = -boundingBox[1]/2.0;
+      real g_minX3 = -boundingBox[2]/2.0;
 
-      double g_maxX1 = boundingBox[0];
-      double g_maxX2 = boundingBox[1]/2.0;
-      double g_maxX3 = boundingBox[2]/2.0;
+      real g_maxX1 = boundingBox[0];
+      real g_maxX2 = boundingBox[1]/2.0;
+      real g_maxX3 = boundingBox[2]/2.0;
 
       
 
-      double blockLength = 3.0 * deltax;
+      real blockLength = 3.0 * deltax;
 
 //      double h = (g_maxX2) / 2.0;
 //      double dex = g_maxX1;
@@ -92,9 +94,9 @@ void bflow(string configname)
       //LBMReal n = 0.4;
 
 
-      double d = boundingBox[1];
-      double U = velocity;
-      double Gamma = U / d;
+      real d = boundingBox[1];
+      real U = velocity;
+      real Gamma = U / d;
 
       //double scaleFactor = 2.0;
 
@@ -108,7 +110,7 @@ void bflow(string configname)
 
       // Acoustic Scaling
 
-      double k = nuLB * scaleFactor;
+      real k = nuLB * scaleFactor;
       //double tau0 = 3e-5; 
       forcing /= scaleFactor;
       endTime *= scaleFactor;
@@ -116,9 +118,9 @@ void bflow(string configname)
 
       //outTime = endTime;
 
-      double beta = 14;
-      double c = 10; // 1.0 / 6.0;
-      double mu0 = 1e-4;
+      real beta = 14;
+      real c = 10; // 1.0 / 6.0;
+      real mu0 = 1e-4;
 
       SPtr<Rheology> thix = Rheology::getInstance();
       //Herschel-Bulkley
@@ -218,7 +220,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
       ////////////////////////////////////////////
       /////delete solid blocks
       if (myid == 0) UBLOG(logINFO, "deleteSolidBlocks - start");
@@ -240,8 +242,8 @@ void bflow(string configname)
       unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
       unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
       unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-      double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-      double needMem = needMemAll / double(comm->getNumberOfProcesses());
+      real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+      real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
       if (myid == 0)
       {
diff --git a/apps/cpu/HerschelBulkleySphere/hbsphere.cpp b/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
index 67f5a00ad49dcbe16a018e402f85ed02b3848650..ae71a3a44926c52e04eb0df682b0495ce37c173d 100644
--- a/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
+++ b/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile config;
@@ -16,24 +18,24 @@ void bflow(string configname)
       string          outputPath = config.getValue<string>("outputPath");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      vector<double>  boundingBox = config.getVector<double>("boundingBox");
+      vector<real>  boundingBox = config.getVector<real>("boundingBox");
       //double          nuLB = config.getValue<double>("nuLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
-      double          radius = config.getValue<double>("radius");
-      double          cpStep = config.getValue<double>("cpStep");
-      double          cpStart = config.getValue<double>("cpStart");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          deltax = config.getValue<real>("deltax");
+      real          radius = config.getValue<real>("radius");
+      real          cpStep = config.getValue<real>("cpStep");
+      real          cpStart = config.getValue<real>("cpStart");
       bool            newStart = config.getValue<bool>("newStart");
-      double          velocity = config.getValue<double>("velocity");
-      double          n = config.getValue<double>("n");
-      double          Re = config.getValue<double>("Re");
-      double          Bn = config.getValue<double>("Bn");
-      vector<double>  sphereCenter = config.getVector<double>("sphereCenter");
+      real          velocity = config.getValue<real>("velocity");
+      real          n = config.getValue<real>("n");
+      real          Re = config.getValue<real>("Re");
+      real          Bn = config.getValue<real>("Bn");
+      vector<real>  sphereCenter = config.getVector<real>("sphereCenter");
 
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
@@ -56,19 +58,19 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
       //bounding box
 
-      double g_minX1 = 0;
-      double g_minX2 = 0;
-      double g_minX3 = 0;
+      real g_minX1 = 0;
+      real g_minX2 = 0;
+      real g_minX3 = 0;
 
-      double g_maxX1 = boundingBox[0];
-      double g_maxX2 = boundingBox[1];
-      double g_maxX3 = boundingBox[2];
+      real g_maxX1 = boundingBox[0];
+      real g_maxX2 = boundingBox[1];
+      real g_maxX3 = boundingBox[2];
 
       //double g_minX1 = -boundingBox[0]/2.0;
       //double g_minX2 = -boundingBox[1] / 2.0;
@@ -78,21 +80,21 @@ void bflow(string configname)
       //double g_maxX2 = boundingBox[1]/2.0;
       //double g_maxX3 = boundingBox[2]/2.0;
 
-      double blockLength = 3.0 * deltax;
+      real blockLength = 3.0 * deltax;
 
-      double d = 2.0 * radius;
-      double U = velocity;
-      double Gamma = U / d;
+      real d = 2.0 * radius;
+      real U = velocity;
+      real Gamma = U / d;
 
-      double k = (U * d) / (Re * std::pow(Gamma, n - 1));
-      double tau0 = Bn * k * std::pow(Gamma, n);
+      real k = (U * d) / (Re * std::pow(Gamma, n - 1));
+      real tau0 = Bn * k * std::pow(Gamma, n);
 
       //double k = 0.05; // (U * d) / (Re * std::pow(Gamma, n - 1));
       //double tau0 = 3e-6; //Bn * k * std::pow(Gamma, n);
 
       //double forcing = 8e-7;
 
-      double omegaMin = 1.0e-8;
+      real omegaMin = 1.0e-8;
 
       SPtr<Rheology> thix = Rheology::getInstance();
       thix->setPowerIndex(n);
@@ -161,7 +163,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
       ////////////////////////////////////////////
       //////////////////////////////////////////////////////////////////////////
       //restart
@@ -243,7 +245,7 @@ void bflow(string configname)
 
          ////////////////////////////////////////////
          //METIS
-         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
          ////////////////////////////////////////////
          /////delete solid blocks
          if (myid == 0) UBLOG(logINFO, "deleteSolidBlocks - start");
@@ -267,8 +269,8 @@ void bflow(string configname)
          unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-         double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
@@ -341,7 +343,7 @@ void bflow(string configname)
       SPtr<WriteMacroscopicQuantitiesCoProcessor> writeMQCoProcessor(new WriteMacroscopicQuantitiesCoProcessor(grid, visSch, outputPath, WbWriterVtkXmlBinary::getInstance(), SPtr<LBMUnitConverter>(new LBMUnitConverter()), comm));
       //writeMQCoProcessor->process(0);
 
-      double area = UbMath::PI*radius*radius;
+      real area = UbMath::PI*radius*radius;
       SPtr<UbScheduler> forceSch(new UbScheduler(100));
       SPtr<CalculateForcesCoProcessor> fp = make_shared<CalculateForcesCoProcessor>(grid, forceSch, outputPath + "/forces/forces.txt", comm, velocity, area);
       fp->addInteractor(sphereInt);
diff --git a/apps/cpu/JetBreakup/JetBreakup.cpp b/apps/cpu/JetBreakup/JetBreakup.cpp
index 01d4cc3eb5b7d46118d40bc5fbb98b16e57d82eb..2115b515f1c77cd97b587449ab9881642aa6e1be 100644
--- a/apps/cpu/JetBreakup/JetBreakup.cpp
+++ b/apps/cpu/JetBreakup/JetBreakup.cpp
@@ -6,13 +6,15 @@
 
 using namespace std;
 
-void setInflowBC(double x1, double x2, double x3, double radius, int dir)
+void setInflowBC(real x1, real x2, real x3, real radius, int dir)
 {
 
 }
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
     try {
 
         // Sleep(30000);
@@ -27,7 +29,7 @@ void run(string configname)
         vector<int> blocknx = config.getVector<int>("blocknx");
         //vector<double> boundingBox = config.getVector<double>("boundingBox");
         // vector<double>  length = config.getVector<double>("length");
-        double U_LB = config.getValue<double>("U_LB");
+        real U_LB = config.getValue<real>("U_LB");
         // double uF2                         = config.getValue<double>("uF2");
         //double nuL = config.getValue<double>("nuL");
         //double nuG = config.getValue<double>("nuG");
@@ -35,23 +37,23 @@ void run(string configname)
         //double sigma = config.getValue<double>("sigma");
         int interfaceWidth = config.getValue<int>("interfaceWidth");
         //double D          = config.getValue<double>("D");
-        double theta = config.getValue<double>("contactAngle");
-        double D_LB = config.getValue<double>("D_LB");
-        double phiL = config.getValue<double>("phi_L");
-        double phiH = config.getValue<double>("phi_H");
-        double tauH = config.getValue<double>("Phase-field Relaxation");
-        double mob = config.getValue<double>("Mobility");
-
-        double endTime = config.getValue<double>("endTime");
-        double outTime = config.getValue<double>("outTime");
-        double availMem = config.getValue<double>("availMem");
+        real theta = config.getValue<real>("contactAngle");
+        real D_LB = config.getValue<real>("D_LB");
+        real phiL = config.getValue<real>("phi_L");
+        real phiH = config.getValue<real>("phi_H");
+        real tauH = config.getValue<real>("Phase-field Relaxation");
+        real mob = config.getValue<real>("Mobility");
+
+        real endTime = config.getValue<real>("endTime");
+        real outTime = config.getValue<real>("outTime");
+        real availMem = config.getValue<real>("availMem");
         //int refineLevel = config.getValue<int>("refineLevel");
         //double Re = config.getValue<double>("Re");
         
         bool logToFile = config.getValue<bool>("logToFile");
-        double restartStep = config.getValue<double>("restartStep");
-        double cpStart = config.getValue<double>("cpStart");
-        double cpStep = config.getValue<double>("cpStep");
+        real restartStep = config.getValue<real>("restartStep");
+        real cpStart = config.getValue<real>("cpStart");
+        real cpStep = config.getValue<real>("cpStep");
         bool newStart = config.getValue<bool>("newStart");
 
 
@@ -81,7 +83,7 @@ void run(string configname)
 
         // Sleep(30000);
 
-        double rho_h=0, rho_l=0, r_rho=0, mu_h=0, /*mu_l,*/ Uo=0, D=0, sigma=0;
+        real rho_h=0, rho_l=0, r_rho=0, mu_h=0, /*mu_l,*/ Uo=0, D=0, sigma=0;
 
         switch (caseN) {
             case 1: 
@@ -140,23 +142,23 @@ void run(string configname)
                 break;                
         }
 
-        double Re = rho_h * Uo * D / mu_h;
-        double We = rho_h * Uo * Uo * D / sigma;
+        real Re = rho_h * Uo * D / mu_h;
+        real We = rho_h * Uo * Uo * D / sigma;
 
-        double dx = D / D_LB;
-        double nu_h = U_LB * D_LB / Re;
-        double nu_l = nu_h;
+        real dx = D / D_LB;
+        real nu_h = U_LB * D_LB / Re;
+        real nu_l = nu_h;
 
-        double rho_h_LB = 1;
+        real rho_h_LB = 1;
         //surface tension
-        double sigma_LB = rho_h_LB * U_LB * U_LB * D_LB / We;
+        real sigma_LB = rho_h_LB * U_LB * U_LB * D_LB / We;
 
         // LBMReal dLB = 0; // = length[1] / dx;
-        LBMReal rhoLB = 0.0;
+        real rhoLB = 0.0;
         //LBMReal nuLB = nu_l; //(uLB*dLB) / Re;
 
-        double beta = 12.0 * sigma_LB / interfaceWidth;
-        double kappa = 1.5 * interfaceWidth * sigma_LB;
+        real beta = 12.0 * sigma_LB / interfaceWidth;
+        real kappa = 1.5 * interfaceWidth * sigma_LB;
 
         if (myid == 0) {
             UBLOG(logINFO, "Parameters:");
@@ -219,7 +221,7 @@ void run(string configname)
         grid->setGhostLayerWidth(2);
 
         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(
-            comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+            comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
 
         //////////////////////////////////////////////////////////////////////////
         // restart
@@ -251,7 +253,7 @@ void run(string configname)
         fctF2.SetExpr("vy1");
         fctF2.DefineConst("vy1", U_LB);
 
-        double startTime = 1;
+        real startTime = 1;
         SPtr<BCAdapter> velBCAdapterF1(
             new MultiphaseVelocityBCAdapter(true, false, false, fctF1, phiH, 0.0, startTime));
         SPtr<BCAdapter> velBCAdapterF2(
@@ -293,17 +295,17 @@ void run(string configname)
             //  if (newStart) {
 
             // bounding box
-            double g_minX1 = 0;
-            double g_minX2 = 0;
-            double g_minX3 = 0;
+            real g_minX1 = 0;
+            real g_minX2 = 0;
+            real g_minX3 = 0;
 
             //double g_maxX1 = 8.0*D;
             //double g_maxX2 = 2.5*D;
             //double g_maxX3 = 2.5*D;
 
-             double g_maxX1 = 1.0 * D; // 8.0 * D;
-             double g_maxX2 = 2.0 * D;
-             double g_maxX3 = 2.0 * D;
+             real g_maxX1 = 1.0 * D; // 8.0 * D;
+             real g_maxX2 = 2.0 * D;
+             real g_maxX3 = 2.0 * D;
 
             // geometry
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
@@ -452,9 +454,9 @@ void run(string configname)
             unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
             unsigned long long numberOfNodesPerBlockWithGhostLayer =
                 numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-            double needMemAll =
-                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+            real needMemAll =
+                real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+            real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
             if (myid == 0) {
                 UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
@@ -486,9 +488,9 @@ void run(string configname)
             //mu::Parser fct1;
             //fct1.SetExpr("phiL");
             //fct1.DefineConst("phiL", phiL);
-            LBMReal x1c = 0;  // (g_maxX1 - g_minX1-1)/2; //
-            LBMReal x2c = (g_maxX2 - g_minX2)/2;
-            LBMReal x3c = (g_maxX3 - g_minX3)/2;
+            real x1c = 0;  // (g_maxX1 - g_minX1-1)/2; //
+            real x2c = (g_maxX2 - g_minX2)/2;
+            real x3c = (g_maxX3 - g_minX3)/2;
             
             mu::Parser fct1;
             fct1.SetExpr("0.5-0.5*tanh(2*(sqrt((x1-x1c)^2+(x2-x2c)^2+(x3-x3c)^2)-radius)/interfaceThickness)");
@@ -574,7 +576,7 @@ void run(string configname)
         grid->accept(setConnsVisitor);
 
         SPtr<UbScheduler> visSch(new UbScheduler(outTime));
-        double t_ast, t;
+        real t_ast, t;
         t_ast = 7.19;
         t = (int)(t_ast/(U_LB/(D_LB)));
         visSch->addSchedule(t,t,t); //t=7.19
diff --git a/apps/cpu/LaminarTubeFlow/ltf.cpp b/apps/cpu/LaminarTubeFlow/ltf.cpp
index 93fd31083a1da92bc5fb73bb0606c7a8121bb5b8..cbafef30c489a26b5f8df9610ec3e6ad7aa1da79 100644
--- a/apps/cpu/LaminarTubeFlow/ltf.cpp
+++ b/apps/cpu/LaminarTubeFlow/ltf.cpp
@@ -9,6 +9,8 @@ using namespace std;
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -17,18 +19,18 @@ void run(string configname)
       string          pathname = config.getValue<string>("pathname");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      double          uLB = config.getValue<double>("uLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          uLB = config.getValue<real>("uLB");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
-      double          Re = config.getValue<double>("Re");
-      double          dx = config.getValue<double>("dx");
-      vector<double>  length = config.getVector<double>("length");
+      real          Re = config.getValue<real>("Re");
+      real          dx = config.getValue<real>("dx");
+      vector<real>  length = config.getVector<real>("length");
       bool            logToFile = config.getValue<bool>("logToFile");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          cpStart = config.getValue<double>("cpStart");
-      double          cpStep = config.getValue<double>("cpStep");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          cpStart = config.getValue<real>("cpStart");
+      real          cpStep = config.getValue<real>("cpStep");
       bool            newStart = config.getValue<bool>("newStart");
 
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
@@ -56,9 +58,9 @@ void run(string configname)
 
       //Sleep(30000);
 
-      LBMReal dLB = length[1] / dx;
-      LBMReal rhoLB = 0.0;
-      LBMReal nuLB = (uLB*dLB) / Re;
+      real dLB = length[1] / dx;
+      real rhoLB = 0.0;
+      real nuLB = (uLB*dLB) / Re;
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
 
@@ -108,7 +110,7 @@ void run(string configname)
       kernel->setBCProcessor(bcProc);
 
       //////////////////////////////////////////////////////////////////////////
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
       //restart
       SPtr<UbScheduler> mSch(new UbScheduler(cpStep, cpStart));
       //SPtr<MPIIOMigrationCoProcessor> migCoProcessor(new MPIIOMigrationCoProcessor(grid, mSch, metisVisitor, pathname + "/mig", comm));
@@ -126,13 +128,13 @@ void run(string configname)
       {
 
          //bounding box
-         double g_minX1 = 0.0;
-         double g_minX2 = -length[1] / 2.0;
-         double g_minX3 = -length[2] / 2.0;
+         real g_minX1 = 0.0;
+         real g_minX2 = -length[1] / 2.0;
+         real g_minX3 = -length[2] / 2.0;
 
-         double g_maxX1 = length[0];
-         double g_maxX2 = length[1] / 2.0;
-         double g_maxX3 = length[2] / 2.0;
+         real g_maxX1 = length[0];
+         real g_maxX2 = length[1] / 2.0;
+         real g_maxX3 = length[2] / 2.0;
 
          //geometry
          //x
@@ -145,7 +147,7 @@ void run(string configname)
          if (myid == 0) GbSystem3D::writeGeoObject(gridCube.get(), pathname + "/geo/gridCube", WbWriterVtkXmlBinary::getInstance());
 
 
-         double blockLength = blocknx[0] * dx;
+         real blockLength = blocknx[0] * dx;
 
 
 
@@ -235,8 +237,8 @@ void run(string configname)
          unsigned long long numberOfNodesPerBlock = (unsigned long long)(blocknx[0])* (unsigned long long)(blocknx[1])* (unsigned long long)(blocknx[2]);
          unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
          unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-         double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
@@ -355,7 +357,7 @@ void run(string configname)
 int main(int argc, char *argv[])
 {
     try {
-        vf::logging::Logger::initalizeLogger();
+        vf::logging::Logger::initializeLogger();
 
         VF_LOG_INFO("Starting VirtualFluids...");
 
diff --git a/apps/cpu/Multiphase/Multiphase.cpp b/apps/cpu/Multiphase/Multiphase.cpp
index 09d74e1473e9fef8e7f29343d758359eaf0752a2..4d4bc9cf82afe16309f22b69c0973acb3f96324c 100644
--- a/apps/cpu/Multiphase/Multiphase.cpp
+++ b/apps/cpu/Multiphase/Multiphase.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
     try {
 
         //Sleep(30000);
@@ -20,37 +22,37 @@ void run(string configname)
         string geoFile             = config.getValue<string>("geoFile");
         int numOfThreads           = config.getValue<int>("numOfThreads");
         vector<int> blocknx        = config.getVector<int>("blocknx");
-        vector<double> boundingBox = config.getVector<double>("boundingBox");
+        vector<real> boundingBox = config.getVector<real>("boundingBox");
         // vector<double>  length = config.getVector<double>("length");
-        double uLB = config.getValue<double>("uLB");
+        real uLB = config.getValue<real>("uLB");
         // double uF2                         = config.getValue<double>("uF2");
-        double nuL             = config.getValue<double>("nuL");
-        double nuG             = config.getValue<double>("nuG");
-        double densityRatio    = config.getValue<double>("densityRatio");
-        double sigma           = config.getValue<double>("sigma");
+        real nuL             = config.getValue<real>("nuL");
+        real nuG             = config.getValue<real>("nuG");
+        real densityRatio    = config.getValue<real>("densityRatio");
+        real sigma           = config.getValue<real>("sigma");
         int interfaceWidth = config.getValue<int>("interfaceWidth");
         //double radius          = config.getValue<double>("radius");
-        double theta           = config.getValue<double>("contactAngle");
-        double gr              = config.getValue<double>("gravity");
-        double phiL            = config.getValue<double>("phi_L");
-        double phiH            = config.getValue<double>("phi_H");
-        double tauH            = config.getValue<double>("Phase-field Relaxation");
-        double mob             = config.getValue<double>("Mobility");
-
-        double endTime     = config.getValue<double>("endTime");
-        double outTime     = config.getValue<double>("outTime");
-        double availMem    = config.getValue<double>("availMem");
+        real theta           = config.getValue<real>("contactAngle");
+        real gr              = config.getValue<real>("gravity");
+        real phiL            = config.getValue<real>("phi_L");
+        real phiH            = config.getValue<real>("phi_H");
+        real tauH            = config.getValue<real>("Phase-field Relaxation");
+        real mob             = config.getValue<real>("Mobility");
+
+        real endTime     = config.getValue<real>("endTime");
+        real outTime     = config.getValue<real>("outTime");
+        real availMem    = config.getValue<real>("availMem");
         int refineLevel    = config.getValue<int>("refineLevel");
-        double Re          = config.getValue<double>("Re");
-        double dx          = config.getValue<double>("dx");
+        real Re          = config.getValue<real>("Re");
+        real dx          = config.getValue<real>("dx");
         bool logToFile     = config.getValue<bool>("logToFile");
-        double restartStep = config.getValue<double>("restartStep");
-        double cpStart     = config.getValue<double>("cpStart");
-        double cpStep      = config.getValue<double>("cpStep");
+        real restartStep = config.getValue<real>("restartStep");
+        real cpStart     = config.getValue<real>("cpStart");
+        real cpStep      = config.getValue<real>("cpStep");
         bool newStart      = config.getValue<bool>("newStart");
 
-        double beta = 12 * sigma / interfaceWidth;
-        double kappa = 1.5 * interfaceWidth * sigma;
+        real beta = 12 * sigma / interfaceWidth;
+        real kappa = 1.5 * interfaceWidth * sigma;
 
         SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
         int myid                = comm->getProcessID();
@@ -76,8 +78,8 @@ void run(string configname)
         // Sleep(30000);
 
         // LBMReal dLB = 0; // = length[1] / dx;
-        LBMReal rhoLB = 0.0;
-        LBMReal nuLB  = nuL; //(uLB*dLB) / Re;
+        real rhoLB = 0.0;
+        real nuLB  = nuL; //(uLB*dLB) / Re;
 
         SPtr<LBMUnitConverter> conv(new LBMUnitConverter());
 
@@ -122,7 +124,7 @@ void run(string configname)
         grid->setGhostLayerWidth(2);
 
        
-        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
 
         //////////////////////////////////////////////////////////////////////////
         // restart
@@ -154,7 +156,7 @@ void run(string configname)
         fctF2.SetExpr("vy1");
         fctF2.DefineConst("vy1", uLB);
 
-        double startTime = 30;
+        real startTime = 30;
         SPtr<BCAdapter> velBCAdapterF1(new MultiphaseVelocityBCAdapter(true, false, false, fctF1, phiH, 0.0, startTime));
         SPtr<BCAdapter> velBCAdapterF2(new MultiphaseVelocityBCAdapter(true, false, false, fctF2, phiH, startTime, endTime));
 
@@ -199,13 +201,13 @@ void run(string configname)
             double g_maxX2 = length[1] / 2.0;
             double g_maxX3 = length[2] / 2.0;*/
 
-            double g_minX1 = boundingBox[0];
-            double g_minX2 = boundingBox[2];
-            double g_minX3 = boundingBox[4];
+            real g_minX1 = boundingBox[0];
+            real g_minX2 = boundingBox[2];
+            real g_minX3 = boundingBox[4];
 
-            double g_maxX1 = boundingBox[1];
-            double g_maxX2 = boundingBox[3];
-            double g_maxX3 = boundingBox[5];
+            real g_maxX1 = boundingBox[1];
+            real g_maxX2 = boundingBox[3];
+            real g_maxX3 = boundingBox[5];
 
             // geometry
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
@@ -330,9 +332,9 @@ void run(string configname)
             unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
             unsigned long long numberOfNodesPerBlockWithGhostLayer =
                 numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-            double needMemAll =
-                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+            real needMemAll =
+                real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+            real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
             if (myid == 0) {
                 UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
diff --git a/apps/cpu/MultiphaseDropletTest.zip b/apps/cpu/MultiphaseDropletTest.zip
deleted file mode 100644
index 5eb13a6c0bacfbf392deb00c6b388ba282c038e0..0000000000000000000000000000000000000000
Binary files a/apps/cpu/MultiphaseDropletTest.zip and /dev/null differ
diff --git a/apps/cpu/MultiphaseDropletTest/droplet.cpp b/apps/cpu/MultiphaseDropletTest/droplet.cpp
index 54b59fcfd8bd93f220b3d3d4ebb5bb29881079e5..a9d561930a77cc447bbe6c959bd7e8464f314d69 100644
--- a/apps/cpu/MultiphaseDropletTest/droplet.cpp
+++ b/apps/cpu/MultiphaseDropletTest/droplet.cpp
@@ -13,6 +13,8 @@ using namespace std;
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
     try {
         vf::basics::ConfigurationFile config;
         config.load(configname);
@@ -20,31 +22,31 @@ void run(string configname)
         string pathname            = config.getValue<string>("pathname");
         int numOfThreads           = config.getValue<int>("numOfThreads");
         vector<int> blocknx        = config.getVector<int>("blocknx");
-        vector<double> boundingBox = config.getVector<double>("boundingBox");
-        double uLB             = config.getValue<double>("uLB");
-        double nuL             = config.getValue<double>("nuL");
-        double nuG             = config.getValue<double>("nuG");
-        double densityRatio    = config.getValue<double>("densityRatio");
-        double sigma           = config.getValue<double>("sigma");
+        vector<real> boundingBox = config.getVector<real>("boundingBox");
+        real uLB             = config.getValue<real>("uLB");
+        real nuL             = config.getValue<real>("nuL");
+        real nuG             = config.getValue<real>("nuG");
+        real densityRatio    = config.getValue<real>("densityRatio");
+        real sigma           = config.getValue<real>("sigma");
         int interfaceThickness = config.getValue<int>("interfaceThickness");
-        double radius          = config.getValue<double>("radius");
-        double theta           = config.getValue<double>("contactAngle");
+        real radius          = config.getValue<real>("radius");
+        real theta           = config.getValue<real>("contactAngle");
         //double gr              = config.getValue<double>("gravity");
-        double phiL            = config.getValue<double>("phi_L");
-        double phiH            = config.getValue<double>("phi_H");
-        double tauH            = config.getValue<double>("Phase-field Relaxation");
-        double mob             = config.getValue<double>("Mobility");
-
-        double endTime     = config.getValue<double>("endTime");
-        double outTime     = config.getValue<double>("outTime");
-        double availMem    = config.getValue<double>("availMem");
+        real phiL            = config.getValue<real>("phi_L");
+        real phiH            = config.getValue<real>("phi_H");
+        real tauH            = config.getValue<real>("Phase-field Relaxation");
+        real mob             = config.getValue<real>("Mobility");
+
+        real endTime     = config.getValue<real>("endTime");
+        real outTime     = config.getValue<real>("outTime");
+        real availMem    = config.getValue<real>("availMem");
         int refineLevel    = config.getValue<int>("refineLevel");
-        double Re          = config.getValue<double>("Re");
-        double dx          = config.getValue<double>("dx");
+        real Re          = config.getValue<real>("Re");
+        real dx          = config.getValue<real>("dx");
         bool logToFile     = config.getValue<bool>("logToFile");
-        double restartStep = config.getValue<double>("restartStep");
-        double cpStart     = config.getValue<double>("cpStart");
-        double cpStep      = config.getValue<double>("cpStep");
+        real restartStep = config.getValue<real>("restartStep");
+        real cpStart     = config.getValue<real>("cpStart");
+        real cpStep      = config.getValue<real>("cpStep");
         bool newStart      = config.getValue<bool>("newStart");
         //double rStep = config.getValue<double>("rStep");
 
@@ -88,37 +90,37 @@ void run(string configname)
         //Sleep(30000);
 
         // LBMReal dLB = 0; // = length[1] / dx;
-        LBMReal rhoLB = 0.0;
-        LBMReal nuLB  = nuL; //(uLB*dLB) / Re;
+        real rhoLB = 0.0;
+        real nuLB  = nuL; //(uLB*dLB) / Re;
 
         //diameter of circular droplet
-        LBMReal D  = 2.0*radius;
+        real D  = 2.0*radius;
 
         //density retio
-        LBMReal r_rho = densityRatio;
+        real r_rho = densityRatio;
 
         //density of heavy fluid
-        LBMReal rho_h = 1.0;
+        real rho_h = 1.0;
         //density of light fluid
-        LBMReal rho_l = rho_h / r_rho;
+        real rho_l = rho_h / r_rho;
 
         //kinimatic viscosity
-        LBMReal nu_h = nuL;
+        real nu_h = nuL;
         //LBMReal nu_l = nuG;
         //#dynamic viscosity
-        LBMReal mu_h = rho_h * nu_h;
+        real mu_h = rho_h * nu_h;
         
         //gravity
-        LBMReal g_y = Re* Re* mu_h* mu_h / (rho_h * (rho_h - rho_l) * D * D * D);
+        real g_y = Re* Re* mu_h* mu_h / (rho_h * (rho_h - rho_l) * D * D * D);
         //Eotvos number
-        LBMReal Eo = 100;
+        real Eo = 100;
         //surface tension
         sigma = rho_h* g_y* D* D / Eo;
 
         //g_y = 0;
 
-        double beta  = 12.0 * sigma / interfaceThickness;
-        double kappa = 1.5 * interfaceThickness * sigma;
+        real beta  = 12.0 * sigma / interfaceThickness;
+        real kappa = 1.5 * interfaceThickness * sigma;
 
         if (myid == 0) {
                 //UBLOG(logINFO, "uLb = " << uLB);
@@ -187,7 +189,7 @@ void run(string configname)
         grid->setPeriodicX3(true);
         grid->setGhostLayerWidth(2);
 
-        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
 
         //////////////////////////////////////////////////////////////////////////
         // restart
@@ -206,13 +208,13 @@ void run(string configname)
         if (newStart) {
 
             // bounding box
-            double g_minX1 = boundingBox[0];
-            double g_minX2 = boundingBox[2];
-            double g_minX3 = boundingBox[4];
+            real g_minX1 = boundingBox[0];
+            real g_minX2 = boundingBox[2];
+            real g_minX3 = boundingBox[4];
 
-            double g_maxX1 = boundingBox[1];
-            double g_maxX2 = boundingBox[3];
-            double g_maxX3 = boundingBox[5];
+            real g_maxX1 = boundingBox[1];
+            real g_maxX2 = boundingBox[3];
+            real g_maxX3 = boundingBox[5];
 
             // geometry
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
@@ -225,7 +227,7 @@ void run(string configname)
             GenBlocksGridVisitor genBlocks(gridCube);
             grid->accept(genBlocks);
 
-            double dx2 = 2.0 * dx;
+            real dx2 = 2.0 * dx;
             GbCuboid3DPtr wallYmin(new GbCuboid3D(g_minX1 - dx2, g_minX2 - dx2, g_minX3 - dx2, g_maxX1 + dx2, g_minX2, g_maxX3 + dx2));
             GbSystem3D::writeGeoObject(wallYmin.get(), pathname + "/geo/wallYmin", WbWriterVtkXmlASCII::getInstance());
             GbCuboid3DPtr wallYmax(new GbCuboid3D(g_minX1 - dx2, g_maxX2, g_minX3 - dx2, g_maxX1 + dx2, g_maxX2 + dx2, g_maxX3 + dx2));
@@ -252,9 +254,9 @@ void run(string configname)
             unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
             unsigned long long numberOfNodesPerBlockWithGhostLayer =
                 numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-            double needMemAll =
-                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+            real needMemAll =
+                real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+            real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
             if (myid == 0) {
                 UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
@@ -284,9 +286,9 @@ void run(string configname)
             intHelper.setBC();
 
             // initialization of distributions
-            LBMReal x1c = 2.5 * D; // (g_maxX1 - g_minX1-1)/2; //
-            LBMReal x2c = 12.5 * D; //(g_maxX2 - g_minX2-1)/2;
-            LBMReal x3c = 1.5; //2.5 * D; //(g_maxX3 - g_minX3-1)/2;
+            real x1c = 2.5 * D; // (g_maxX1 - g_minX1-1)/2; //
+            real x2c = 12.5 * D; //(g_maxX2 - g_minX2-1)/2;
+            real x3c = 1.5; //2.5 * D; //(g_maxX3 - g_minX3-1)/2;
             //LBMReal x3c = 2.5 * D;
             mu::Parser fct1;
             fct1.SetExpr("0.5-0.5*tanh(2*(sqrt((x1-x1c)^2+(x2-x2c)^2+(x3-x3c)^2)-radius)/interfaceThickness)");
@@ -355,7 +357,7 @@ void run(string configname)
         grid->accept(setConnsVisitor);
 
         SPtr<UbScheduler> visSch(new UbScheduler(outTime));
-        double t_ast, t;
+        real t_ast, t;
         t_ast = 2;
         t = (int)(t_ast/std::sqrt(g_y/D));
         visSch->addSchedule(t,t,t); //t=2
diff --git a/apps/cpu/Nozzle/nozzle.cpp b/apps/cpu/Nozzle/nozzle.cpp
index ab07f1f91ae20990d970e5850bec79607cf2b741..54a306729006a60ec02c04cf029f529163acbe0f 100644
--- a/apps/cpu/Nozzle/nozzle.cpp
+++ b/apps/cpu/Nozzle/nozzle.cpp
@@ -28,19 +28,29 @@ int main(int argc, char *argv[])
     //double g_maxX2 =  429.087e-3;
     //double g_maxX3 =  214.5e-3;
 
-    double g_minX1 = -1341.81e-3 + 10e-3;
-    double g_minX2 =  0.360872;
-    double g_minX3 = 0;//-210e-3;
+    //double g_minX1 = -1341.81e-3 + 10e-3;
+    //double g_minX2 =  0.360872;
+    //double g_minX3 = -210e-3;
+
+    //double g_maxX1 = -1260.81e-3 - 10e-3;
+    //double g_maxX2 =  0.416302;
+    //double g_maxX3 = 210e-3;
+
+    //int blockNX[3] = { 10, 10, 10 };
 
-    double g_maxX1 = -1260.81e-3 - 10e-3;
-    double g_maxX2 =  0.416302;
-    double g_maxX3 = 0.20105; //210e-3;
+    double g_minX1 = -1.31431;
+    double g_minX2 = 0.375582;
+    double g_minX3 = -210e-3 - 1e-3;
 
-    int blockNX[3] = { 10, 10, 10 };
+    double g_maxX1 = -1.28831;
+    double g_maxX2 = 0.401582;
+    double g_maxX3 = 0.206;
+
+    int blockNX[3] = { 26, 26, 26 };
 
     double dx = 1e-3;
 
-    double uLB  = 0.0001;
+    double uLB  = 0.00001;
     //double rhoLB = 0.0;
 
     // concrete 
@@ -51,31 +61,33 @@ int main(int argc, char *argv[])
     double A = UbMath::PI * R * R;
     double u = V / 3600 / A;
     double muConcrete = 2.1133054011798826; // [Pa s]
+    double rhoAir = 1.2041;                // [kg/m^3]
     double tau0 = 715.218181094648; //
     double rhoConcrete = 2400; // [kg/m^3]
     double nu = muConcrete / rhoConcrete;
-    double rhoAir = 1.2041; // [kg/m^3]
+
     //double Re_D = d_part * u / nu;
     //if (myid == 0) UBLOG(logINFO, "Re_D = " << Re_D);
     //
     SPtr<LBMUnitConverter> units = std::make_shared<LBMUnitConverter>(d_part, 1., 2400, d_part / dx, uLB);
-    //double nuLB = D*units->getFactorLentghWToLb() * u*units->getFactorVelocityWToLb() / Re_D;
-    //if (myid == 0) UBLOG(logINFO, "nuLB = " << nuLB);
+    if (myid == 0) std::cout << units->toString() << std::endl;
 
     double interfaceThickness = 4.096;
     double sigma = 0.03;
     double Re = rhoConcrete * u * d_part / muConcrete;
     double We = rhoConcrete * u * u * d_part / sigma;
 
-    double nu_h_LB = uLB * d_part / Re;
-    double nu_l_LB = nu_h_LB;
-    if (myid == 0) UBLOG(logINFO, "nu_h = " << nu_h_LB << " nu_l = " << nu_l_LB);
+    
+
+    double nu_h_LB = uLB * d_part * units->getFactorLentghWToLb() / Re;
+    double nu_l_LB = 0;// = nu_h_LB;
+    
 
     double rho_h_LB = 1;
 
     // surface tension
-    double sigma_LB = rho_h_LB * uLB * uLB * d_part / We;
-    if (myid == 0) UBLOG(logINFO, "sigma_LB = " << sigma_LB);
+    double sigma_LB = rho_h_LB * uLB * uLB * d_part * units->getFactorLentghWToLb() / We;
+    
 
     // LBMReal dLB = 0; // = length[1] / dx;
     LBMReal rhoLB = 0.0;
@@ -105,31 +117,7 @@ int main(int argc, char *argv[])
     //SPtr<LBMUnitConverter> units = std::make_shared<LBMUnitConverter>(d_part, 1., 1000, d_part / dx, std::abs(uLB));
     //SPtr<LBMUnitConverter> units = std::make_shared<LBMUnitConverter>(d_part, 1., 1000, d_part / dx, std::abs(uLB));
     //SPtr<LBMUnitConverter> units = std::make_shared<LBMUnitConverter>(d_part, 1., 2400, d_part / dx, uRef);
-    if (myid == 0) std::cout << units->toString() << std::endl;
-
-    //SPtr<LBMKernel> kernel   = make_shared<IBcumulantK17LBMKernel>();
-    //SPtr<LBMKernel> kernel   = make_shared<CumulantK17LBMKernel>();
-    //SPtr<LBMKernel> kernel = make_shared<MultiphaseTwoPhaseFieldsPressureFilterLBMKernel>();
-    SPtr<LBMKernel> kernel = make_shared<MultiphaseSimpleVelocityBaseExternalPressureLBMKernel>();
-
-    kernel->setWithForcing(true);
-    kernel->setForcingX1(0.0);
-    kernel->setForcingX2(0.0);
-    kernel->setForcingX3(0.0);
-
-    kernel->setPhiL(phiL);
-    kernel->setPhiH(phiH);
-    kernel->setPhaseFieldRelaxation(tauH);
-    kernel->setMobility(mob);
-    kernel->setInterfaceWidth(interfaceThickness);
-
-    kernel->setCollisionFactorMultiphase(nu_h_LB, nu_l_LB);
-    kernel->setDensityRatio(densityRatio);
-    kernel->setMultiphaseModelParameters(beta, kappa);
-    kernel->setContactAngle(theta);
- 
-    SPtr<BCProcessor> bcProc = make_shared<BCProcessor>();
-    kernel->setBCProcessor(bcProc);
+    
 
     //SPtr<BCAdapter> noSlipBCAdapter(new NoSlipBCAdapter());
     //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
@@ -137,21 +125,23 @@ int main(int argc, char *argv[])
     noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new MultiphaseNoSlipBCAlgorithm()));
 
 
-    mu::Parser fct;
 
-    {
-        // concrete inflow boundary condition
-        fct.SetExpr("U");
-        fct.DefineConst("U", -u*units->getFactorVelocityWToLb());
-        if (myid == 0) UBLOG(logINFO, "Concrete inflow velocity = " << u << " m/s");
-        if (myid == 0) UBLOG(logINFO, "Concrete inflow velocity = " << u*units->getFactorVelocityWToLb() << " dx/dt");
+  
+    // concrete inflow boundary condition
+    mu::Parser fct;
+    fct.SetExpr("U");
+    fct.DefineConst("U", -u*units->getFactorVelocityWToLb());
+    if (myid == 0) VF_LOG_INFO("Concrete inflow velocity = {} m/s", u);
+    if (myid == 0) VF_LOG_INFO("Concrete inflow velocity = {} dx/dt", u * units->getFactorVelocityWToLb());
+    if (myid == 0) VF_LOG_INFO("Concrete Re = {}", Re);
+        
     //    // Å tigler, J. (2014). Analytical velocity profile in tube for laminar and turbulent flow. Engineering
     //    // Mechanics, 21(6), 371-379.
     //    double cx1 = -1.31431 + R;
     //    double cx2 = 0.375582 + R;
     //    //double cx3 = 0.20105 + R;
     //    double L = g_maxX1 - g_minX1;
-    //    double p_concrete = 7e5; // Pa = 7 Bar
+    //    double p_concrete = 1e5; // Pa = 1 Bar
     //    double p1 = p_concrete * units->getFactorPressureWToLb();
     //    double p2 = 0.0;
     //    double drhoLB = 1.0 + rhoLB;
@@ -166,59 +156,111 @@ int main(int argc, char *argv[])
     //    fct.DefineConst("R", R);
     //    fct.DefineConst("U", uLB * ((N + 3) / (N + 1)));
     //    fct.DefineConst("NplusOne", N + 1.0);
-    }
+    
 
     //SPtr<BCAdapter> inflowConcreteBCAdapter(new VelocityBCAdapter(false, false, true, fct, 0, BCFunction::INFCONST));
     //inflowConcreteBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
     SPtr<BCAdapter> inflowConcreteBCAdapter(new MultiphaseVelocityBCAdapter(false, false, true, fct, phiH, 0, BCFunction::INFCONST));
     inflowConcreteBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new MultiphaseVelocityBCAlgorithm()));
 
-    {
-        //air inflow boundary condition
-        // Å tigler, J. (2014). Analytical velocity profile in tube for laminar and turbulent flow. Engineering
-        // Mechanics, 21(6), 371-379.
-        //SPtr<LBMUnitConverter> unitsAir = std::make_shared<LBMUnitConverter>(d_part, LBMUnitConverter::AIR_20C, d_part / dx);
-        SPtr<LBMUnitConverter> unitsAir = std::make_shared<LBMUnitConverter>(d_part, 1., 1.2041, d_part / dx, uLB);
-        double V = 40;     // flow rate [m^3/h]
-        double D = 0.0166;  // air inlet diameter [m]
-        double R = D / 2.0; // radius [m]
-        double A = UbMath::PI * R * R;
-        double u = V / 3600 / A;
-        double uLB = u * unitsAir->getFactorVelocityWToLb();
-        //double cx1 = -1.2788 + R;
-        double cx2 = 0.3803 + R;
-        double cx3 = 0.1517 + R;
-        double L = g_maxX1 - g_minX1;
-        double p_air = 7e5; // Pa = 7 Bar
-        double p1 = p_air;
-        double p2 = 0.0;
-        double mu = 17.2e-6; //Pa s, air 20° C
-        double N = R * R / 2 * mu * u * (p1 - p2) / L - 3;
-        if (myid == 0) UBLOG(logINFO, "Air inflow velocity = " << u << " m/s");
-        if (myid == 0) UBLOG(logINFO, "Air inflow velocity = " << uLB << " dx/dt");
-
-        double nu = mu / rhoConcrete;
-        double Re = D * u / nu;
-        if (myid == 0) UBLOG(logINFO, "Re_air = " << Re);
-
-        double nuLB = D * unitsAir->getFactorLentghWToLb() * uLB * unitsAir->getFactorVelocityWToLb() / Re;
-        if (myid == 0) UBLOG(logINFO, "nuLB_air = " << nuLB);
-
-        // mu::Parser fct;
-        fct.SetExpr("U");
-        fct.DefineConst("U", -uLB);
-        //fct.SetExpr("U*(1-(((((x2-y0)^2+(x3-z0)^2)^0.5)/R)^NplusOne))");
-        ////fct.DefineConst("x0", cx1);
-        //fct.DefineConst("y0", cx2);
-        //fct.DefineConst("z0", cx3);
-        //fct.DefineConst("R", R);
-        //fct.DefineConst("U", -uLB * ((N + 3) / (N + 1)));
-        //fct.DefineConst("NplusOne", N + 1.0);
-    }
+    
+        // air inflow boundary condition
+        //  Štigler, J. (2014). Analytical velocity profile in tube for laminar and turbulent flow. Engineering
+        //  Mechanics, 21(6), 371-379.
+        // SPtr<LBMUnitConverter> unitsAir = std::make_shared<LBMUnitConverter>(d_part, LBMUnitConverter::AIR_20C, d_part / dx);
+        //SPtr<LBMUnitConverter> unitsAir = std::make_shared<LBMUnitConverter>(d_part, 1., 1.2041, d_part / dx, uLB);
+        //double V = 40;      // flow rate [m^3/h]
+        //double D = 0.0166;  // air inlet diameter [m]
+        //double R = D / 2.0; // radius [m]
+        //double A = UbMath::PI * R * R;
+        //double u = V / 3600 / A;
+        //double uLB = u * unitsAir->getFactorVelocityWToLb();
+        //// double cx1 = -1.2788 + R;
+        //double cx2 = 0.3803 + R;
+        //double cx3 = 0.1517 + R;
+        //double L = g_maxX1 - g_minX1;
+        //double p_air = 7e5; // Pa = 7 Bar
+        //double p1 = p_air;
+        //double p2 = 0.0;
+        //double mu = 17.2e-6; // Pa s, air 20° C
+        //double N = R * R / 2 * mu * u * (p1 - p2) / L - 3;
+        //if (myid == 0) VF_LOG_INFO("Air inflow velocity = {} m/s", u);
+        //if (myid == 0) VF_LOG_INFO("Air inflow velocity = {} dx/dt", uLB);
+        //
+
+        //double nu = mu / rhoConcrete;
+        //double Re = d_part * u / nu;
+        //if (myid == 0) VF_LOG_INFO("Re_air = {}", Re);
+
+        //double nuLB = d_part * unitsAir->getFactorLentghWToLb() * uLB / Re;
+        //if (myid == 0) VF_LOG_INFO("nuLB_air = {}", nuLB);
+        //nu_l_LB = nuLB;
+    
+
+    SPtr<LBMUnitConverter> unitsAir = std::make_shared<LBMUnitConverter>(d_part, 1., 1.2041, d_part / dx, uLB);
+    double V_air = 40;      // flow rate [m^3/h]
+    double D_air = 0.00553; // air inlet diameter [m]
+    double R_air = D_air / 2.0; // radius [m]
+    double A_air = UbMath::PI * R_air * R_air;
+    double u_air = V_air / 3600 / A_air;
+    double uLB_air = u_air * unitsAir->getFactorVelocityWToLb();
+    // double cx1 = -1.2788 + R;
+    double cx2 = 0.385822 + R_air;
+    double cx3 = 0.135562 + R_air;
+    double L_air = 0.00747;
+    double p_air = 7e5; // Pa = 7 Bar
+    double p1 = p_air;
+    double p2 = 1e5;
+    double mu_air = 17.2e-6; // Pa s, air 20° C
+    double rho_air = 1.2041;  // [kg/m^3]
+    double N = R_air * R_air / 2 * mu_air * u_air * (p1 - p2) / L_air - 3;
+    if (myid == 0) VF_LOG_INFO("Air inflow velocity = {} m/s", u_air);
+    if (myid == 0) VF_LOG_INFO("Air inflow velocity = {} dx/dt", uLB_air);
+
+    double nu_air = mu_air / rho_air;
+    double Re_air = d_part * u_air / nu_air;
+    if (myid == 0) VF_LOG_INFO("Air Re = {}", Re_air);
+
+    double nuLB_air = d_part * unitsAir->getFactorLentghWToLb() * uLB_air / Re_air;
+    if (myid == 0) VF_LOG_INFO("nuLB_air = {}", nuLB_air);
+    nu_l_LB = nuLB_air;
+
+    if (myid == 0) VF_LOG_INFO("nu_h = {}", nu_h_LB);
+    if (myid == 0) VF_LOG_INFO("nu_l = {}", nu_l_LB);
+    if (myid == 0) VF_LOG_INFO("sigma_LB = {}", sigma_LB);
+
+    
+
+    mu::Parser fctVx1;
+    //fctVx1.SetExpr("U");
+    //fctVx1.DefineConst("U", uLB_air);
+    mu::Parser fctVx2;
+    fctVx2.SetExpr("U");
+    fctVx2.DefineConst("U", 0);
+    mu::Parser fctVx3;
+    //fctVx3.SetExpr("U");
+    //fctVx3.DefineConst("U", -uLB_air);
+    
+    fctVx1.SetExpr("U*(1-(((((x2-y0)^2+(x3-z0)^2)^0.5)/R)^NplusOne))");
+    //fct.DefineConst("x0", cx1);
+    fctVx1.DefineConst("y0", cx2);
+    fctVx1.DefineConst("z0", cx3);
+    fctVx1.DefineConst("R", R);
+    fctVx1.DefineConst("U", uLB_air * ((N + 3) / (N + 1)));
+    fctVx1.DefineConst("NplusOne", N + 1.0);
+
+    fctVx3.SetExpr("U*(1-(((((x2-y0)^2+(x3-z0)^2)^0.5)/R)^NplusOne))");
+    // fctVx3.DefineConst("x0", cx1);
+    fctVx3.DefineConst("y0", cx2);
+    fctVx3.DefineConst("z0", cx3);
+    fctVx3.DefineConst("R", R);
+    fctVx3.DefineConst("U", -uLB_air * ((N + 3) / (N + 1)));
+    fctVx3.DefineConst("NplusOne", N + 1.0);
+    
 
     //SPtr<BCAdapter> inflowAirBCAdapter(new VelocityBCAdapter(true, false, false, fct, 0, BCFunction::INFCONST));
     //inflowAirBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
-    SPtr<BCAdapter> inflowAirBCAdapter(new MultiphaseVelocityBCAdapter(true, false, false, fct, phiL, 0, BCFunction::INFCONST));
+    SPtr<BCAdapter> inflowAirBCAdapter(new MultiphaseVelocityBCAdapter(true, false, true, fctVx1, fctVx3, fctVx3, phiL, 0, BCFunction::INFCONST));
     inflowAirBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new MultiphaseVelocityBCAlgorithm()));
 
     SPtr<BCAdapter> outflowBCAdapter(new DensityBCAdapter(rhoLB));
@@ -234,6 +276,30 @@ int main(int argc, char *argv[])
     bcVisitor.addBC(inflowAirBCAdapter);
     bcVisitor.addBC(outflowBCAdapter);
 
+    // SPtr<LBMKernel> kernel   = make_shared<IBcumulantK17LBMKernel>();
+    // SPtr<LBMKernel> kernel   = make_shared<CumulantK17LBMKernel>();
+    // SPtr<LBMKernel> kernel = make_shared<MultiphaseTwoPhaseFieldsPressureFilterLBMKernel>();
+    SPtr<LBMKernel> kernel = make_shared<MultiphaseSimpleVelocityBaseExternalPressureLBMKernel>();
+
+    kernel->setWithForcing(true);
+    kernel->setForcingX1(0.0);
+    kernel->setForcingX2(0.0);
+    kernel->setForcingX3(0.0);
+
+    kernel->setPhiL(phiL);
+    kernel->setPhiH(phiH);
+    kernel->setPhaseFieldRelaxation(tauH);
+    kernel->setMobility(mob);
+    kernel->setInterfaceWidth(interfaceThickness);
+
+    kernel->setCollisionFactorMultiphase(nu_h_LB, nu_l_LB);
+    kernel->setDensityRatio(densityRatio);
+    kernel->setMultiphaseModelParameters(beta, kappa);
+    kernel->setContactAngle(theta);
+
+    SPtr<BCProcessor> bcProc = make_shared<BCProcessor>();
+    kernel->setBCProcessor(bcProc);
+
     SPtr<Grid3D> grid = make_shared<Grid3D>(comm);
     grid->setPeriodicX1(false);
     grid->setPeriodicX2(false);
@@ -244,7 +310,7 @@ int main(int argc, char *argv[])
 
     string geoPath = "d:/Projects/TRR277/Project/WP4/NozzleGeo";
 
-    string outputPath = "d:/temp/NozzleFlowTest_Multiphase";
+    string outputPath = "d:/temp/NozzleFlowTest_Multiphase2";
     UbSystem::makeDirectory(outputPath);
     UbSystem::makeDirectory(outputPath + "/liggghts");
 
@@ -254,7 +320,7 @@ int main(int argc, char *argv[])
     //    UbLog::output_policy::setStream(logFilename.str());
     //}
 
-    SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+    SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, vf::lbm::dir::DIR_MMM, MetisPartitioner::RECURSIVE));
     
     SPtr<GbObject3D> gridCube = make_shared <GbCuboid3D>(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3);
     if (myid == 0)
@@ -314,42 +380,57 @@ int main(int argc, char *argv[])
     meshNozzleVolcanNozzle2->readMeshFromSTLFileBinary(geoPath + "/06_2_Nozzle_Volcan_Nozzle.stl", true);
     if (myid == 0) UBLOG(logINFO, "Read meshNozzleVolcanNozzle2:end");
     if (myid == 0) GbSystem3D::writeGeoObject(meshNozzleVolcanNozzle2.get(), outputPath + "/geo/meshNozzleVolcanNozzle2", WbWriterVtkXmlBinary::getInstance());
-    SPtr<Interactor3D> intrNozzleVolcanNozzle2 = std::make_shared<D3Q27TriFaceMeshInteractor>(meshNozzleVolcanNozzle2, grid, noSlipBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES);
+    SPtr<Interactor3D> intrNozzleVolcanNozzle2 = std::make_shared<D3Q27TriFaceMeshInteractor>(meshNozzleVolcanNozzle2, grid, noSlipBCAdapter, Interactor3D::SOLID, Interactor3D::POINTS);
     ///////////////////////////////////////////////////////////
     //box
     SPtr<D3Q27Interactor> intrBox = SPtr<D3Q27Interactor>(new D3Q27Interactor(gridCube, grid, noSlipBCAdapter, Interactor3D::INVERSESOLID));
     ///////////////////////////////////////////////////////////
     //inflow
     GbCylinder3DPtr geoInflow(new GbCylinder3D(-1.30181+0.0005, 0.390872-0.00229, 0.20105, -1.30181+0.0005, 0.390872-0.00229, 0.23, 0.013));
-    if (myid == 0) GbSystem3D::writeGeoObject(geoInflow.get(), outputPath + "/geo/geoInflow", WbWriterVtkXmlASCII::getInstance());
+    if (myid == 0) GbSystem3D::writeGeoObject(geoInflow.get(), outputPath + "/geo/geoInflow", WbWriterVtkXmlBinary::getInstance());
     SPtr<D3Q27Interactor> intrInflow = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoInflow, grid, inflowConcreteBCAdapter, Interactor3D::SOLID));
     ///////////////////////////////////////////////////////////
     //outflow
     GbCylinder3DPtr geoOutflow(new GbCylinder3D(-1.30181+0.0005, 0.390872-0.00229, -0.22, -1.30181+0.0005, 0.390872-0.00229, -0.21, 0.013));
-    if (myid == 0) GbSystem3D::writeGeoObject(geoOutflow.get(), outputPath + "/geo/geoOutflow", WbWriterVtkXmlASCII::getInstance());
+    //GbCylinder3DPtr geoOutflow(new GbCylinder3D(-1.30181+0.0005, 0.390872-0.00229, g_minX3, -1.30181+0.0005, 0.390872-0.00229, -0.21, 0.013));
+    if (myid == 0) GbSystem3D::writeGeoObject(geoOutflow.get(), outputPath + "/geo/geoOutflow", WbWriterVtkXmlBinary::getInstance());
     SPtr<D3Q27Interactor> intrOutflow = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflow, grid, outflowBCAdapter, Interactor3D::SOLID));
     ///////////////////////////////////////////////////////////
-    SPtr<GbTriFaceMesh3D> geoAirInlet = std::make_shared<GbTriFaceMesh3D>();
-    if (myid == 0) UBLOG(logINFO, "Read Air_Inlet:start");
-    geoAirInlet->readMeshFromSTLFileASCII(geoPath + "/Air_Inlet.stl", true);
-    if (myid == 0) UBLOG(logINFO, "Read Air_Inlet:end");
-    if (myid == 0) GbSystem3D::writeGeoObject(geoAirInlet.get(), outputPath + "/geo/geoAirInlet", WbWriterVtkXmlBinary::getInstance());
-    SPtr<Interactor3D> intrAirInlet = std::make_shared<D3Q27TriFaceMeshInteractor>(
-        geoAirInlet, grid, inflowAirBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES);
+    //SPtr<GbTriFaceMesh3D> geoAirInlet = std::make_shared<GbTriFaceMesh3D>();
+    //if (myid == 0) UBLOG(logINFO, "Read Air_Inlet:start");
+    //geoAirInlet->readMeshFromSTLFileASCII(geoPath + "/Air_Inlet.stl", true);
+    //if (myid == 0) UBLOG(logINFO, "Read Air_Inlet:end");
+    //if (myid == 0) GbSystem3D::writeGeoObject(geoAirInlet.get(), outputPath + "/geo/geoAirInlet", WbWriterVtkXmlBinary::getInstance());
+    //SPtr<Interactor3D> intrAirInlet = std::make_shared<D3Q27TriFaceMeshInteractor>(geoAirInlet, grid, inflowAirBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES);
+    /////////////////////////////////////////////////////////////
+    //Fluid area
+    GbCylinder3DPtr geoFluidArea(new GbCylinder3D(-1.30181+0.0005, 0.390872-0.00229, g_minX3, -1.30181+0.0005, 0.390872-0.00229, g_maxX3, 0.013));
+    if (myid == 0) GbSystem3D::writeGeoObject(geoFluidArea.get(), outputPath + "/geo/geoFluidArea", WbWriterVtkXmlBinary::getInstance());
+    SPtr<D3Q27Interactor> intrFluidArea = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoFluidArea, grid, noSlipBCAdapter, Interactor3D::INVERSESOLID));
+    ///////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////
+    GbCylinder3DPtr geoAirInflow(new GbCylinder3D(-1.31431 - 0.0005, 0.388587, 0.1383275, -1.31431, 0.388587, 0.1383275, 0.002765));
+    if (myid == 0) GbSystem3D::writeGeoObject(geoAirInflow.get(), outputPath + "/geo/geoAirInlet", WbWriterVtkXmlBinary::getInstance());
+    SPtr<Interactor3D> intrAirInflow = std::make_shared<D3Q27Interactor>(geoAirInflow, grid, inflowAirBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES);
     ///////////////////////////////////////////////////////////
 
     InteractorsHelper intHelper(grid, metisVisitor, true);
-    intHelper.addInteractor(intrBox);
+    
+    intHelper.addInteractor(intrFluidArea);
+    intHelper.addInteractor(intrNozzleVolcanNozzle2);
+    //intHelper.addInteractor(intrBox);
     intHelper.addInteractor(intrInflow);
-    intHelper.addInteractor(intrAirInlet);
+    intHelper.addInteractor(intrAirInflow);
     intHelper.addInteractor(intrOutflow);
-    intHelper.addInteractor(intrNozzleAirDistributor);
-    intHelper.addInteractor(intrNozzleAirInlet);
-    intHelper.addInteractor(intrNozzleSpacer);
-    intHelper.addInteractor(intrNozzleAccDistributor);
-    intHelper.addInteractor(intrNozzleAccInlet);
-    intHelper.addInteractor(intrNozzleVolcanNozzle1);
-    intHelper.addInteractor(intrNozzleVolcanNozzle2);
+    
+
+    //intHelper.addInteractor(intrNozzleAirDistributor);
+    //intHelper.addInteractor(intrNozzleAirInlet);
+    //intHelper.addInteractor(intrNozzleSpacer);
+    //intHelper.addInteractor(intrNozzleAccDistributor);
+    //intHelper.addInteractor(intrNozzleAccInlet);
+    //intHelper.addInteractor(intrNozzleVolcanNozzle1);
+    
 
 
     intHelper.selectBlocks();
diff --git a/apps/cpu/PoiseuilleFlow/pf1.cpp b/apps/cpu/PoiseuilleFlow/pf1.cpp
index d4d856d51f66a1ac6800e1f2f78da5b219b54488..93680117551c13a23ecc08c5dc4731d92ec78b77 100644
--- a/apps/cpu/PoiseuilleFlow/pf1.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf1.cpp
@@ -7,6 +7,8 @@ using namespace std;
 //pipe flow with forcing
 void pf1()
 {
+    using namespace vf::lbm::dir;
+
    SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
 
@@ -14,25 +16,25 @@ void pf1()
    string          pathOut = "d:/temp/test_dir_naming";  //"/gfs1/work/niikonst/pflow_pipe_forcing";
    int             numOfThreads = 1;
    int             blocknx[3] ={ 10,10,10 };
-   double          endTime = 10;
-   double          cpStart = 10;
-   double          cpStep = 10;
-   double          outTime = 10;
-   double          availMem = 8e9;
-   double          deltax = 1;
-   double          rhoLB = 0.0;
-   double          nuLB = 0.005;
+   real          endTime = 10;
+   real          cpStart = 10;
+   real          cpStep = 10;
+   real          outTime = 10;
+   real          availMem = 8e9;
+   real          deltax = 1;
+   real          rhoLB = 0.0;
+   real          nuLB = 0.005;
 
    //geometry definition
 
    //simulation bounding box
-   double g_minX1 = 0.0;
-   double g_minX2 = -10.0;
-   double g_minX3 = -10.0;
+   real g_minX1 = 0.0;
+   real g_minX2 = -10.0;
+   real g_minX3 = -10.0;
 
-   double g_maxX1 = 50;
-   double g_maxX2 = 10;
-   double g_maxX3 = 10;
+   real g_maxX1 = 50;
+   real g_maxX2 = 10;
+   real g_maxX3 = 10;
 
    //Sleep(15000);
 
@@ -76,7 +78,7 @@ void pf1()
 
    //set boundary conditions for blocks and create process decomposition for MPI
    SPtr<D3Q27Interactor> cylinderInt(new D3Q27Interactor(cylinder, grid, noSlipBCAdapter, Interactor3D::INVERSESOLID));
-   SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+   SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
    InteractorsHelper intHelper(grid, metisVisitor);
    intHelper.addInteractor(cylinderInt);
    intHelper.selectBlocks();
@@ -91,8 +93,8 @@ void pf1()
    unsigned long long numberOfNodesPerBlock = (unsigned long long)(blocknx[0])* (unsigned long long)(blocknx[1])* (unsigned long long)(blocknx[2]);
    unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
    unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-   double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-   double needMem = needMemAll / double(comm->getNumberOfProcesses());
+   real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+   real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
    if (myid == 0)
    {
diff --git a/apps/cpu/RisingBubble2D/RisingBubble2D.cpp b/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
index 19677aac71768b7cc0a5acba13e06c22dd6f4658..92495242f11659b3a90b30f4df836642dee5aea8 100644
--- a/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
+++ b/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
@@ -13,6 +13,8 @@ using namespace std;
 
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
     try {
         vf::basics::ConfigurationFile  config;
         config.load(configname);
@@ -20,31 +22,31 @@ void run(string configname)
         string pathname            = config.getValue<string>("pathname");
         int numOfThreads           = config.getValue<int>("numOfThreads");
         vector<int> blocknx        = config.getVector<int>("blocknx");
-        vector<double> boundingBox = config.getVector<double>("boundingBox");
-        double uLB             = config.getValue<double>("uLB");
-        double nuL             = config.getValue<double>("nuL");
-        double nuG             = config.getValue<double>("nuG");
-        double densityRatio    = config.getValue<double>("densityRatio");
+        vector<real> boundingBox = config.getVector<real>("boundingBox");
+        real uLB             = config.getValue<real>("uLB");
+        real nuL             = config.getValue<real>("nuL");
+        real nuG             = config.getValue<real>("nuG");
+        real densityRatio    = config.getValue<real>("densityRatio");
         //double sigma           = config.getValue<double>("sigma");
         int interfaceThickness = config.getValue<int>("interfaceThickness");
-        double radius          = config.getValue<double>("radius");
-        double theta           = config.getValue<double>("contactAngle");
-        double phiL            = config.getValue<double>("phi_L");
-        double phiH            = config.getValue<double>("phi_H");
-        double tauH            = config.getValue<double>("Phase-field Relaxation");
-        double mob             = config.getValue<double>("Mobility");
-
-        double endTime     = config.getValue<double>("endTime");
-        double outTime     = config.getValue<double>("outTime");
-        double availMem    = config.getValue<double>("availMem");
+        real radius          = config.getValue<real>("radius");
+        real theta           = config.getValue<real>("contactAngle");
+        real phiL            = config.getValue<real>("phi_L");
+        real phiH            = config.getValue<real>("phi_H");
+        real tauH            = config.getValue<real>("Phase-field Relaxation");
+        real mob             = config.getValue<real>("Mobility");
+
+        real endTime     = config.getValue<real>("endTime");
+        real outTime     = config.getValue<real>("outTime");
+        real availMem    = config.getValue<real>("availMem");
         int refineLevel    = config.getValue<int>("refineLevel");
-        double Re          = config.getValue<double>("Re");
-        double Eo          = config.getValue<double>("Eo");
-        double dx          = config.getValue<double>("dx");
+        real Re          = config.getValue<real>("Re");
+        real Eo          = config.getValue<real>("Eo");
+        real dx          = config.getValue<real>("dx");
         bool logToFile     = config.getValue<bool>("logToFile");
-        double restartStep = config.getValue<double>("restartStep");
-        double cpStart     = config.getValue<double>("cpStart");
-        double cpStep      = config.getValue<double>("cpStep");
+        real restartStep = config.getValue<real>("restartStep");
+        real cpStart     = config.getValue<real>("cpStart");
+        real cpStep      = config.getValue<real>("cpStep");
         bool newStart      = config.getValue<bool>("newStart");
         //double rStep = config.getValue<double>("rStep");
 
@@ -88,37 +90,37 @@ void run(string configname)
         //Sleep(20000);
 
         // LBMReal dLB = 0; // = length[1] / dx;
-        LBMReal rhoLB = 0.0;
-        LBMReal nuLB  = nuL; //(uLB*dLB) / Re;
+        real rhoLB = 0.0;
+        real nuLB  = nuL; //(uLB*dLB) / Re;
 
         //diameter of circular droplet
-        LBMReal D  = 2.0*radius;
+        real D  = 2.0*radius;
 
         //density retio
         //LBMReal r_rho = densityRatio;
 
         //density of heavy fluid
-        LBMReal rho_h = 1.0;
+        real rho_h = 1.0;
         //density of light fluid
         //LBMReal rho_l = rho_h / r_rho;
 
         //kinimatic viscosity
-        LBMReal nu_h = nuL;
+        real nu_h = nuL;
         //LBMReal nu_l = nuG;
         //#dynamic viscosity
         //LBMReal mu_h = rho_h * nu_h;
         
         //gravity
-        LBMReal g_y = Re * Re * nu_h * nu_h / (D*D*D);
+        real g_y = Re * Re * nu_h * nu_h / (D*D*D);
         //Eotvos number
         //LBMReal Eo = 100;
         //surface tension
-        LBMReal sigma = rho_h * g_y * D * D / Eo;
+        real sigma = rho_h * g_y * D * D / Eo;
 
         //g_y = 0;
 
-        double beta  = 12.0 * sigma / interfaceThickness;
-        double kappa = 1.5 * interfaceThickness * sigma;
+        real beta  = 12.0 * sigma / interfaceThickness;
+        real kappa = 1.5 * interfaceThickness * sigma;
 
         if (myid == 0) {
                 //UBLOG(logINFO, "uLb = " << uLB);
@@ -189,7 +191,7 @@ void run(string configname)
         grid->setPeriodicX3(true);
         grid->setGhostLayerWidth(2);
 
-        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+        SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
 
         //////////////////////////////////////////////////////////////////////////
         // restart
@@ -208,13 +210,13 @@ void run(string configname)
         if (newStart) {
 
             // bounding box
-            double g_minX1 = boundingBox[0];
-            double g_minX2 = boundingBox[2];
-            double g_minX3 = boundingBox[4];
+            real g_minX1 = boundingBox[0];
+            real g_minX2 = boundingBox[2];
+            real g_minX3 = boundingBox[4];
 
-            double g_maxX1 = boundingBox[1];
-            double g_maxX2 = boundingBox[3];
-            double g_maxX3 = boundingBox[5];
+            real g_maxX1 = boundingBox[1];
+            real g_maxX2 = boundingBox[3];
+            real g_maxX3 = boundingBox[5];
 
             // geometry
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
@@ -227,7 +229,7 @@ void run(string configname)
             GenBlocksGridVisitor genBlocks(gridCube);
             grid->accept(genBlocks);
 
-            double dx2 = 2.0 * dx;
+            real dx2 = 2.0 * dx;
             GbCuboid3DPtr wallXmin(new GbCuboid3D(g_minX1 - dx2, g_minX2 - dx2, g_minX3 - dx2, g_minX1, g_maxX2 + dx2, g_maxX3 + dx2));
             GbSystem3D::writeGeoObject(wallXmin.get(), pathname + "/geo/wallXmin", WbWriterVtkXmlASCII::getInstance());
             GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - dx2, g_minX3 - dx2, g_maxX1 + dx2, g_maxX2 + dx2, g_maxX3 + dx2));
@@ -264,9 +266,9 @@ void run(string configname)
             unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
             unsigned long long numberOfNodesPerBlockWithGhostLayer =
                 numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-            double needMemAll =
-                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+            real needMemAll =
+                real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+            real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
             if (myid == 0) {
                 UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
@@ -296,9 +298,9 @@ void run(string configname)
             intHelper.setBC();
 
             // initialization of distributions
-            LBMReal x1c = D; 
-            LBMReal x2c = D; 
-            LBMReal x3c = 1.5; 
+            real x1c = D; 
+            real x2c = D; 
+            real x3c = 1.5; 
             //LBMReal x3c = 2.5 * D;
             mu::Parser fct1;
             fct1.SetExpr("0.5+0.5*tanh(2*(sqrt((x1-x1c)^2+(x2-x2c)^2+(x3-x3c)^2)-radius)/interfaceThickness)");
diff --git a/apps/cpu/TPMSRow/CMakeLists.txt b/apps/cpu/TPMSRow/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6ea54915ed2eb339e0e6e71c9d40c638120e58fa
--- /dev/null
+++ b/apps/cpu/TPMSRow/CMakeLists.txt
@@ -0,0 +1,10 @@
+PROJECT(TPMSRow)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK VirtualFluidsCore muparser basics ${MPI_CXX_LIBRARIES} FILES TPMSRow.cpp )
+
+vf_get_library_name (library_name)
+#target_include_dires(${library_name} PRIVATE ${APPS_ROOT_CPU})
+#target_include_dires(${library_name} PRIVATE "/cluster/lib/boost/1.63.0/gcc")
+target_include_directories(${library_name} PRIVATE ${APPS_ROOT_CPU})
+#target_include_directories(${library_name} PRIVATE "/cluster/lib/boost/1.63.0/gcc/include/")
+
diff --git a/apps/cpu/TPMSRow/TPMSRow.cfg b/apps/cpu/TPMSRow/TPMSRow.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..095b0bb32836969bca0ff86b4a31b20e70c72f2a
--- /dev/null
+++ b/apps/cpu/TPMSRow/TPMSRow.cfg
@@ -0,0 +1,34 @@
+pathname = E:\SimulationsResults\TPMSRow
+#pathname = C:\temp\TPMSRow
+#pathname = /mnt/c/temp/TPMSRow
+numOfThreads = 1
+availMem = 15e10
+refineLevel = 0
+
+#Grid
+length =0.025 0.01 0.01
+blocknx = 25 50 50
+#dx =0.000050000000000000000000000000000000
+dx=0.0001
+#Geometry
+UnitEdgeLength=0.01
+TPMSL = 0.01 0.01 0.01
+TPMSOrigin = 0 0 0
+gridCubeOrigin = -0.005 0 0
+
+#Simulation
+nu = 0.0001523579766536965
+Re = 6563.489631218715
+Re0 = 6563.489631218715
+
+timeAvStart = 300
+timeAvStop = 1500
+
+beginTime = 0
+outTime = 100
+endTime = 200
+logToFile = false
+newStart = true
+restartStep =  1200000
+cpStart =  1200000
+cpStep =  1200000
diff --git a/apps/cpu/TPMSRow/TPMSRow.cpp b/apps/cpu/TPMSRow/TPMSRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..15df6e24316a0f00407218a04df4a7b48293d637
--- /dev/null
+++ b/apps/cpu/TPMSRow/TPMSRow.cpp
@@ -0,0 +1,513 @@
+#include <iostream>
+#include <string>
+
+//#include <boost/pointer_cast.hpp>
+
+#include "VirtualFluids.h"
+
+using namespace std;
+using namespace vf::lbm::dir;
+using namespace vf::basics::constant;
+
+void run(string configname)
+{
+    try {
+        vf::basics::ConfigurationFile config;
+        config.load(configname);
+
+        string pathname             = config.getValue<string>("pathname");
+        int numOfThreads            = config.getValue<int>("numOfThreads");
+        vector<int> blocknx         = config.getVector<int>("blocknx");
+        double beginTime            = config.getValue<double>("beginTime");
+        double endTime              = config.getValue<double>("endTime");
+        double outTime              = config.getValue<double>("outTime");
+        double availMem             = config.getValue<double>("availMem");
+        double nu                   = config.getValue<double>("nu");
+        double dx                   = config.getValue<double>("dx");
+        double UnitEdgeLength       = config.getValue<double>("UnitEdgeLength");
+        double Re                   = config.getValue<double>("Re");
+        double Re0                  = config.getValue<double>("Re0");
+        //double rhoIn                = config.getValue<double>("rhoIn");
+        //string geometry             = config.getValue<string>("geometry");
+        vector<double> length       = config.getVector<double>("length");
+        //vector<double> FunnelL      = config.getVector<double>("FunnelL");
+        //vector<double> FunnelOrigin = config.getVector<double>("FunnelOrigin");
+        
+        double          timeAvStart       = config.getValue<double>("timeAvStart");
+        double          timeAvStop        = config.getValue<double>("timeAvStop");
+
+        vector<double> TPMSL        = config.getVector<double>("TPMSL");
+        vector<double> TPMSOrigin   = config.getVector<double>("TPMSOrigin");
+        vector<double> gridCubeOrigin = config.getVector<double>("gridCubeOrigin");
+        int refineLevel             = config.getValue<int>("refineLevel");
+        bool logToFile              = config.getValue<bool>("logToFile");
+        double restartStep          = config.getValue<double>("restartStep");
+        double cpStart              = config.getValue<double>("cpStart");
+        double cpStep               = config.getValue<double>("cpStep");
+        bool newStart               = config.getValue<bool>("newStart");
+
+        //SPtr<Communicator> comm = MPICommunicator::getInstance();
+        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        int myid                = comm->getProcessID();
+        //int numOfProcesses      = comm->getNumberOfProcesses();
+
+        if (logToFile) {
+#if defined(__unix__)
+            if (myid == 0) {
+                const char *str = pathname.c_str();
+                mkdir(str, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+            }
+#endif
+
+            if (myid == 0) {
+                stringstream logFilename;
+                logFilename << pathname + "/logfile" + UbSystem::toString(UbSystem::getTimeStamp()) + ".txt";
+                UbLog::output_policy::setStream(logFilename.str());
+            }
+        }
+        //dx = 1. / 100. / 112.;
+        double vx = Re * nu / (UnitEdgeLength / dx);
+
+        SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
+
+        //UbSystem::makeDirectory(pathname);
+         //UbSystem::makeDirectory(pathname+ "/mig");
+         //UbSystem::makeDirectory(pathname+ "/geo");
+         //UbSystem::makeDirectory(pathname+ "/blocks/blocks_");
+      
+
+        ////////////////////////////////////////////////////////////////////////
+        // BC Adapter
+        // BCAdapterPtr gradientAdapter(new VelocityBCAdapter(true, true, true, pdxC, pdyC, pdzC, 0.0,
+        // BCFunction::INFCONST));
+        // gradientAdapter->setBcAlgorithm(BCAlgorithmPtr(new FluxBCAlgorithm()));
+        // BCAdapterPtr cubeNoslipAdapter(new NoSlipBCAdapter(1));
+        SPtr<BCAdapter> tpmsNoslipAdapter(new NoSlipBCAdapter());
+        //SPtr<BCAdapter> funnelNoslipAdapter(new NoSlipBCAdapter(1));
+
+           // SPtr<BCAdapter> xMinApr(new DensityBCAdapter(0.0000001));
+         SPtr<BCAdapter> xMinApr(new DensityBCAdapter());
+        //  SPtr<BCAdapter> xMinApr(new VelocityBCAdapter(vx, 0., BCFunction::INFCONST, 0., 0., BCFunction::INFCONST,
+         //  0.,0., BCFunction::INFCONST));
+
+        SPtr<BCAdapter> xMaxApr(new DensityBCAdapter(0.));
+        //SPtr<BCAdapter> yMinApr(new NoSlipBCAdapter(1));
+        //SPtr<BCAdapter> yMaxApr(new NoSlipBCAdapter(1));
+        SPtr<BCAdapter> zMinApr(new NoSlipBCAdapter());
+        SPtr<BCAdapter> zMaxApr(new NoSlipBCAdapter());
+
+        //SPtr<BCAdapter> zMinFunnelApr(new NoSlipBCAdapter(1));
+        //SPtr<BCAdapter> zMaxFunnelApr(new NoSlipBCAdapter(1));
+
+         //tpmsNoslipAdapter->setBcAlgorithm(BCAlgorithmPtr(new NoSlipBCAlgorithm()));
+         //tpmsNoslipAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new ThinWallNoSlipBCAlgorithm()));
+
+        tpmsNoslipAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        //funnelNoslipAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+
+         //xMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NonEqDensityBCAlgorithm()));
+        // xMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
+         xMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NonReflectingInflowBCAlgorithm())); 
+        // xMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityWithDensityBCAlgorithm()));
+         //xMaxApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NonEqDensityBCAlgorithm()));
+         xMaxApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NonReflectingOutflowBCAlgorithmWithRelaxation()));
+        //yMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        //yMaxApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        zMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        zMaxApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+
+        //zMinFunnelApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        //zMaxFunnelApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+
+        ////////////////////////////////////////////////////////////////////////
+        // BC visitor
+        BoundaryConditionsBlockVisitor bcVisitor;
+        // bcVisitor.addBC(cubeNoslipAdapter);
+        bcVisitor.addBC(tpmsNoslipAdapter);
+        //bcVisitor.addBC(funnelNoslipAdapter);
+        bcVisitor.addBC(xMinApr);
+        bcVisitor.addBC(xMaxApr);
+        //bcVisitor.addBC(yMinApr);
+        //bcVisitor.addBC(yMaxApr);
+        bcVisitor.addBC(zMinApr);
+        bcVisitor.addBC(zMaxApr);
+        //bcVisitor.addBC(zMinFunnelApr);
+        //bcVisitor.addBC(zMaxFunnelApr);
+
+        ////////////////////////////////////////////////////////////////////////    
+        //sponge layer
+        //mu::Parser spongeLayer;
+        //spongeLayer.SetExpr("x1>=(sizeX-sizeSP)/dx ? (sizeX/dx-(x1+1))/sizeSP/dx/2.0 + 0.5 : 1.0");
+        //spongeLayer.DefineConst("sizeX", length[0]);
+        //spongeLayer.DefineConst("sizeSP", 0.005);
+        //spongeLayer.DefineConst("dx", dx);
+
+        ////////////////////////////////////////////////////////////////////////
+        // grid, kernel and BCProcessor
+        SPtr<Grid3D> grid(new Grid3D(comm));
+        SPtr<LBMKernel> kernel;
+        //kernel = SPtr<LBMKernel>(new IncompressibleCumulantLBMKernel());
+         kernel = SPtr<LBMKernel>(new CompressibleCumulantLBMKernel());
+        //kernel = SPtr<LBMKernel>(new IncompressibleCumulantWithSpongeLayerLBMKernel());       
+        //kernel->setWithSpongeLayer(true);
+        //kernel->setSpongeLayer(spongeLayer);
+        // kernel = ;
+         // kernel = SPtr<LBMKernel>(new CumulantK17LBMKernel());
+        // 		 mu::Parser fctForcingX1;
+        // 		 fctForcingX1.SetExpr("Fx2");
+        // 		 fctForcingX1.DefineConst("Fx2", 5e-4);
+        // 		 kernel->setForcingX1(fctForcingX1);
+        // 		 kernel->setWithForcing(true);
+        //
+        // SPtr<ThinWallBCProcessor> bcProc(new ThinWallBCProcessor());
+        SPtr<BCProcessor> bcProc(new BCProcessor());
+        kernel->setBCProcessor(bcProc);
+
+
+            SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(
+                comm, MetisPartitioningGridVisitor::LevelIntersected, DIR_00M, MetisPartitioner::RECURSIVE));
+
+        //////////////////////////////////////////////////////////////////////////
+        // restart
+        SPtr<UbScheduler> mSch(new UbScheduler(cpStep, cpStart));
+        SPtr<MPIIOMigrationCoProcessor> migCoProcessor(
+            new MPIIOMigrationCoProcessor(grid, mSch,metisVisitor, pathname + "/mig", comm));
+        migCoProcessor->setLBMKernel(kernel);
+        migCoProcessor->setBCProcessor(bcProc);
+        //////////////////////////////////////////////////////////////////////////
+
+        if (newStart) {
+            GbImplicitSurfacePtr tpms;
+            // tpms = GbImplicitSurfacePtr(new GbImplicitSurface(0, 0, 0, TPMSL[0], TPMSL[1], TPMSL[2], UnitEdgeLength,
+            // dx));
+            tpms = GbImplicitSurfacePtr(new GbImplicitSurface(TPMSOrigin[0], TPMSOrigin[1], TPMSOrigin[2],
+                                                              TPMSOrigin[0] + TPMSL[0],
+                                                              TPMSOrigin[1] + TPMSL[1],
+                                                              TPMSOrigin[2] + TPMSL[2],
+                                                              UnitEdgeLength, dx, 2.5e-4));
+
+            // 	for (int i = 0; i < 12; i++)
+            // 	{
+            // 	  cout << tpms->evaluateImplicitFunction(0.002, 0.002, i/1000., 1.)<<endl;
+            // 	}
+
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(tpms.get(), pathname + "/geo/tpms", WbWriterVtkXmlBinary::getInstance());
+
+
+            //SPtr<GbTriFaceMesh3D> funnel;
+            //SPtr<GbTriFaceMesh3D> funnel(new GbTriFaceMesh3D());
+            //funnel->readMeshFromSTLFileBinary(geometry, true);
+
+          
+
+            //funnel = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(geometry, "tpmsMeshBody", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
+            // funnel->rotate(0.,180,0.);
+
+            //funnel->translate(-funnel->getX1Minimum() - funnel->getLengthX1(),
+                              //tpms->getX2Centroid() - funnel->getX2Centroid(),
+                              //tpms->getX3Centroid() - funnel->getX3Centroid());
+            //if (myid == 0)
+                //GbSystem3D::writeGeoObject(funnel.get(), pathname + "/geo/funnel", WbWriterVtkXmlBinary::getInstance());
+
+            double g_minX1 = gridCubeOrigin[0];
+            double g_minX2 = gridCubeOrigin[1];
+            double g_minX3 = gridCubeOrigin[2];
+
+            double g_maxX1 = gridCubeOrigin[0] + length[0];
+            double g_maxX2 = gridCubeOrigin[1] + length[1];
+            double g_maxX3 = gridCubeOrigin[2] + length[2];
+
+            SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(gridCube.get(), pathname + "/geo/gridCube",
+                                           WbWriterVtkXmlBinary::getInstance());
+
+            
+            SPtr<GbCuboid3D> spongecube(new GbCuboid3D(TPMSOrigin[0] + TPMSL[0], g_minX2 - dx, g_minX3 - dx,
+                                                       g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(spongecube.get(), pathname + "/geo/spongecube",
+                                           WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0) {
+                // UBLOG(logINFO,"rho = " << rhoLB );
+                UBLOG(logINFO, "nu = " << nu);
+                UBLOG(logINFO, "Re = " << Re);
+                UBLOG(logINFO, "vx = " << vx);
+                UBLOG(logINFO, "dx = " << dx);
+                UBLOG(logINFO, "Preprocess - start");
+            }
+
+            grid->setDeltaX(dx);
+            grid->setBlockNX(blocknx[0], blocknx[1], blocknx[2]);
+            grid->setPeriodicX1(false);
+            grid->setPeriodicX2(true);
+            grid->setPeriodicX3(false);
+
+            GenBlocksGridVisitor genBlocks(gridCube);
+            grid->accept(genBlocks);
+
+            SPtr<CoProcessor> ppblocks(new WriteBlocksCoProcessor(grid, SPtr<UbScheduler>(new UbScheduler(1)), pathname,
+                                                                  WbWriterVtkXmlBinary::getInstance(), comm));
+
+            ppblocks->process(0);
+
+            // GbObject3DPtr solidcube(new GbCuboid3D(0, g_minX2, g_minX3, TPMSL[0], g_maxX2, g_maxX3));
+            // if (myid == 0) GbSystem3D::writeGeoObject(solidcube.get(), pathname + "/geo/solidcube",
+            // WbWriterVtkXmlBinary::getInstance());
+
+            GbCuboid3DPtr xMin(
+                new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_minX1, g_maxX2 + dx, g_maxX3 + dx));
+
+            /*GbCuboid3DPtr yMin(
+                new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1, g_minX2, g_maxX3 + dx));
+            GbCuboid3DPtr yMax(
+                new GbCuboid3D(g_minX1 - dx, g_maxX2, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));*/
+
+           /* GbCuboid3DPtr zMinFunnel(
+                new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1, g_maxX2 + dx, g_minX3));
+            GbCuboid3DPtr zMaxFunnel(
+                new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_maxX3, g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));*/
+
+            //g_minX1 = 0.;
+            // g_minX2 = -length[1] / 2.0;
+            // g_minX3 = -length[2] / 2.0;
+
+            //g_maxX1 = TPMSL[0];
+            // g_maxX2 = length[1] / 2.0;
+            // g_maxX3 -= TPMSL[2] / 2.0;
+
+            GbCuboid3DPtr xMax(new GbCuboid3D(g_maxX1 , g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx,
+                                              g_maxX3 + dx));
+
+            //GbCuboid3DPtr zMin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, 1.1 * g_maxX1, g_maxX2 + dx,
+            //                                  g_minX3 + 0.5 * (length[2] - TPMSL[2])));
+            //GbCuboid3DPtr zMax(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_maxX3 - 0.5 * (length[2] - TPMSL[2]),
+            //                                  1.1 * g_maxX1, g_maxX2 + dx, g_maxX3));
+
+            GbCuboid3DPtr zMin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_minX3));
+            GbCuboid3DPtr zMax(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_maxX3, g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));
+
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(xMin.get(), pathname + "/geo/xMin", WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(xMax.get(), pathname + "/geo/xMax", WbWriterVtkXmlBinary::getInstance());
+           /* if (myid == 0)
+                GbSystem3D::writeGeoObject(yMin.get(), pathname + "/geo/yMin", WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(yMax.get(), pathname + "/geo/yMax", WbWriterVtkXmlBinary::getInstance());*/
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(zMin.get(), pathname + "/geo/zMin", WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(zMax.get(), pathname + "/geo/zMax", WbWriterVtkXmlBinary::getInstance());
+
+ /*           if (myid == 0)
+                GbSystem3D::writeGeoObject(zMinFunnel.get(), pathname + "/geo/zMinFunnel",
+                                           WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(zMaxFunnel.get(), pathname + "/geo/zMaxFunnel",
+                                           WbWriterVtkXmlBinary::getInstance());*/
+
+            // D3Q27InteractorPtr cubeInt = D3Q27InteractorPtr(new D3Q27Interactor(solidcube, grid, cubeNoslipAdapter,
+            // Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> tpmsInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(tpms, grid, tpmsNoslipAdapter, Interactor3D::SOLID, Interactor3D::POINTS));
+            //SPtr<Interactor3D> funnelInt = SPtr<D3Q27TriFaceMeshInteractor>(
+                //new D3Q27TriFaceMeshInteractor(funnel, grid, funnelNoslipAdapter, Interactor3D::SOLID));
+            // D3Q27TriFaceMeshInteractorPtr tpmsInt = D3Q27TriFaceMeshInteractorPtr(new
+            // D3Q27TriFaceMeshInteractor(tpms, grid, tpmsNoslipAdapter, Interactor3D::SOLID));
+            //  tpmsInt->setQs2(0);
+
+            SPtr<D3Q27Interactor> xMinInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(xMin, grid, xMinApr, Interactor3D::SOLID, Interactor3D::POINTS));
+            SPtr<D3Q27Interactor> xMaxInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(xMax, grid, xMaxApr, Interactor3D::SOLID, Interactor3D::POINTS));
+          /*  SPtr<D3Q27Interactor> yMinInt =
+                SPtr<D3Q27Interactor>(new D3Q27Interactor(yMin, grid, yMinApr, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> yMaxInt =
+                SPtr<D3Q27Interactor>(new D3Q27Interactor(yMax, grid, yMaxApr, Interactor3D::SOLID));*/
+            SPtr<D3Q27Interactor> zMinInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(zMin, grid, zMinApr, Interactor3D::SOLID, Interactor3D::POINTS));
+            SPtr<D3Q27Interactor> zMaxInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(zMax, grid, zMaxApr, Interactor3D::SOLID, Interactor3D::POINTS));
+
+            /*SPtr<D3Q27Interactor> zMinFunnelInt =
+                SPtr<D3Q27Interactor>(new D3Q27Interactor(zMinFunnel, grid, zMinFunnelApr, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> zMaxFunnelInt =
+                SPtr<D3Q27Interactor>(new D3Q27Interactor(zMaxFunnel, grid, zMaxFunnelApr, Interactor3D::SOLID));*/
+
+            // return;
+
+            InteractorsHelper intHelper(grid, metisVisitor,false);
+
+            //intHelper.addInteractor(cubeInt);
+            //intHelper.addInteractor(zMinFunnelInt);
+            //intHelper.addInteractor(zMaxFunnelInt);
+            //intHelper.addInteractor(funnelInt);
+
+            intHelper.addInteractor(tpmsInt);
+            intHelper.addInteractor(zMinInt);
+            intHelper.addInteractor(zMaxInt);
+
+            intHelper.addInteractor(xMinInt);
+            intHelper.addInteractor(xMaxInt);
+            //intHelper.addInteractor(yMinInt);
+            //intHelper.addInteractor(yMaxInt);
+
+
+            intHelper.selectBlocks();
+            // intHelper.selectBlocks2();
+
+            
+            // domain decomposition for threads
+            PQueuePartitioningGridVisitor pqPartVisitor(numOfThreads);
+            grid->accept(pqPartVisitor);
+
+            ppblocks->process(0);
+            ppblocks.reset();
+
+            //////////////////////////////////////////////////////////////////////////
+            unsigned long long numberOfBlocks = (unsigned long long)grid->getNumberOfBlocks();
+            int ghostLayer                    = 3;
+            unsigned long long numberOfNodesPerBlock =
+                (unsigned long long)(blocknx[0]) * (unsigned long long)(blocknx[1]) * (unsigned long long)(blocknx[2]);
+            unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
+            unsigned long long numberOfNodesPerBlockWithGhostLayer =
+                numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
+            double needMemAll =
+                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
+            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+
+            if (myid == 0) {
+                UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
+                UBLOG(logINFO, "Number of nodes  = " << numberOfNodes);
+                int minInitLevel = grid->getCoarsestInitializedLevel();
+                int maxInitLevel = grid->getFinestInitializedLevel();
+                for (int level = minInitLevel; level <= maxInitLevel; level++) {
+                    int nobl = grid->getNumberOfBlocks(level);
+                    UBLOG(logINFO, "Number of blocks for level " << level << " = " << nobl);
+                    UBLOG(logINFO, "Number of nodes for level " << level << " = " << nobl * numberOfNodesPerBlock);
+                }
+                UBLOG(logINFO, "Necessary memory  = " << needMemAll << " bytes");
+                UBLOG(logINFO, "Necessary memory per process = " << needMem << " bytes");
+                UBLOG(logINFO, "Available memory per process = " << availMem << " bytes");
+            }
+            //////////////////////////////////////////////////////////////////////////
+
+            SetKernelBlockVisitor kernelVisitor(kernel, nu, availMem, needMem);
+            grid->accept(kernelVisitor);
+
+            //          if (refineLevel > 0)
+            //          {
+            // 			 SetUndefinedNodesBlockVisitor undefNodesVisitor;
+            //             grid->accept(undefNodesVisitor);
+            //          }
+
+            intHelper.setBC();
+
+            SpongeLayerBlockVisitor spongeLayerVisitor(spongecube, kernel, nu, DIR_P00);
+            grid->accept(spongeLayerVisitor);
+
+            grid->accept(bcVisitor);
+
+            // initialization of distributions
+            InitDistributionsBlockVisitor initVisitor;
+             //initVisitor.setVx1(0.001);
+            // initVisitor.setVx1(uLB);
+            grid->accept(initVisitor);
+
+            // boundary conditions grid
+            {
+                SPtr<UbScheduler> geoSch(new UbScheduler(1));
+                SPtr<CoProcessor> ppgeo(new WriteBoundaryConditionsCoProcessor(grid, geoSch, pathname, WbWriterVtkXmlBinary::getInstance(), comm));
+                ppgeo->process(0);
+                ppgeo.reset();
+            }
+            if (myid == 0)
+                UBLOG(logINFO, "Preprocess - end");
+        } 
+        else 
+        {
+            if (myid == 0) {
+                UBLOG(logINFO, "Parameters:");
+                //UBLOG(logINFO, "uLb = " << uLB);
+                //UBLOG(logINFO, "rho = " << rhoLB);
+                //UBLOG(logINFO, "nuLb = " << nuLB);
+                UBLOG(logINFO, "Re = " << Re);
+                UBLOG(logINFO, "dx = " << dx);
+                UBLOG(logINFO, "number of levels = " << refineLevel + 1);
+                UBLOG(logINFO, "numOfThreads = " << numOfThreads);
+                UBLOG(logINFO, "path = " << pathname);
+            }
+
+            migCoProcessor->restart((int)restartStep);
+            grid->setTimeStep(restartStep);
+
+            if (myid == 0)
+                UBLOG(logINFO, "Restart - end");
+        }
+        // set connectors
+        SPtr<InterpolationProcessor> iProcessor(new CompressibleOffsetInterpolationProcessor());
+        //SetConnectorsBlockVisitor setConnsVisitor(comm, true, D3Q27System::ENDDIR, nu, iProcessor);
+        OneDistributionSetConnectorsBlockVisitor setConnsVisitor(comm);
+        grid->accept(setConnsVisitor);
+
+
+        
+
+        SPtr<UbScheduler> visSch(new UbScheduler(outTime/*,beginTime,endTime*/));
+        SPtr<CoProcessor> pp(new WriteMacroscopicQuantitiesCoProcessor(grid, visSch, pathname, WbWriterVtkXmlBinary::getInstance(), conv, comm));
+        
+        SPtr<UbScheduler> tavSch(new UbScheduler(100, timeAvStart, timeAvStop));
+        SPtr<TimeAveragedValuesCoProcessor> tav(new TimeAveragedValuesCoProcessor(grid, pathname, WbWriterVtkXmlBinary::getInstance(), tavSch, comm,
+        TimeAveragedValuesCoProcessor::Density | TimeAveragedValuesCoProcessor::Velocity | TimeAveragedValuesCoProcessor::Fluctuations));
+        tav->setWithGhostLayer(true);        
+        
+        SPtr<UbScheduler> nuSch(new UbScheduler(100, 0, endTime / 2));
+        mu::Parser fnu;
+        fnu.SetExpr("(L*u/T)*(((T-2*t)/Re0)+(2*t/Re))");
+        fnu.DefineConst("Re0", Re0);
+        fnu.DefineConst("Re", Re);
+        fnu.DefineConst("T", endTime);
+        fnu.DefineConst("L", (UnitEdgeLength / dx));
+        fnu.DefineConst("u", vx);
+        SPtr<CoProcessor> nupr(new DecreaseViscosityCoProcessor(grid, nuSch, &fnu, comm));
+
+        SPtr<UbScheduler> nupsSch(new UbScheduler(100, 100, 100000000));
+        SPtr<CoProcessor> npr(new NUPSCounterCoProcessor(grid, nupsSch, numOfThreads, comm));
+
+        //omp_set_num_threads(numOfThreads);
+        numOfThreads = 1;
+        SPtr<UbScheduler> stepGhostLayer(visSch);
+        SPtr<Calculator> calculator(new BasicCalculator(grid, stepGhostLayer, int(endTime)));
+
+        //calculator->addCoProcessor(nupr);
+        calculator->addCoProcessor(npr);
+        calculator->addCoProcessor(pp);
+        calculator->addCoProcessor(migCoProcessor);
+        calculator->addCoProcessor(tav);
+
+        if (myid == 0)
+            UBLOG(logINFO, "Simulation-start");
+        calculator->calculate();
+        if (myid == 0)
+            UBLOG(logINFO, "Simulation-end");
+    } catch (std::exception &e) {
+        cerr << e.what() << endl << flush;
+    } catch (std::string &s) {
+        cerr << s << endl;
+    } catch (...) {
+        cerr << "unknown exception" << endl;
+    }
+}
+// Entry point: the first command-line argument is the configuration file handed to run().
+int main(int argc, char *argv[])
+{
+    // argv[1] may only be read when argc says it exists (argc is allowed to be 0).
+    if (argv == NULL || argc < 2 || argv[1] == NULL) {
+        cout << "Configuration file is missing!" << endl;
+        return 1; // report the usage error to the caller instead of exiting with status 0
+    }
+    run(string(argv[1]));
+    return 0;
+}
diff --git a/apps/cpu/Thermoplast/CMakeLists.txt b/apps/cpu/Thermoplast/CMakeLists.txt
deleted file mode 100644
index 5624b03136a7c901d1fe69fa224464104d3a08a6..0000000000000000000000000000000000000000
--- a/apps/cpu/Thermoplast/CMakeLists.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
-
-########################################################
-## C++ PROJECT                                       ###
-########################################################
-PROJECT(thermoplast)
-IF(${USE_DEM_COUPLING})
-	INCLUDE(${APPS_ROOT}/IncludsList.cmake) 
-	INCLUDE(${SOURCE_ROOT}/DemCoupling/IncludsList.cmake)
-
-	#################################################################
-	###   LOCAL FILES                                             ###
-	#################################################################
-	FILE(GLOB SPECIFIC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.h
-							 ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
-							 ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp  )
-	 
-	SET(ALL_SOURCES ${ALL_SOURCES} ${SPECIFIC_FILES})
-	SOURCE_GROUP(src FILES ${SPECIFIC_FILES})
-	  
-	SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} VirtualFluids)
-	
-	#message("CAB_ADDITIONAL_LINK_LIBRARIES: " ${CAB_ADDITIONAL_LINK_LIBRARIES})
-
-	#################################################################
-	###   CREATE PROJECT                                          ###
-	#################################################################
-	CREATE_CAB_PROJECT(thermoplast BINARY)
-ENDIF()
\ No newline at end of file
diff --git a/apps/cpu/Thermoplast/config.txt b/apps/cpu/Thermoplast/config.txt
deleted file mode 100644
index 18e7e7ea896acdc20527de1d6af9154fbac16e6b..0000000000000000000000000000000000000000
--- a/apps/cpu/Thermoplast/config.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-#simulation parameters
-
-#x1min x2min x3min x1max x2max x3max
-#boundingBox = 0 0 0 300 1520 2320
-
-#boundingBox = 60 1370 130 190 1530 320 #test bb
-
-boundingBox = 60 20 130 190 170 320 #test bb
-
-#boundingBox = 60 0 10 190 1530 750 #test bb 2
-
-#boundingBox = 60 0 10 190 1530 2320  #production bb
- 
-blocknx = 10 10 10 
-#blocknx = 300 420 320
-availMem = 25e9
-#uLB = 0.1
-uLB = 0.03
-Re = 300
-
-#PE parameters
-#test pe offset
-peMinOffset = 46 2 2
-peMaxOffset = -8 -25 -2
-
-#production pe offset
-#peMinOffset = 46 18 14
-#peMaxOffset = -8 -25 -23
-
-sphereTime = 10
-
-#geometry files
-pathGeo = d:/Projects/ThermoPlast/SimPerfMS
-michel = /Werkzeug_Michel_MS.stl
-plexiglas = /plexiglas.stl
-
-#obstacle
-obstacle = true
-obstacleGeo1 = /QuaderMS.stl #/DreieckMS_2.stl # DreieckSchoen.iges.stl #/QuaderMS.stl
-obstacleGeo2 = /KugelMS_2.stl
-obstacleGeo3 = /DreieckMS.stl
-
-pathOut = g:/temp/thermoplastObst
-
-logToFile = false
-
-#restart
-restart = false
-restartStep = 1000
-
-#timing
-nupsTime = 100 100 1000000
-cpStart = 1000
-cpStep =  1000
-outTime = 1000
-endTime = 100000
-
diff --git a/apps/cpu/Thermoplast/thermoplast.cpp b/apps/cpu/Thermoplast/thermoplast.cpp
deleted file mode 100644
index b004543254e4788ffa6a7314536b1811c217700b..0000000000000000000000000000000000000000
--- a/apps/cpu/Thermoplast/thermoplast.cpp
+++ /dev/null
@@ -1,763 +0,0 @@
-#include <iostream>
-#include <string>
-
-#include "PointerDefinitions.h"
-
-#include <iostream>
-#include <string>
-#include <memory>
-#include <array>
-
-#include "VirtualFluids.h"
-#include <muParser.h>
-#include "ForceCalculator.h"
-
-
-#include <MovableObjectInteractor.h>
-#include <DemCoProcessor.h>
-#include <PePartitioningGridVisitor.h>
-
-#include <PePhysicsEngineMaterialAdapter.h>
-#include <PePhysicsEngineGeometryAdapter.h>
-#include <PePhysicsEngineSolverAdapter.h>
-#include "PeLoadBalancerAdapter.h"
-
-#include <VelocityBcReconstructor.h>
-#include <EquilibriumReconstructor.h>
-#include <ExtrapolationReconstructor.h>
-
-#include <DummyPhysicsEngineSolverAdapter.h>
-#include <DummyPhysicsEngineMaterialAdapter.h>
-#include <DummyPhysicsEngineGeometryAdapter.h>
-#include <WriteDemObjectsCoProcessor.h>
-#include <WritePeBlocksCoProcessor.h>
-
-#include "CreateDemObjectsCoProcessor.h"
-#include "RestartDemObjectsCoProcessor.h"
-
-using namespace std;
-
-//simulation bounding box
-double g_minX1 = 0;
-double g_minX2 = 0;
-double g_minX3 = 0;
-
-double g_maxX1 = 0;
-double g_maxX2 = 0;
-double g_maxX3 = 0;
-
-vector<double> peMinOffset;
-vector<double> peMaxOffset;
-
-string          pathOut;// = "d:/temp/thermoplastCluster";
-string          pathGeo;// = "d:/Projects/ThermoPlast/Geometrie";
-
-void addNozzle(SPtr<Grid3D> grid, SPtr<vf::mpi::Communicator> comm, SPtr<BCAdapter> noSlipBCAdapter/*, InteractorsHelper& intHelper*/)
-{
-   int myid = comm->getProcessID();
-   if (myid==0) UBLOG(logINFO, "Add nozzles:start");
-
-   SPtr<UbScheduler> sch(new UbScheduler(1));
-   WriteGbObjectsCoProcessor gbObjectsCoProcessor(grid, sch, pathOut, WbWriterVtkXmlBinary::getInstance(), comm);
-
-   std::vector< SPtr<Interactor3D> > interactors;
-
-   for (int i = 0; i <= 55; i++)
-   {
-      SPtr<GbTriFaceMesh3D> bbGeo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+"/n_bb_new/bb_new"+UbSystem::toString(i)+".stl", "bb", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-      SPtr<Interactor3D> bbInt = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(bbGeo, grid, noSlipBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES));
-      //GbSystem3D::writeGeoObject(bbGeo.get(), pathOut+"/ns/bbGeo"+UbSystem::toString(i), WbWriterVtkXmlBinary::getInstance());
-      //intHelper.addInteractor(bbInt);
-      if (myid==0) gbObjectsCoProcessor.addGbObject(bbGeo);
-      interactors.push_back(bbInt);
-   }
-
-   for (int i = 0; i <= 334; i++)
-   {
-      SPtr<GbTriFaceMesh3D> bbGeo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+"/n_bb/bb"+UbSystem::toString(i)+".stl", "bb", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-      SPtr<Interactor3D> bbInt = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(bbGeo, grid, noSlipBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES));
-      //GbSystem3D::writeGeoObject(bbGeo.get(), pathOut+"/ns/bbGeo"+UbSystem::toString(i), WbWriterVtkXmlBinary::getInstance());
-      //intHelper.addInteractor(bbInt);
-      if (myid==0) gbObjectsCoProcessor.addGbObject(bbGeo);
-      interactors.push_back(bbInt);
-   }
-
-   for (int i = 0; i <= 51; i++)
-   {
-      SPtr<GbTriFaceMesh3D> bsGeo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+"/n_bs/bs"+UbSystem::toString(i)+".stl", "bs", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-      SPtr<Interactor3D> bsInt = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(bsGeo, grid, noSlipBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES));
-      //intHelper.addInteractor(bsInt);
-      if (myid==0) gbObjectsCoProcessor.addGbObject(bsGeo);
-      interactors.push_back(bsInt);
-   }
-
-   std::array<int, 6> n ={ 0,1,3,4,6,7 };
-
-   for (int i = 0; i < n.size(); i++)
-   {
-      SPtr<GbTriFaceMesh3D> biGeo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+"/n_bi/bi"+UbSystem::toString(n[i])+".stl", "bi", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-      SPtr<Interactor3D> biInt = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(biGeo, grid, noSlipBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES));
-      //intHelper.addInteractor(biInt);
-      if (myid==0) gbObjectsCoProcessor.addGbObject(biGeo);
-      interactors.push_back(biInt);
-   }
-
-   if (myid==0) gbObjectsCoProcessor.process(0);
-
-
-   for (SPtr<Interactor3D> interactor : interactors)
-   {
-      std::vector< std::shared_ptr<Block3D> > blockVector;
-      UbTupleInt3 blockNX=grid->getBlockNX();
-      SPtr<GbObject3D> geoObject(interactor->getGbObject3D());
-      double ext = 0.0;
-      std::array<double, 6> AABB ={ geoObject->getX1Minimum(),geoObject->getX2Minimum(),geoObject->getX3Minimum(),geoObject->getX1Maximum(),geoObject->getX2Maximum(),geoObject->getX3Maximum() };
-      grid->getBlocksByCuboid(AABB[0]-(double)val<1>(blockNX)*ext, AABB[1]-(double)val<2>(blockNX)*ext, AABB[2]-(double)val<3>(blockNX)*ext, AABB[3]+(double)val<1>(blockNX)*ext, AABB[4]+(double)val<2>(blockNX)*ext, AABB[5]+(double)val<3>(blockNX)*ext, blockVector);
-      for (std::shared_ptr<Block3D> block : blockVector)
-      {
-         if (block->getKernel())
-         {
-            interactor->setBCBlock(block);
-         }
-      }
-      interactor->initInteractor();
-   }
-
-   if (myid==0) UBLOG(logINFO, "Add nozzles:end");
-}
-
-std::shared_ptr<DemCoProcessor> makePeCoProcessor(SPtr<Grid3D> grid, SPtr<vf::mpi::Communicator> comm, const SPtr<UbScheduler> peScheduler, const std::shared_ptr<LBMUnitConverter> lbmUnitConverter, int maxpeIterations)
-{
-   double peRelaxtion = 0.7;
-   //int maxpeIterations = 10000;
-   //Beschleunigung g
-   double g = 9.81 * lbmUnitConverter->getFactorAccWToLb();
-   //Vector3D globalLinearAcc(0.0, -g, 0.0);
-   //Vector3D globalLinearAcc(0.0, 0.0, -g);
-   Vector3D globalLinearAcc(0.0, 0.0, 0.0);
-
-   std::shared_ptr<PePhysicsEngineMaterialAdapter> planeMaterial = std::make_shared<PePhysicsEngineMaterialAdapter>("granular", 1.0, 0, 0.1 / 2, 0.1 / 2, 0.5, 1, 1, 0, 0);
-
-   const int gridNX1 = val<1>(grid->getBlockNX()) * grid->getNX1();
-   const int gridNX2 = val<2>(grid->getBlockNX()) * grid->getNX2();
-   const int gridNX3 = val<3>(grid->getBlockNX()) * grid->getNX3();
-
-   //UbTupleInt3 simulationDomain(gridNx, gridNy, gridNz);
-   //std::array<double, 6> simulationDomain = {1, 1, 1, 30, 30, 30};
-   std::array<double, 6> simulationDomain ={ g_minX1, g_minX2, g_minX3, g_minX1+gridNX1, g_minX2+gridNX2, g_minX3+gridNX3 };
-   UbTupleInt3 numberOfBlocks(grid->getNX1(), grid->getNX2(), grid->getNX3());
-   //UbTupleInt3 numberOfBlocks((simulationDomain[3]-simulationDomain[0])/val<1>(grid->getBlockNX()), (simulationDomain[4]-simulationDomain[1])/val<2>(grid->getBlockNX()), (simulationDomain[5]-simulationDomain[2])/val<3>(grid->getBlockNX()));
-   UbTupleBool3 isPeriodic(grid->isPeriodicX1(), grid->isPeriodicX2(), grid->isPeriodicX3());
-   Vector3D minOffset(peMinOffset[0], peMinOffset[1], peMinOffset[2]);
-   Vector3D maxOffset(peMaxOffset[0], peMaxOffset[1], peMaxOffset[2]);
-
-   SPtr<GbObject3D> boxPE(new GbCuboid3D(simulationDomain[0]+minOffset[0], simulationDomain[1]+minOffset[1], simulationDomain[2]+minOffset[2], simulationDomain[3]+maxOffset[0], simulationDomain[4]+maxOffset[1], simulationDomain[5]+maxOffset[2]));
-   GbSystem3D::writeGeoObject(boxPE.get(), pathOut + "/geo/boxPE", WbWriterVtkXmlBinary::getInstance());
-
-   std::shared_ptr<PeParameter> peParamter = std::make_shared<PeParameter>(peRelaxtion, maxpeIterations, globalLinearAcc,
-      planeMaterial, simulationDomain, numberOfBlocks, isPeriodic, minOffset, maxOffset);
-   std::shared_ptr<PeLoadBalancerAdapter> loadBalancer(new PeLoadBalancerAdapter(grid, comm->getNumberOfProcesses(), comm->getProcessID()));
-   std::shared_ptr<PhysicsEngineSolverAdapter> peSolver = std::make_shared<PePhysicsEngineSolverAdapter>(peParamter, loadBalancer);
-   //create obstacle
-   //test
-   std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(peSolver)->createObstacle(Vector3D( 90, 260, 472), Vector3D( 115, 320, 460));
-   //production
-   //std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(peSolver)->createObstacle(Vector3D( 90, 430, 472), Vector3D( 115, 320, 460));
-   //std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(peSolver)->createObstacle(Vector3D( 100, 430, 1840), Vector3D( 130, 320, 470));
-   //std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(peSolver)->createObstacle(Vector3D( 100, 821, 1159), Vector3D( 125, 625, 625));
-   //walberla::pe::createSphere(*globalBodyStorage, *forest, *storageId, 0, walberla::pe::Vec3( -720, 820, 1150), 900, global, communicating, infiniteMass);
-   //walberla::pe::createSphere(*globalBodyStorage, *forest, *storageId, 0, walberla::pe::Vec3( -720, 220, 472), 900, material, global, communicating, infiniteMass);
-
-   SPtr<CoProcessor> peblocks(new WritePeBlocksCoProcessor(grid, SPtr<UbScheduler>(new UbScheduler(1)), pathOut, WbWriterVtkXmlBinary::getInstance(), comm, std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(peSolver)->getBlockForest()));
-   peblocks->process(0);
-   peblocks.reset();
-
-   const std::shared_ptr<ForceCalculator> forceCalculator = std::make_shared<ForceCalculator>(comm);
-
-   return std::make_shared<DemCoProcessor>(grid, peScheduler, comm, forceCalculator, peSolver);
-}
-
-void createSpheres(double radius, Vector3D origin, int maxX2, int maxX3, double uLB, SPtr<CreateDemObjectsCoProcessor> createSphereCoProcessor)
-{
-   double d = 2.0*radius;
-   double dividerX2 = (double)maxX2/2.0;
-   double dividerX3 = (double)maxX3/2.0;
-   for (int x3 = 0; x3 < maxX3; x3++)
-      for (int x2 = 0; x2 < maxX2; x2++)
-         //for (int x1 = 0; x1 < 1; x1++)
-      {
-         //SPtr<GbObject3D> sphere(new GbSphere3D(origin[0]+2.0*d*(double)x1, origin[1]+(double)x2*1.0*d, origin[2]+(double)x3*1.0*d, radius));
-         SPtr<GbObject3D> sphere(new GbSphere3D(origin[0]+2.0*d, origin[1]+(double)x2*1.0*d, origin[2]+(double)x3*1.0*d, radius));
-         createSphereCoProcessor->addGeoObject(sphere, Vector3D(uLB, -uLB+uLB/dividerX2*(double)x2, -uLB+uLB/dividerX3*(double)x3));
-      }
-}
-
-void thermoplast(string configname)
-{
-   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
-   int myid = comm->getProcessID();
-
-   vf::basics::ConfigurationFile   config;
-   config.load(configname);
-
-   vector<int>     blocknx = config.getVector<int>("blocknx");
-   vector<double>  boundingBox = config.getVector<double>("boundingBox");
-
-   int             endTime = config.getValue<int>("endTime");
-   double          outTime = config.getValue<double>("outTime");
-   double          availMem = config.getValue<double>("availMem");
-   double          uLB = config.getValue<double>("uLB");
-   double          Re = config.getValue<double>("Re");
-
-   string          michel = config.getValue<string>("michel");
-   string          plexiglas = config.getValue<string>("plexiglas");
-   double          sphereTime = config.getValue<double>("sphereTime");
-
-   double          cpStart = config.getValue<double>("cpStart");
-   double          cpStep = config.getValue<double>("cpStep");
-   bool            restart = config.getValue<bool>("restart");
-   int             restartStep = config.getValue<int>("restartStep");
-
-   peMinOffset = config.getVector<double>("peMinOffset");
-   peMaxOffset = config.getVector<double>("peMaxOffset");
-
-   pathOut = config.getValue<string>("pathOut");
-   pathGeo = config.getValue<string>("pathGeo");
-
-   vector<int>     nupsTime = config.getVector<int>("nupsTime");
-
-   bool            logToFile = config.getValue<bool>("logToFile");
-   if (logToFile)
-   {
-#if defined(__unix__)
-      if (myid==0)
-      {
-         const char* str = pathOut.c_str();
-         mkdir(str, S_IRWXU|S_IRWXG|S_IROTH|S_IXOTH);
-      }
-#endif 
-
-      if (myid==0)
-      {
-         stringstream logFilename;
-         logFilename<<pathOut+"/logfile"+UbSystem::getTimeStamp()+".txt";
-         UbLog::output_policy::setStream(logFilename.str());
-      }
-   }
-
-   bool obstacle = config.getValue<bool>("obstacle");
-   string obstacleGeo1 = config.getValue<string>("obstacleGeo1");
-   string obstacleGeo2 = config.getValue<string>("obstacleGeo2");
-   string obstacleGeo3 = config.getValue<string>("obstacleGeo3");
-
-   if (myid==0) UBLOG(logINFO, "BEGIN LOGGING - " << UbSystem::getTimeStamp());
-
-   //parameters
-   //string          pathOut = "d:/temp/thermoplast3";
-   //string          pathGeo = "d:/Projects/ThermoPlast/Geometrie";
-   int             numOfThreads = 1;
-   //int             blocknx[3] ={ 10,10,10 };
-   //double          endTime = 1000000;
-   //double          outTime = 300;
-   //double          availMem = 8e9;
-   double          deltax = 1;
-   double          rhoLB = 0.0;
-   //double          uLB =  0.1;
-   double          radiusLB = 7.5;
-   double          radiusWorld = 1.5e-3;
-   //double          nuLB = 0.000333333;
-   //double          Re = (uLB*2.0*radiusLB)/nuLB;
-   //double          Re = 900;
-   double          nuLB = (uLB*2.0*radiusLB)/Re;
-
-   //geometry definition
-
-   //simulation bounding box
-   g_minX1 = boundingBox[0];
-   g_minX2 = boundingBox[1];
-   g_minX3 = boundingBox[2];
-
-   g_maxX1 = boundingBox[3];
-   g_maxX2 = boundingBox[4];
-   g_maxX3 = boundingBox[5];
-
-   double blockLength = blocknx[0]*deltax;
-
-   //Grid definition
-   SPtr<Grid3D> grid(new Grid3D(comm));
-   grid->setDeltaX(deltax);
-   grid->setBlockNX(blocknx[0], blocknx[1], blocknx[2]);
-   grid->setPeriodicX1(false);
-   grid->setPeriodicX2(false);
-   grid->setPeriodicX3(false);
-
-   //boundary conditions definition 
-   //////////////////////////////////////////////////////////////////////////////
-   SPtr<BCAdapter> noSlipBCAdapter(new NoSlipBCAdapter());
-   //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
-   noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new ThinWallNoSlipBCAlgorithm()));
-
-   mu::Parser fct;
-   fct.SetExpr("U");
-   fct.DefineConst("U", uLB);
-   SPtr<BCAdapter> inflowAdapter(new VelocityBCAdapter(true, false, false, fct, 0, BCFunction::INFCONST));
-   inflowAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
-   //inflowAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityWithDensityBCAlgorithm()));
-
-   SPtr<BCAdapter> outflowAdapter(new DensityBCAdapter(rhoLB));
-   outflowAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new EqDensityBCAlgorithm()));
-   //outflowAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NonEqDensityBCAlgorithm()));
-   //outflowAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NonReflectingOutflowBCAlgorithm()));
-
-   //sphere BC
-   mu::Parser fct2;
-   fct2.SetExpr("U");
-   fct2.DefineConst("U", 0.0);
-   SPtr<BCAdapter> velocityBcParticleAdapter(new VelocityBCAdapter(true, false, false, fct2, 0, BCFunction::INFCONST));
-   velocityBcParticleAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityWithDensityBCAlgorithm()));
-
-   //boundary conditions visitor
-   SPtr<BoundaryConditionsBlockVisitor> bcVisitor(new BoundaryConditionsBlockVisitor());
-   bcVisitor->addBC(noSlipBCAdapter);
-   bcVisitor->addBC(inflowAdapter);
-   bcVisitor->addBC(outflowAdapter);
-   bcVisitor->addBC(velocityBcParticleAdapter);
-   //////////////////////////////////////////////////////////////////////////////////
-
-   //LBM kernel definition
-   SPtr<LBMKernel> kernel;
-   kernel = SPtr<LBMKernel>(new IncompressibleCumulantLBMKernel());
-   //SPtr<BCProcessor> bcProc(new BCProcessor());
-   SPtr<BCProcessor> bcProc(new ThinWallBCProcessor());
-   kernel->setBCProcessor(bcProc);
-
-   //if (myid==0) UBLOG(logINFO, "Read obstacleGeo1:start");
-   //SPtr<GbTriFaceMesh3D> obstacleGeo1geo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+obstacleGeo1, "michelGeo", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-   //if (myid==0) UBLOG(logINFO, "Read obstacleGeo1:end");
-   //if (myid==0) GbSystem3D::writeGeoObject(obstacleGeo1geo.get(), pathOut+"/geo/obstacleGeo1", WbWriterVtkXmlBinary::getInstance());
-   //g_minX1 = obstacleGeo1geo->getX1Minimum();
-   //g_minX2 = obstacleGeo1geo->getX2Minimum();
-   //g_minX3 = obstacleGeo1geo->getX3Minimum();
-   //g_maxX1 = obstacleGeo1geo->getX1Maximum();
-   //g_maxX2 = obstacleGeo1geo->getX2Maximum();
-   //g_maxX3 = obstacleGeo1geo->getX3Maximum();
-
-
-
-   //blocks generating
-   SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
-   if (myid == 0) GbSystem3D::writeGeoObject(gridCube.get(), pathOut + "/geo/gridCube", WbWriterVtkXmlBinary::getInstance());
-   GenBlocksGridVisitor genBlocks(gridCube);
-   grid->accept(genBlocks);
-
-
-   //{
-     //SPtr<Interactor3D> obstacleGeo1int = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(obstacleGeo1geo, grid, noSlipBCAdapter, Interactor3D::SOLID));
-     //SPtr<Grid3DVisitor> peVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::BSW, MetisPartitioner::KWAY));
-      //InteractorsHelper intHelper(grid, peVisitor, true);
-     //intHelper.addInteractor(obstacleGeo1int);
-     //intHelper.selectBlocks();
-
-     ////create LBM kernel
-      ////SetKernelBlockVisitor kernelVisitor(kernel, nuLB, availMem, 1);
-      ////grid->accept(kernelVisitor);
-
-      ////SPtr<Interactor3D> obstacleGeo1int = SPtr<D3Q27Interactor>(new D3Q27Interactor(obstacleGeo1geo, grid, noSlipBCAdapter, Interactor3D::SOLID));
-      ////UBLOG(logINFO, "Obst: start");
-      ////std::vector< std::shared_ptr<Block3D> > blockVector;
-      ////UbTupleInt3 blockNX=grid->getBlockNX();
-      ////SPtr<GbObject3D> geoObject(obstacleGeo1int->getGbObject3D());
-      ////double ext = 0.0;
-      ////std::array<double, 6> AABB ={ geoObject->getX1Minimum(),geoObject->getX2Minimum(),geoObject->getX3Minimum(),geoObject->getX1Maximum(),geoObject->getX2Maximum(),geoObject->getX3Maximum() };
-      ////grid->getBlocksByCuboid(AABB[0]-(double)val<1>(blockNX)*ext, AABB[1]-(double)val<2>(blockNX)*ext, AABB[2]-(double)val<3>(blockNX)*ext, AABB[3]+(double)val<1>(blockNX)*ext, AABB[4]+(double)val<2>(blockNX)*ext, AABB[5]+(double)val<3>(blockNX)*ext, blockVector);
-      ////for (std::shared_ptr<Block3D> block : blockVector)
-      ////{
-         ////if (block->getKernel())
-         ////{
-            ////obstacleGeo1int->setBCBlock(block);
-         ////}
-      ////}
-      ////UBLOG(logINFO, "Obst: select blocks");
-      ////obstacleGeo1int->initInteractor();
-      ////UBLOG(logINFO, "Obst: end");
-
-      //SPtr<CoProcessor> ppblocks(new WriteBlocksCoProcessor(grid, SPtr<UbScheduler>(new UbScheduler(1)), pathOut, WbWriterVtkXmlBinary::getInstance(), comm));
-      //ppblocks->process(0);
-      //ppblocks.reset();
-   //}
-
-   //return;
-
-
-   /////////////////////////////////////////////////////
-   ////PE domain test
-   //std::array<double, 6> simulationDomain ={ g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3 };
-   //Vector3D minOffset(peMinOffset[0], peMinOffset[1], peMinOffset[2]);
-   //Vector3D maxOffset(peMaxOffset[0], peMaxOffset[1], peMaxOffset[2]);
-   //SPtr<GbObject3D> boxPE(new GbCuboid3D(simulationDomain[0]+minOffset[0], simulationDomain[1]+minOffset[1], simulationDomain[2]+minOffset[2], simulationDomain[3]+maxOffset[0], simulationDomain[4]+maxOffset[1], simulationDomain[5]+maxOffset[2]));
-   //GbSystem3D::writeGeoObject(boxPE.get(), pathOut + "/geo/boxPE", WbWriterVtkXmlBinary::getInstance());
-   //return;
-   //////////////////////////////////////////////////////
-
-
-   if (myid == 0)
-   {
-      UBLOG(logINFO, "Parameters:");
-      UBLOG(logINFO, "* uLB    = " << uLB);
-      UBLOG(logINFO, "* rhoLB  = " << rhoLB);
-      UBLOG(logINFO, "* nuLB   = " << nuLB);
-      UBLOG(logINFO, "* deltaX = " << deltax);
-      UBLOG(logINFO, "* radius = " << radiusLB);
-      UBLOG(logINFO, "* Re     = " << Re);
-      UBLOG(logINFO, "* number of threads   = "<<numOfThreads);
-      UBLOG(logINFO, "* number of processes = "<<comm->getNumberOfProcesses());
-      UBLOG(logINFO, "* path = "<<pathOut);
-      UBLOG(logINFO, "Preprocess - start");
-   }
-
-   //GbCuboid3DPtr geoInjector2(new GbCuboid3D(-12, -5, 1210, 64, 105, 1320));
-   //if (myid == 0) GbSystem3D::writeGeoObject(geoInjector2.get(), pathOut + "/geo/geoInjector2", WbWriterVtkXmlASCII::getInstance());
-
-   //GbCuboid3DPtr geoInjector5(new GbCuboid3D(-12, 1415, 205, 64, 1525, 315));
-   //if (myid == 0) GbSystem3D::writeGeoObject(geoInjector5.get(), pathOut + "/geo/geoInjector5", WbWriterVtkXmlASCII::getInstance());
-
-   GbCuboid3DPtr geoInjector4(new GbCuboid3D(-12, -5, 205, 64, 105, 315));
-   if (myid == 0) GbSystem3D::writeGeoObject(geoInjector4.get(), pathOut + "/geo/geoInjector4", WbWriterVtkXmlASCII::getInstance());
-
-   //GbCuboid3DPtr geoInjector7(new GbCuboid3D(28, 705, 542, 103, 815, 652));
-   //if (myid == 0) GbSystem3D::writeGeoObject(geoInjector7.get(), pathOut + "/geo/geoInjector7", WbWriterVtkXmlASCII::getInstance());
-
-   GbCuboid3DPtr testWallGeo(new GbCuboid3D(g_minX1-blockLength, g_minX2 - blockLength, g_maxX3, g_maxX1 + blockLength, g_maxX2 + blockLength, g_maxX3 + blockLength));
-   if (myid == 0) GbSystem3D::writeGeoObject(testWallGeo.get(), pathOut + "/geo/testWallGeo", WbWriterVtkXmlASCII::getInstance());
-
-   if (!restart)
-   {
-      //box
-      SPtr<GbObject3D> box(new GbCuboid3D(g_minX1-blockLength, g_minX2, g_minX3, g_maxX1+blockLength, g_maxX2, g_maxX3));
-      GbSystem3D::writeGeoObject(box.get(), pathOut + "/geo/box", WbWriterVtkXmlBinary::getInstance());
-
-      //michel
-      if (myid==0) UBLOG(logINFO, "Read michelGeo:start");
-      SPtr<GbTriFaceMesh3D> michelGeo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+michel, "michelGeo", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-      if (myid==0) UBLOG(logINFO, "Read michelGeo:end");
-      if (myid==0) GbSystem3D::writeGeoObject(michelGeo.get(), pathOut+"/geo/michelGeo", WbWriterVtkXmlBinary::getInstance());
-
-      //plexiglas
-      if (myid==0) UBLOG(logINFO, "Read plexiglasGeo:start");
-      SPtr<GbTriFaceMesh3D> plexiglasGeo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+plexiglas, "plexiglasGeo", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-      if (myid==0) UBLOG(logINFO, "Read plexiglasGeo:end");
-      if (myid==0) GbSystem3D::writeGeoObject(plexiglasGeo.get(), pathOut+"/geo/plexiglasGeo", WbWriterVtkXmlBinary::getInstance());
-
-      //inflow
-      GbCuboid3DPtr geoOutflowMichel(new GbCuboid3D(g_minX1-blockLength, g_minX2 - blockLength, g_minX3 - blockLength, g_minX1, g_maxX2 + blockLength, g_maxX3 + blockLength));
-      if (myid == 0) GbSystem3D::writeGeoObject(geoOutflowMichel.get(), pathOut + "/geo/geoOutflowMichel", WbWriterVtkXmlASCII::getInstance());
-
-      //outflow
-      GbCuboid3DPtr geoOutflowPlexiglas(new GbCuboid3D(g_maxX1, g_minX2 - blockLength, g_minX3 - blockLength, g_maxX1 + blockLength, g_maxX2 + blockLength, g_maxX3 + blockLength));
-      if (myid == 0) GbSystem3D::writeGeoObject(geoOutflowPlexiglas.get(), pathOut + "/geo/geoOutflowPlexiglas", WbWriterVtkXmlASCII::getInstance());
-
-      //set boundary conditions for blocks and create process decomposition for MPI
-      SPtr<D3Q27Interactor> boxInt(new D3Q27Interactor(box, grid, noSlipBCAdapter, Interactor3D::INVERSESOLID));
-
-      //inflow
-      //SPtr<D3Q27Interactor> inflowInjector2Int = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoInjector2, grid, inflowAdapter, Interactor3D::SOLID));
-      //SPtr<D3Q27Interactor> inflowInjector5Int = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoInjector5, grid, inflowAdapter, Interactor3D::SOLID));
-      SPtr<D3Q27Interactor> inflowInjector4Int = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoInjector4, grid, inflowAdapter, Interactor3D::SOLID));
-      //SPtr<D3Q27Interactor> inflowInjector7Int = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoInjector7, grid, inflowAdapter, Interactor3D::SOLID));
-
-      SPtr<D3Q27Interactor> outflowMichelInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflowMichel, grid, outflowAdapter, Interactor3D::SOLID));
-
-      //outflow
-      SPtr<D3Q27Interactor> outflowPlexiglasInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflowPlexiglas, grid, outflowAdapter, Interactor3D::SOLID));
-
-      //michel
-      SPtr<Interactor3D> michelInt = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(michelGeo, grid, noSlipBCAdapter, Interactor3D::SOLID));
-
-      //plexiglas
-      SPtr<Interactor3D> plexiglasInt = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(plexiglasGeo, grid, noSlipBCAdapter, Interactor3D::SOLID));
-
-      SPtr<D3Q27Interactor> testWallInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(testWallGeo, grid, inflowAdapter, Interactor3D::SOLID));
-
-      SPtr<Interactor3D> obstacleGeo1int, obstacleGeo2int, obstacleGeo3int;
-      if (obstacle)
-      {
-         //obstacleGeo1
-         if (myid==0) UBLOG(logINFO, "Read obstacleGeo1:start");
-         SPtr<GbTriFaceMesh3D> obstacleGeo1geo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+obstacleGeo1, "michelGeo", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-         if (myid==0) UBLOG(logINFO, "Read obstacleGeo1:end");
-         if (myid==0) GbSystem3D::writeGeoObject(obstacleGeo1geo.get(), pathOut+"/geo/obstacleGeo1", WbWriterVtkXmlBinary::getInstance());
-         obstacleGeo1int = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(obstacleGeo1geo, grid, noSlipBCAdapter, Interactor3D::SOLID));
-         //obstacleGeo2
-         if (myid==0) UBLOG(logINFO, "Read obstacleGeo2:start");
-         SPtr<GbTriFaceMesh3D> obstacleGeo2geo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+obstacleGeo2, "michelGeo", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-         if (myid==0) UBLOG(logINFO, "Read obstacleGeo2:end");
-         if (myid==0) GbSystem3D::writeGeoObject(obstacleGeo2geo.get(), pathOut+"/geo/obstacleGeo2", WbWriterVtkXmlBinary::getInstance());
-         obstacleGeo2int = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(obstacleGeo2geo, grid, noSlipBCAdapter, Interactor3D::SOLID));
-         //obstacleGeo3
-         if (myid==0) UBLOG(logINFO, "Read obstacleGeo3:start");
-         SPtr<GbTriFaceMesh3D> obstacleGeo3geo = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(pathGeo+obstacleGeo3, "michelGeo", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
-         if (myid==0) UBLOG(logINFO, "Read obstacleGeo3:end");
-         if (myid==0) GbSystem3D::writeGeoObject(obstacleGeo3geo.get(), pathOut+"/geo/obstacleGeo3", WbWriterVtkXmlBinary::getInstance());
-         obstacleGeo3int = SPtr<D3Q27TriFaceMeshInteractor>(new D3Q27TriFaceMeshInteractor(obstacleGeo3geo, grid, noSlipBCAdapter, Interactor3D::SOLID));
-      }
-
-      //////////////////////////////////////////////////////////////////////////
-      //SPtr<Grid3DVisitor> peVisitor(new PePartitioningGridVisitor(comm, demCoProcessor));
-      SPtr<Grid3DVisitor> peVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::BSW, MetisPartitioner::KWAY));
-      InteractorsHelper intHelper(grid, peVisitor, true);
-
-      //intHelper.addInteractor(obstacleGeo1int);
-
-      intHelper.addInteractor(boxInt);
-      intHelper.addInteractor(michelInt);
-      intHelper.addInteractor(plexiglasInt);
-      //intHelper.addInteractor(inflowInjector2Int);
-      //intHelper.addInteractor(inflowInjector5Int);
-      intHelper.addInteractor(inflowInjector4Int);
-      //intHelper.addInteractor(inflowInjector7Int);
-      intHelper.addInteractor(outflowPlexiglasInt);
-      intHelper.addInteractor(outflowMichelInt);
-      intHelper.addInteractor(obstacleGeo1int);
-      intHelper.addInteractor(obstacleGeo2int);
-      intHelper.addInteractor(obstacleGeo3int);
-      //intHelper.addInteractor(testWallInt);
-      intHelper.selectBlocks();
-
-      //write data for visualization of block grid
-      SPtr<CoProcessor> ppblocks(new WriteBlocksCoProcessor(grid, SPtr<UbScheduler>(new UbScheduler(1)), pathOut, WbWriterVtkXmlBinary::getInstance(), comm));
-      ppblocks->process(0);
-      ppblocks.reset();
-
-      unsigned long long numberOfBlocks = (unsigned long long)grid->getNumberOfBlocks();
-      int ghostLayer = 3;
-      unsigned long long numberOfNodesPerBlock = (unsigned long long)(blocknx[0])* (unsigned long long)(blocknx[1])* (unsigned long long)(blocknx[2]);
-      unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
-      unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
-      double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-      double needMem = needMemAll / double(comm->getNumberOfProcesses());
-
-      if (myid == 0)
-      {
-         UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
-         UBLOG(logINFO, "Number of nodes  = " << numberOfNodes);
-         int minInitLevel = grid->getCoarsestInitializedLevel();
-         int maxInitLevel = grid->getFinestInitializedLevel();
-         for (int level = minInitLevel; level <= maxInitLevel; level++)
-         {
-            int nobl = grid->getNumberOfBlocks(level);
-            UBLOG(logINFO, "Number of blocks for level " << level << " = " << nobl);
-            UBLOG(logINFO, "Number of nodes for level " << level << " = " << nobl*numberOfNodesPerBlock);
-         }
-         UBLOG(logINFO, "Necessary memory  = " << needMemAll << " bytes");
-         UBLOG(logINFO, "Necessary memory per process = " << needMem << " bytes");
-         UBLOG(logINFO, "Available memory per process = " << availMem << " bytes");
-      }
-
-      //create LBM kernel
-      SetKernelBlockVisitor kernelVisitor(kernel, nuLB, availMem, needMem);
-      grid->accept(kernelVisitor);
-
-      addNozzle(grid, comm, noSlipBCAdapter/*,intHelper*/);
-
-      intHelper.setBC();
-
-
-      ////////////////////////////////////////////////////////////////////////////////////////////////////
-      //{
-         ////UBLOG(logINFO, "Obst: start, rank="<<myid);
-            //std::vector< std::shared_ptr<Block3D> > blockVector;
-            //UbTupleInt3 blockNX=grid->getBlockNX();
-            //SPtr<GbObject3D> geoObject(obstacleGeo3int->getGbObject3D());
-            //double ext = 0.0;
-            //std::array<double, 6> AABB ={ geoObject->getX1Minimum(),geoObject->getX2Minimum(),geoObject->getX3Minimum(),geoObject->getX1Maximum(),geoObject->getX2Maximum(),geoObject->getX3Maximum() };
-            //grid->getBlocksByCuboid(AABB[0]-(double)val<1>(blockNX)*ext, AABB[1]-(double)val<2>(blockNX)*ext, AABB[2]-(double)val<3>(blockNX)*ext, AABB[3]+(double)val<1>(blockNX)*ext, AABB[4]+(double)val<2>(blockNX)*ext, AABB[5]+(double)val<3>(blockNX)*ext, blockVector);
-            //for (std::shared_ptr<Block3D> block : blockVector)
-            //{
-               //if (block->getKernel())
-               //{
-                  //obstacleGeo3int->setBCBlock(block);
-               //}
-            //}
-            //UBLOG(logINFO, "Obst: select blocks, number of blocks="<<blockVector.size()<<", rank="<<myid);
-            //obstacleGeo3int->initInteractor();
-            //UBLOG(logINFO, "Obst: end, rank="<<myid);
-      //}
-      //////////////////////////////////////////////////////////////////////////////////////////////////////
-            //initialization of distributions
-      InitDistributionsBlockVisitor initVisitor;
-      //initVisitor.setVx1(uLB);
-      grid->accept(initVisitor);
-
-      //write data for visualization of boundary conditions
-      {
-         //SPtr<UbScheduler> geoSch(new UbScheduler(1));
-         //WriteBoundaryConditionsCoProcessor ppgeo(grid, geoSch, pathOut, WbWriterVtkXmlBinary::getInstance(), comm);
-         //ppgeo.process(0);
-
-         //WriteMacroscopicQuantitiesCoProcessor ppInit(grid, geoSch, pathOut, WbWriterVtkXmlBinary::getInstance(), SPtr<LBMUnitConverter>(new LBMUnitConverter()), comm);
-         //ppInit.process(0);
-      }
-
-      if (myid == 0) UBLOG(logINFO, "Preprocess - end");
-   }
-   //restart
-   //UBLOG(logINFO, "restart definition - start, rank="<<myid);
-   SPtr<UbScheduler> restartSch(new UbScheduler(cpStep, cpStart));
-   //SPtr<MPIIORestartCoProcessor> restartCoProcessor(new MPIIORestartCoProcessor(grid, restartSch, pathOut, comm));
-   SPtr<MPIIOMigrationCoProcessor> restartCoProcessor(new MPIIOMigrationCoProcessor(grid, restartSch, pathOut, comm));
-   restartCoProcessor->setLBMKernel(kernel);
-   restartCoProcessor->setBCProcessor(bcProc);
-
-   if (restart)
-   {
-      //restartStep = restartCoProcessor->readCpTimeStep();
-      restartCoProcessor->restart(restartStep);
-   }
-
-   //PE initialization
-   double refLengthLb = radiusLB*2.0;
-   double refLengthWorld = radiusWorld*2.0;
-   const std::shared_ptr<LBMUnitConverter> lbmUnitConverter = std::make_shared<LBMUnitConverter>(refLengthWorld, LBMUnitConverter::WORLD_MATERIAL::AIR_20C, refLengthLb);
-   if (myid == 0) std::cout << lbmUnitConverter->toString() << std::endl;
-   double rhoSphere = 915 * lbmUnitConverter->getFactorDensityWToLb();  // kg/m^3
-   if (myid == 0) UBLOG(logINFO, "rhoSphere = "<<rhoSphere);
-   SPtr<PhysicsEngineMaterialAdapter> sphereMaterial(new PePhysicsEngineMaterialAdapter("Polypropylen", rhoSphere, 0, 0.15, 0.1, 0.45, 0.5, 1, 0, 0));
-   const int timestep = 2;
-   const SPtr<UbScheduler> peScheduler(new UbScheduler(timestep));
-   int maxpeIterations = 10;//endTime/2;
-   SPtr<DemCoProcessor> demCoProcessor = makePeCoProcessor(grid, comm, peScheduler, lbmUnitConverter, maxpeIterations);
-   demCoProcessor->setBlockVisitor(bcVisitor);
-
-   ////////////////////////////////////////////////////////////////////////////
-   ////generating spheres 
-   //UBLOG(logINFO, "generating spheres - start, rank="<<myid);
-   SPtr<UbScheduler> sphereScheduler(new UbScheduler(sphereTime/*10,10,10*/));
-   double toleranz = 0.0;//0.05;
-   SPtr<CreateDemObjectsCoProcessor> createSphereCoProcessor(new CreateDemObjectsCoProcessor(grid, sphereScheduler, comm, demCoProcessor, sphereMaterial, toleranz));
-   //UBLOG(logINFO, "generating spheres - stop, rank="<<myid);
-
-   ////restart
-   ////UBLOG(logINFO, "restart definition - start, rank="<<myid);
-   //SPtr<UbScheduler> restartSch(new UbScheduler(cpStep, cpStart));
-   ////SPtr<MPIIORestartCoProcessor> restartCoProcessor(new MPIIORestartCoProcessor(grid, restartSch, pathOut, comm));
-   //SPtr<MPIIOMigrationCoProcessor> restartCoProcessor(new MPIIOMigrationCoProcessor(grid, restartSch, pathOut, comm));
-   //restartCoProcessor->setLBMKernel(kernel);
-   //restartCoProcessor->setBCProcessor(bcProc);
-   SPtr<RestartDemObjectsCoProcessor> restartDemObjectsCoProcessor(new RestartDemObjectsCoProcessor(grid, restartSch, pathOut, demCoProcessor, createSphereCoProcessor, radiusLB, comm));
-   //UBLOG(logINFO, "restart definition - stop, rank="<<myid);
-
-   if (restart)
-   {
-      createSphereCoProcessor->setToleranz(0.05);
-      restartDemObjectsCoProcessor->restart(restartStep);
-      createSphereCoProcessor->setToleranz(toleranz);
-   }
-
-   //set connectors
-   //UBLOG(logINFO, "set connectors - start, rank="<<myid);
-   InterpolationProcessorPtr iProcessor(new IncompressibleOffsetInterpolationProcessor());
-   SetConnectorsBlockVisitor setConnsVisitor(comm, true, D3Q27System::ENDDIR, nuLB, iProcessor);
-   grid->accept(setConnsVisitor);
-   //UBLOG(logINFO, "set connectors - stop, rank="<<myid);
-
-   //BC visitor
-   //UBLOG(logINFO, "BC visitor - start, rank="<<myid);
-   grid->accept(*bcVisitor.get());
-   //UBLOG(logINFO, "BC visitor - stop, rank="<<myid);
-
-   //sphere prototypes
-   //UBLOG(logINFO, "sphere prototypes - start, rank="<<myid);
-   double d = 2.0*radiusLB;
-   int maxX2 = 5;
-   int maxX3 = 5;
-   //Vector3D origin1(g_minX1+peMinOffset[0]-1.5*d, geoInjector5->getX2Minimum()+1.4*d-6.0, geoInjector5->getX3Minimum()+1.5*d);
-   //createSpheres(radiusLB, origin1, maxX2, maxX3, uLB, createSphereCoProcessor);
-   //Vector3D origin2(g_minX1+peMinOffset[0]-1.5*d, geoInjector2->getX2Minimum()+2.2*d, geoInjector2->getX3Minimum()+1.5*d);
-   //createSpheres(radiusLB, origin2, maxX2, maxX3, uLB, createSphereCoProcessor);
-
-   Vector3D origin2(g_minX1+peMinOffset[0]-1.5*d, geoInjector4->getX2Minimum()+2.4*d, geoInjector4->getX3Minimum()+1.5*d);
-   createSpheres(radiusLB,origin2,maxX2,maxX3,uLB,createSphereCoProcessor);
-
-   //maxX2 = 7;
-   //maxX3 = 7;
-   //Vector3D origin3(g_minX1+peMinOffset[0]-1.5*d, geoInjector7->getX2Minimum()+0.5*d, geoInjector7->getX3Minimum()+0.5*d);
-   //createSpheres(radiusLB,origin3,maxX2,maxX3,uLB,createSphereCoProcessor);
-
-
-   createSphereCoProcessor->process(0);
-
-   //write data for visualization of macroscopic quantities
-   SPtr<UbScheduler> visSch(new UbScheduler(outTime));
-   SPtr<WriteMacroscopicQuantitiesCoProcessor> writeMQCoProcessor(new WriteMacroscopicQuantitiesCoProcessor(grid, visSch, pathOut,
-      WbWriterVtkXmlBinary::getInstance(), SPtr<LBMUnitConverter>(new LBMUnitConverter()), comm));
-
-   SPtr<WriteBoundaryConditionsCoProcessor> writeBCCoProcessor(new WriteBoundaryConditionsCoProcessor(grid, visSch, pathOut,
-      WbWriterVtkXmlBinary::getInstance(), comm));
-
-   SPtr<WriteDemObjectsCoProcessor> writeDemObjectsCoProcessor(new WriteDemObjectsCoProcessor(grid, visSch, pathOut, WbWriterVtkXmlBinary::getInstance(), demCoProcessor, comm));
-
-   if (!restart)
-   {
-      writeMQCoProcessor->process(0);
-      writeBCCoProcessor->process(0);
-      writeDemObjectsCoProcessor->process(0);
-   }
-   ////performance control
-   SPtr<UbScheduler> nupsSch(new UbScheduler(nupsTime[0], nupsTime[1], nupsTime[2]));
-   SPtr<NUPSCounterCoProcessor> npr(new NUPSCounterCoProcessor(grid, nupsSch, numOfThreads, comm));
-
-   //start simulation 
-   //omp_set_num_threads(numOfThreads);
-   SPtr<UbScheduler> stepGhostLayer(peScheduler);
-   SPtr<Calculator> calculator(new BasicCalculator(grid, stepGhostLayer, endTime));
-
-   calculator->addCoProcessor(npr);
-   calculator->addCoProcessor(createSphereCoProcessor);
-   calculator->addCoProcessor(demCoProcessor);
-   ////calculator->addCoProcessor(writeBCCoProcessor);
-   calculator->addCoProcessor(writeDemObjectsCoProcessor);
-   calculator->addCoProcessor(writeMQCoProcessor);
-   calculator->addCoProcessor(restartDemObjectsCoProcessor);
-   calculator->addCoProcessor(restartCoProcessor);
-
-   if (myid == 0) UBLOG(logINFO, "Simulation-start");
-   calculator->calculate();
-   if (myid == 0) UBLOG(logINFO, "Simulation-end");
-   if (myid==0) UBLOG(logINFO, "END LOGGING - " << UbSystem::getTimeStamp());
-}
-
-//////////////////////////////////////////////////////////////////////////
-int main(int argc, char* argv[])
-{
-   try
-   {
-      //Sleep(30000);
-      walberla::Environment env(argc, argv);
-
-      if (argv!=NULL)
-      {
-         //if (argv[1]!=NULL)
-         //{
-            //thermoplast(string("thermoplast.cfg"));
-         thermoplast(string("d:/Projects/VirtualFluidsGit/source/Applications/Thermoplast/config.txt"));
-         //}
-         //else
-         //{
-            //cout<<"Configuration file must be set!: "<<argv[0]<<" <config file>"<<endl<<std::flush;
-         //}
-      }
-      return 0;
-   }
-   catch (std::exception& e)
-   {
-      UBLOG(logERROR, e.what());
-   }
-   catch (std::string& s)
-   {
-      UBLOG(logERROR, s);
-   }
-   catch (...)
-   {
-      UBLOG(logERROR, "unknown exception");
-   }
-}
diff --git a/apps/cpu/ViskomatXL/viskomat.cpp b/apps/cpu/ViskomatXL/viskomat.cpp
index 0b20e9367f446f0f8d194e59d026d5a91f3e32e9..be1f8bab3f99f5577e5a9ca0b426572a87c5a6af 100644
--- a/apps/cpu/ViskomatXL/viskomat.cpp
+++ b/apps/cpu/ViskomatXL/viskomat.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -18,21 +20,21 @@ void bflow(string configname)
       string          geoFile = config.getValue<string>("geoFile");
       int             numOfThreads = config.getValue<int>("numOfThreads");
       vector<int>     blocknx = config.getVector<int>("blocknx");
-      vector<double>  boundingBox = config.getVector<double>("boundingBox");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      vector<real>  boundingBox = config.getVector<real>("boundingBox");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
-      double          cpStep = config.getValue<double>("cpStep");
-      double          cpStart = config.getValue<double>("cpStart");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          deltax = config.getValue<real>("deltax");
+      real          cpStep = config.getValue<real>("cpStep");
+      real          cpStart = config.getValue<real>("cpStart");
       bool            newStart = config.getValue<bool>("newStart");
-      double          OmegaLB = config.getValue<double>("OmegaLB");
-      double          tau0 = config.getValue<double>("tau0");
-      double          N = config.getValue<double>("N");
-      double          mu = config.getValue<double>("mu");
+      real          OmegaLB = config.getValue<real>("OmegaLB");
+      real          tau0 = config.getValue<real>("tau0");
+      real          N = config.getValue<real>("N");
+      real          mu = config.getValue<real>("mu");
 
 
       vf::basics::ConfigurationFile   viscosity;
@@ -58,22 +60,22 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       //double N  = 70; //rpm
-      double Omega = 2 * UbMath::PI / 60.0 * N; //rad/s
+      real Omega = 2 * UbMath::PI / 60.0 * N; //rad/s
       //double mu    = 5; //Pa s
-      double R     = 0.165 / 2.0; //m
-      double rho   = 970; //kg/m^3
-      double Re    = Omega * R * R * rho / mu;
+      real R     = 0.165 / 2.0; //m
+      real rho = 2150;// 970; //kg/m^3
+      real Re    = Omega * R * R * rho / mu;
 
       //double nuLB = OmegaLB * R * 1e3 * R * 1e3 / Re;
 
-      double dx = deltax * 1e-3;
-      double nuLB = OmegaLB * (R / dx)*(R / dx) / Re;
+      real dx = deltax * 1e-3;
+      real nuLB = OmegaLB * (R / dx)*(R / dx) / Re;
 
-      double Bm = tau0/(mu*Omega);
-      double tau0LB = Bm*nuLB*OmegaLB;
+      real Bm = tau0/(mu*Omega);
+      real tau0LB = Bm*nuLB*OmegaLB;
 
 
       //double dx = 1.0 * 1e-3;
@@ -89,14 +91,14 @@ void bflow(string configname)
 
       //bounding box
 
-      double g_minX1 = boundingBox[0];
-      double g_maxX1 = boundingBox[1];
+      real g_minX1 = boundingBox[0];
+      real g_maxX1 = boundingBox[1];
 
-      double g_minX2 = boundingBox[2];
-      double g_maxX2 = boundingBox[3];
+      real g_minX2 = boundingBox[2];
+      real g_maxX2 = boundingBox[3];
       
-      double g_minX3 = boundingBox[4];
-      double g_maxX3 = boundingBox[5];
+      real g_minX3 = boundingBox[4];
+      real g_maxX3 = boundingBox[5];
 
       SPtr<Rheology> thix = Rheology::getInstance();
       //thix->setPowerIndex(n);
@@ -191,7 +193,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::RECURSIVE));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::RECURSIVE));
       ////////////////////////////////////////////
       //////////////////////////////////////////////////////////////////////////
       //restart
@@ -255,7 +257,7 @@ void bflow(string configname)
           g_maxX2 + deltax, g_maxX3 + deltax));
       if (myid == 0) GbSystem3D::writeGeoObject(wallXmin.get(), outputPath + "/geo/wallXmin", WbWriterVtkXmlASCII::getInstance());
 
-      GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - deltax, g_minX3 - deltax, g_maxX1 +  (double)blocknx[0]*deltax,
+      GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - deltax, g_minX3 - deltax, g_maxX1 +  (real)blocknx[0]*deltax,
           g_maxX2 + deltax, g_maxX3 + deltax));
       if (myid == 0) GbSystem3D::writeGeoObject(wallXmax.get(), outputPath + "/geo/wallXmax", WbWriterVtkXmlASCII::getInstance());
 
@@ -326,8 +328,8 @@ void bflow(string configname)
          unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-         double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
@@ -420,7 +422,7 @@ void bflow(string configname)
       SPtr<WriteMacroscopicQuantitiesCoProcessor> writeMQCoProcessor(new WriteMacroscopicQuantitiesCoProcessor(grid, visSch, outputPath, WbWriterVtkXmlBinary::getInstance(), SPtr<LBMUnitConverter>(new LBMUnitConverter()), comm));
       //writeMQCoProcessor->process(100);
 
-      SPtr<UbScheduler> forceSch(new UbScheduler(1000));
+      SPtr<UbScheduler> forceSch(new UbScheduler(100));
       SPtr<CalculateTorqueCoProcessor> fp = make_shared<CalculateTorqueCoProcessor>(grid, forceSch, outputPath + "/torque/TorqueRotor.csv", comm);
       fp->addInteractor(rotorInt);
       SPtr<CalculateTorqueCoProcessor> fp2 = make_shared<CalculateTorqueCoProcessor>(grid, forceSch, outputPath + "/torque/TorqueStator.csv", comm);
diff --git a/apps/cpu/rheometer/rheometer.cpp b/apps/cpu/rheometer/rheometer.cpp
index e79d9d13a3763cb5502c24f12fb6ec4d27651814..1506bde80f21bce580c6aa781d1075fdc86bcd62 100644
--- a/apps/cpu/rheometer/rheometer.cpp
+++ b/apps/cpu/rheometer/rheometer.cpp
@@ -8,6 +8,8 @@ using namespace std;
 
 void bflow(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       vf::basics::ConfigurationFile   config;
@@ -19,24 +21,24 @@ void bflow(string configname)
       vector<int>     blocknx = config.getVector<int>("blocknx");
       //vector<double>  boundingBox = config.getVector<double>("boundingBox");
       //double          nuLB = 1.5e-3;//config.getValue<double>("nuLB");
-      double          endTime = config.getValue<double>("endTime");
-      double          outTime = config.getValue<double>("outTime");
-      double          availMem = config.getValue<double>("availMem");
+      real          endTime = config.getValue<real>("endTime");
+      real          outTime = config.getValue<real>("outTime");
+      real          availMem = config.getValue<real>("availMem");
       int             refineLevel = config.getValue<int>("refineLevel");
       bool            logToFile = config.getValue<bool>("logToFile");
-      double          restartStep = config.getValue<double>("restartStep");
-      double          deltax = config.getValue<double>("deltax");
-      double          cpStep = config.getValue<double>("cpStep");
-      double          cpStart = config.getValue<double>("cpStart");
+      real          restartStep = config.getValue<real>("restartStep");
+      real          deltax = config.getValue<real>("deltax");
+      real          cpStep = config.getValue<real>("cpStep");
+      real          cpStart = config.getValue<real>("cpStart");
       bool            newStart = config.getValue<bool>("newStart");
-      double          OmegaLB = config.getValue<double>("OmegaLB");
-      double          tau0 = config.getValue<double>("tau0");
-      double          scaleFactor = config.getValue<double>("scaleFactor");
-      double          resolution = config.getValue<double>("resolution");
+      real          OmegaLB = config.getValue<real>("OmegaLB");
+      real          tau0 = config.getValue<real>("tau0");
+      real          scaleFactor = config.getValue<real>("scaleFactor");
+      real          resolution = config.getValue<real>("resolution");
 
       vf::basics::ConfigurationFile   viscosity;
       viscosity.load(viscosityPath + "/viscosity.cfg");
-      double nuLB = viscosity.getValue<double>("nuLB");
+      real nuLB = viscosity.getValue<real>("nuLB");
 
       //outputPath = outputPath + "/rheometerBingham_" + config.getValue<string>("resolution") + "_" + config.getValue<string>("OmegaLB");
 
@@ -61,7 +63,7 @@ void bflow(string configname)
          }
       }
 
-      LBMReal rhoLB = 0.0;
+      real rhoLB = 0.0;
 
       //akoustic
        OmegaLB /= scaleFactor;
@@ -97,13 +99,13 @@ void bflow(string configname)
       //double g_maxX2 = resolution;// boundingBox[1];
       //double g_maxX3 = 1.0; // boundingBox[2];
 
-      double g_minX1 = 0;
-      double g_minX2 = 0;
-      double g_minX3 = 0;
+      real g_minX1 = 0;
+      real g_minX2 = 0;
+      real g_minX3 = 0;
 
-      double g_maxX1 = resolution; // boundingBox[0];
-      double g_maxX2 = resolution; // boundingBox[1];
-      double g_maxX3 = 1.0; // boundingBox[2];
+      real g_maxX1 = resolution; // boundingBox[0];
+      real g_maxX2 = resolution; // boundingBox[1];
+      real g_maxX3 = 1.0; // boundingBox[2];
 
       //double g_minX1 = -boundingBox[0]/2.0;
       //double g_minX2 = -boundingBox[1] / 2.0;
@@ -223,7 +225,7 @@ void bflow(string configname)
 
       ////////////////////////////////////////////
       //METIS
-      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+      SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
       ////////////////////////////////////////////
       //////////////////////////////////////////////////////////////////////////
       //restart
@@ -313,7 +315,7 @@ void bflow(string configname)
 
          ////////////////////////////////////////////
          //METIS
-         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_MMM, MetisPartitioner::KWAY));
+         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_MMM, MetisPartitioner::KWAY));
          ////////////////////////////////////////////
          /////delete solid blocks
          if (myid == 0) UBLOG(logINFO, "deleteSolidBlocks - start");
@@ -334,8 +336,8 @@ void bflow(string configname)
          unsigned long nodb = (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nod = nob * (blocknx[0]) * (blocknx[1]) * (blocknx[2]);
          unsigned long nodg = nob * (blocknx[0] + gl) * (blocknx[1] + gl) * (blocknx[1] + gl);
-         double needMemAll = double(nodg * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(nodg * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
diff --git a/apps/cpu/sphere/sphere.cpp b/apps/cpu/sphere/sphere.cpp
index bad77ee999d96b89fea43183929ecf4a3b920588..70dcc0ddd189f3906575e92877800ec709199a78 100644
--- a/apps/cpu/sphere/sphere.cpp
+++ b/apps/cpu/sphere/sphere.cpp
@@ -7,6 +7,8 @@ using namespace std;
 ////////////////////////////////////////////////////////////////////////
 void run(string configname)
 {
+    using namespace vf::lbm::dir;
+
    try
    {
       SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
@@ -25,20 +27,20 @@ void run(string configname)
       //const int refineLevel = config.getValue<int>("level");
 
       string outputPath = "d:/temp/sphereBlock_5_SBB";
-      double availMem = 8e9;
-      double outstep = 10000;
-      double endstep = 1e6;
+      real availMem = 8e9;
+      real outstep = 10000;
+      real endstep = 1e6;
       int numOfThreads = 4;
       omp_set_num_threads(numOfThreads);
       int refineLevel = 0;
 
-      LBMReal radius = 5;
-      LBMReal uLB = 1e-3;
-      LBMReal Re = 1;
-      LBMReal rhoLB = 0.0;
-      LBMReal nuLB = (uLB*2.0*radius)/Re;
+      real radius = 5;
+      real uLB = 1e-3;
+      real Re = 1;
+      real rhoLB = 0.0;
+      real nuLB = (uLB*2.0*radius)/Re;
 
-      double dp_LB = 1e-6;
+      real dp_LB = 1e-6;
 //      double rhoLBinflow = dp_LB*3.0;
 
       SPtr<BCAdapter> noSlipBCAdapter(new NoSlipBCAdapter());
@@ -46,7 +48,7 @@ void run(string configname)
       SPtr<BCAdapter> slipBCAdapter(new SlipBCAdapter());
       slipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new SimpleSlipBCAlgorithm()));
       
-      double H = 50;
+      real H = 50;
       mu::Parser fct;
       fct.SetExpr("U");
       fct.DefineConst("U", uLB);
@@ -67,7 +69,7 @@ void run(string configname)
       bcVisitor.addBC(velBCAdapter);
       bcVisitor.addBC(denBCAdapter);
 
-      double dx = 1;
+      real dx = 1;
 
       const int blocknx1 = 50;
       const int blocknx2 = 50;
@@ -77,7 +79,7 @@ void run(string configname)
       const int gridNx2 = H;
       const int gridNx3 = H;
 
-      double L1, L2, L3;
+      real L1, L2, L3;
       L1 = gridNx1;
       L2 = gridNx2;
       L3 = gridNx3;
@@ -97,15 +99,15 @@ void run(string configname)
       if (true)
       {
          //bounding box
-         double d_minX1 = 0.0;
-         double d_minX2 = 0.0;
-         double d_minX3 = 0.0;
+         real d_minX1 = 0.0;
+         real d_minX2 = 0.0;
+         real d_minX3 = 0.0;
 
-         double d_maxX1 = L1;
-         double d_maxX2 = L2;
-         double d_maxX3 = L3;
+         real d_maxX1 = L1;
+         real d_maxX2 = L2;
+         real d_maxX3 = L3;
 
-         double blockLength = blocknx1*dx;
+         real blockLength = blocknx1*dx;
 
          if (myid == 0)
          {
@@ -126,7 +128,7 @@ void run(string configname)
          GenBlocksGridVisitor genBlocks(gridCube);
          grid->accept(genBlocks);
 
-         double off = 0.0;
+         real off = 0.0;
          SPtr<GbObject3D> refCube(new GbCuboid3D(sphere->getX1Minimum() - off, sphere->getX2Minimum() - off, sphere->getX3Minimum(),
             sphere->getX1Maximum() + off, sphere->getX2Maximum() + off, sphere->getX3Maximum()));
          if (myid == 0) GbSystem3D::writeGeoObject(refCube.get(), outputPath + "/geo/refCube", WbWriterVtkXmlBinary::getInstance());
@@ -180,7 +182,7 @@ void run(string configname)
          //outflow
          SPtr<D3Q27Interactor> outflowInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(geoOutflow, grid, denBCAdapter, Interactor3D::SOLID));
 
-         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::DIR_00M));
+         SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, DIR_00M));
          InteractorsHelper intHelper(grid, metisVisitor);
          intHelper.addInteractor(sphereInt);
          intHelper.addInteractor(addWallYminInt);
@@ -202,8 +204,8 @@ void run(string configname)
          int gl = 3;
          unsigned long nod = nob * (blocknx1 + gl) * (blocknx2 + gl) * (blocknx3 + gl);
 
-         double needMemAll = double(nod*(27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
-         double needMem = needMemAll / double(comm->getNumberOfProcesses());
+         real needMemAll = real(nod*(27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
+         real needMem = needMemAll / real(comm->getNumberOfProcesses());
 
          if (myid == 0)
          {
@@ -285,7 +287,7 @@ void run(string configname)
       SPtr<UbScheduler> nupsSch(new UbScheduler(10, 30, 100));
       SPtr<CoProcessor> npr(new NUPSCounterCoProcessor(grid, nupsSch, numOfThreads, comm));
 
-      double area = UbMath::PI * radius * radius;
+      real area = UbMath::PI * radius * radius;
       SPtr<UbScheduler> forceSch(new UbScheduler(100));
       SPtr<CalculateForcesCoProcessor> fp = make_shared<CalculateForcesCoProcessor>(grid, forceSch, outputPath + "/forces/forces.txt", comm, uLB, area);
       fp->addInteractor(sphereInt);
diff --git a/apps/gpu/GKS/BoundaryJet/3rdPartyLinking.cmake b/apps/gpu/GKS/BoundaryJet/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/BoundaryJet/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/BoundaryJet/BoundaryJet.cpp b/apps/gpu/GKS/BoundaryJet/BoundaryJet.cpp
deleted file mode 100644
index 76cda7ba2a91d72a4e008938d18ae00d41eff159..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/BoundaryJet/BoundaryJet.cpp
+++ /dev/null
@@ -1,369 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/Inflow.h"
-#include "GksGpu/BoundaryConditions/Extrapolation.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-    //real H = 0.25;
-    real H = L / real(nx);
-
-    real dx = L / real(nx);
-
-
-    real Ra = 2.0e9;
-
-    real Ba  = 0.1;
-    real eps = 1.2;
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 1.0;
-    real rho = 1.0;
-
-    real lambda     = Ba / ( 2.0 * g * L );
-    real lambdaHot  = lambda / ( 1.0 + eps * 0.5 );
-    real lambdaCold = lambda / ( 1.0 - eps * 0.5 );
-    
-    real mu = sqrt( Pr * eps * g * L * L * L / Ra ) * rho ;
-
-    real cs  = sqrt( ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( 2.0 * lambda ) );
-    real U   = sqrt( Ra ) * mu / ( rho * L );
-
-    real CFL = 0.25;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0.125 * g;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, -0.5*H,  
-    //                            0.5*L,  0.5*L,  0.5*H, dx);
-
-    gridBuilder->addCoarseGrid(-    L,  0.0  , -0.5*H,  
-                                4.0*L,  1.0*L,  0.5*H, dx);
-
-    real L_1 = ( 0.5 - 0.35 ) / 2.0;
-    real L_2 = ( 0.5 - 0.45 ) / 2.0;
-    real L_3 = ( 0.5 - 0.475) / 2.0;
-    real L_4 = ( 0.5 - 0.485) / 2.0;
-
-    Cuboid* cubeMY_1 = new Cuboid (-2.0, -2.0, -2.0, 
-                                    1.9,  L_1,  2.0 );
-
-    Cuboid* cubeMY_2 = new Cuboid (-2.0, -2.0, -2.0, 
-                                    1.8,  L_2,  2.0 );
-
-    Cuboid* cubeMY_3 = new Cuboid (-2.0, -2.0, -2.0, 
-                                    5.0,  L_3,  2.0 );
-
-    Cuboid* cubeMY_4 = new Cuboid (-2.0, -2.0, -2.0, 
-                                    2.0,  L_4,  2.0 );
-
-    Conglomerate refRegion_1;
-    refRegion_1.add(cubeMY_1);
-
-    Conglomerate refRegion_2;
-    refRegion_2.add(cubeMY_2);
-
-    Conglomerate refRegion_3;
-    refRegion_3.add(cubeMY_3);
-
-    Conglomerate refRegion_4;
-    refRegion_4.add(cubeMY_4);
-
-    gridBuilder->setNumberOfLayers(6,6);
-
-    gridBuilder->addGrid( &refRegion_1, 1);
-    gridBuilder->addGrid( &refRegion_2, 2);
-    //gridBuilder->addGrid( &refRegion_3, 3);
-    //gridBuilder->addGrid( &refRegion_4, 4);
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);  // heated
-    //CudaUtility::setCudaDevice(1);  // cooled
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    real inletHeight = 0.02;
-
-    SPtr<BoundaryCondition> bcMX_1 = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, false );
-    SPtr<BoundaryCondition> bcMX_2 = std::make_shared<Inflow>( dataBase, Vec3(0.2, 0.0, 0.0), lambda, rho, 0.0, 0.0, inletHeight, -1.0 );
-    //SPtr<BoundaryCondition> bcMX_3 = std::make_shared<Pressure>( dataBase, 0.5 * rho / lambda );
-
-    SPtr<BoundaryCondition> bcPX   = std::make_shared<Pressure>( dataBase, 0.5 * rho / lambda );
-
-    bcMX_1->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -L && center.y > inletHeight; } );
-    bcMX_2->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -L && center.y < inletHeight; } );
-    //bcMX_2->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -L; } );
-    bcPX->findBoundaryCells(   meshAdapter, true, [&](Vec3 center){ return center.x >  4.0*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot , false );
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold, 0.0, false );
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0, false );
-
-    SPtr<BoundaryCondition> bcPY = std::make_shared<Extrapolation>( dataBase );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0, false );
-
-    bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y <  0.0  ; } );
-    bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  L    ; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*H; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX_1 );
-    dataBase->boundaryConditions.push_back( bcMX_2 );
-    //dataBase->boundaryConditions.push_back( bcMX_3 );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-        
-        real y = cellCenter.y;
-
-        real factor = ( 0.0 
-                      + inletHeight*y 
-                      - 1.0  *y*y  ) * ( four / inletHeight / inletHeight );
-
-        real U_local;
-        if( y < inletHeight )
-            U_local = 0.2 * factor;
-        else
-            U_local = 0.0;
-
-        return toConservedVariables( PrimitiveVariables( rho, U_local, 0.0, 0.0, lambda ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-    {
-        for (SPtr<BoundaryCondition> bc : dataBase->boundaryConditions) {
-            bc->runBoundaryConditionKernel(dataBase, parameters, level);
-        }
-    }
-
-    dataBase->copyDataDeviceToHost();
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 1000000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, nullptr, 0);
-
-        if( 
-            //( iter < 10     && iter % 1     == 0 ) ||
-            //( iter < 100    && iter % 10    == 0 ) ||
-            //( iter < 1000   && iter % 100   == 0 ) ||
-            ( iter < 100000  && iter % 1000  == 0 ) ||
-            ( iter < 10000000 && iter % 10000 == 0 )
-          )
-        {
-            for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            {
-                for (SPtr<BoundaryCondition> bc : dataBase->boundaryConditions) {
-                    bc->runBoundaryConditionKernel(dataBase, parameters, level);
-                }
-            }
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-
-        //turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-    std::string path( "F:/Work/Computations/out/BoundaryJet/Heated/" );
-    //std::string path( "F:/Work/Computations/out/BoundaryJet/Cooled/" );
-    //std::string path( "out/" );
-    std::string simulationName ( "BoundaryJet" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/BoundaryJet/CMakeLists.txt b/apps/gpu/GKS/BoundaryJet/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/BoundaryJet/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/BoundaryJet/CMakePackage.cmake b/apps/gpu/GKS/BoundaryJet/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/BoundaryJet/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/Candle/3rdPartyLinking.cmake b/apps/gpu/GKS/Candle/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Candle/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/Candle/CMakeLists.txt b/apps/gpu/GKS/Candle/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Candle/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/Candle/CMakePackage.cmake b/apps/gpu/GKS/Candle/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Candle/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/Candle/Candle.cpp b/apps/gpu/GKS/Candle/Candle.cpp
deleted file mode 100644
index 2f6ed9f30a23783a6d8bcc87722b8ffde46de802..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Candle/Candle.cpp
+++ /dev/null
@@ -1,451 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure2.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/PassiveScalarDiriclet.h"
-#include "GksGpu/BoundaryConditions/InflowComplete.h"
-#include "GksGpu/BoundaryConditions/Open.h"
-#include "GksGpu/BoundaryConditions/Extrapolation.h"
-#include "GksGpu/BoundaryConditions/Symmetry.h"
-#include "GksGpu/BoundaryConditions/CreepingMassFlux.h"
-#include "GksGpu/BoundaryConditions/MassCompensation.h"
-
-#include "GksGpu/Interface/Interface.h"
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName, uint restartIter )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 256;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 4.0;
-    real H = 8.0;
-    real W = 0.125;
-
-    real dx = H / real(nx);
-
-    real U = 0.05;0.025;
-
-    real eps = 2.0;
-    real Pr  = 0.71;
-    real K   = 5.0;
-    
-    real g   = 9.81;
-    real rho = 1.2;
-    
-    real mu = 5.0e-4;
-
-    PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
-
-    setLambdaFromT( prim, 3.0 / T_FAKTOR );
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
-
-    real CFL = 0.5;0.125;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = prim.lambda;
-
-    parameters.rhoRef    = rho;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    *logging::out << logging::Logger::INFO_HIGH << "Pr = " << parameters.Pr << "\n";
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    bool threeDimensional = true;
-
-    if( threeDimensional )
-    {
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, 0.0,
-                                    0.5*L, 0.5*L, H, dx);
-    }
-    else
-    {
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*dx, 0.0,
-                                    0.5*L, 0.5*dx, H, dx);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    //TriangularMesh* stl = TriangularMesh::make("F:/Work/Computations/inp/Unterzug.stl");
-    TriangularMesh* stl = TriangularMesh::make("F:/Work/Computations/inp/Candle.stl");
-#else
-    //TriangularMesh* stl = TriangularMesh::make("inp/Unterzug.stl");
-    TriangularMesh* stl = TriangularMesh::make("inp/Candle.stl");
-#endif
-
-    gridBuilder->addGeometry(stl);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    VerticalCylinder cylinder( 0.0, 0.0, 0.0, 0.6, 8.0 );
-
-    gridBuilder->setNumberOfLayers(20,20);
-
-    gridBuilder->addGrid(&cylinder, 1);
-    gridBuilder->addGrid(stl, 2);
-
-    if( threeDimensional ) gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-    else                   gridBuilder->setPeriodicBoundaryCondition(false, true,  false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    if( !threeDimensional )
-        meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    real openBoundaryVelocityLimiter = 0.5;
-
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<MassCompensation>( dataBase, rho, U, prim.lambda );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<MassCompensation>( dataBase, rho, U, prim.lambda );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, false );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, false );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //SPtr<BoundaryCondition> bcMX_2 = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, false );
-    //SPtr<BoundaryCondition> bcPX_2 = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, false );
-    //SPtr<BoundaryCondition> bcMX_2 = std::make_shared<Symmetry>( dataBase, 'x' );
-    //SPtr<BoundaryCondition> bcPX_2 = std::make_shared<Symmetry>( dataBase, 'x' );
-    //SPtr<BoundaryCondition> bcMX_2 = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-    //SPtr<BoundaryCondition> bcPX_2 = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-
-    //bcMX_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L && center.z > 1.0; } );
-    //bcPX_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L && center.z > 1.0; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMY;
-    SPtr<BoundaryCondition> bcPY;
-
-    if( threeDimensional )
-    {
-        //bcMY = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-        //bcPY = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-        bcMY = std::make_shared<Symmetry>( dataBase, 'y' );
-        bcPY = std::make_shared<Symmetry>( dataBase, 'y' );
-
-        bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
-        bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
-    }
-    else
-    {
-        bcMY = std::make_shared<Periodic>(dataBase);
-        bcPY = std::make_shared<Periodic>(dataBase);
-
-        bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < -0.5*dx; });
-        bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y >  0.5*dx; });
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, 0.0, prim.lambda, 0.0, 0.0) );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<Open>( dataBase );
-
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<Open>( dataBase, prim );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<Extrapolation>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0 || ( std::sqrt(center.x*center.x + center.y*center.y) < 0.5 && center.z < 1.0 ); } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //SPtr<BoundaryCondition> burner = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), 0.5*prim.lambda,  0.0, true );
-
-    //SPtr<BoundaryCondition> burner = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, U, prim.lambda, 1.0, 1.0) );
-    SPtr<BoundaryCondition> burner = std::make_shared<CreepingMassFlux>( dataBase, rho, U, prim.lambda );
-
-    burner->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-
-        return center.z > 0.8 && center.z < 1.5 && std::sqrt(center.x*center.x + center.y*center.y) < 0.1;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    //dataBase->boundaryConditions.push_back( bcMX_2 );
-    //dataBase->boundaryConditions.push_back( bcPX_2 );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( burner );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-    
-    if( restartIter == INVALID_INDEX )
-    {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
-
-            PrimitiveVariables primLocal = prim;
-
-            //primLocal.rho = rho * std::exp( - ( 2.0 * g * H * prim.lambda ) * cellCenter.z / H );
-
-            real r = sqrt(cellCenter.x * cellCenter.x + cellCenter.y * cellCenter.y /*+ cellCenter.z * cellCenter.z*/);
-
-            //if( r < 0.6 ) primLocal.S_1 = 1.0;
-
-            //if( r < 0.5 ) prim.lambda /= (two - four*r*r);
-
-            return toConservedVariables(primLocal, parameters.K);
-        });
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-    }
-    else
-    {
-        Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" );
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 1000 );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 100000000; iter++ )
-    {
-        uint runUpTime = 10000;
-
-        if( iter < runUpTime )
-        {
-            //std::dynamic_pointer_cast<InflowComplete>(burner)->prim.S_1 =       1.0 * ( real(iter) / 20000.0 );
-            //std::dynamic_pointer_cast<InflowComplete>(burner)->prim.S_2 = 1.0 - 1.0 * ( real(iter) / 20000.0 );
-
-            //std::dynamic_pointer_cast<InflowComplete>(burner)->prim.W = U * ( real(iter) / 20000.0 );
-
-            //std::dynamic_pointer_cast<CreepingMassFlux>(burner)->velocity = U * ( real(iter) / runUpTime );
-
-            //parameters.mu = mu + 10.0 * mu * ( 1.0 - ( real(iter) / 20000.0 ) );
-
-            //parameters.dt = 0.2 * dt + ( dt - 0.2 * dt ) * ( real(iter) / 40000.0 );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            //( iter >= 7000 && iter % 10 == 0 ) || 
-            ( iter % 10000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        if( iter % 10000 == 0 )
-        {
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
-        }
-
-        //turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/Candle/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "Candle" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        uint restartIter = INVALID_INDEX;
-        //uint restartIter = 200000;
-
-        if( argc > 1 ) restartIter = atoi( argv[1] );
-
-        thermalCavity( path, simulationName, restartIter );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/ChannelFlow/3rdPartyLinking.cmake b/apps/gpu/GKS/ChannelFlow/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ChannelFlow/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/ChannelFlow/CMakeLists.txt b/apps/gpu/GKS/ChannelFlow/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ChannelFlow/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/ChannelFlow/CMakePackage.cmake b/apps/gpu/GKS/ChannelFlow/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ChannelFlow/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/ChannelFlow/ChannelFlow.cpp b/apps/gpu/GKS/ChannelFlow/ChannelFlow.cpp
deleted file mode 100644
index a1142e0dd5b66228e9d38bc566ebbbc279477345..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ChannelFlow/ChannelFlow.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void channelFlow( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 8+1;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-    real H = 1.0;
-
-    real dx = H / real(nx);
-
-    real Re  = 1.0e1;
-    real U  = 0.1;
-    real Ma = 0.1;
-    
-    real Pr  = 0.1;
-    real K   = 2.0;
-
-    real rho = 1.0;
-
-    real mu = U * rho * H / Re;
-
-    real cs = U / Ma;
-    real lambda = c1o2 * ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( cs * cs );
-
-    real g = eight * mu * U / ( H * H );
-
-    real p0 = c1o2 * rho / lambda;
-
-    real CFL = 0.25;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = g;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid(-0.5*L, -0.5*H, -0.5*dx,  
-                                0.5*L,  0.5*H,  0.5*dx, dx);
-
-    //Cuboid cube(-1.0, -1.0, 0.45, 1.0, 1.0, 0.55);
-
-    //gridBuilder->setNumberOfLayers(6,6);
-    //gridBuilder->addGrid( &cube, 1);
-
-    gridBuilder->setPeriodicBoundaryCondition(true, false, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<Pressure>( dataBase, p0 + g * 0.5 * L );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<Pressure>( dataBase, p0 - g * 0.5 * L );
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0, true );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0, true );
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*H; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0 );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0 );
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*dx; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*dx; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        real rhoLocal = rho;// - cellCenter.x * two * lambda * g;
-
-        //real ULocal =0.0;//8.0 * ( ( 0.25 - cellCenter.y * cellCenter.y ) * ( 0.25 - cellCenter.z * cellCenter.z ) ) * U;
-
-        real ULocal = four * ( 0.25 - cellCenter.y * cellCenter.y ) * U;
-
-        return toConservedVariables( PrimitiveVariables( rhoLocal, ULocal, 0.0, 0.0, lambda, 0.0 ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 2000000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, nullptr, 0);
-
-        if( iter % 100000 == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-
-}
-
-int main( int argc, char* argv[])
-{
-    std::string path( "F:/Work/Computations/out/" );
-    //std::string path( "out/" );
-    std::string simulationName ( "ChannelFlow" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        channelFlow( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/ChannelFlowObstacle/3rdPartyLinking.cmake b/apps/gpu/GKS/ChannelFlowObstacle/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ChannelFlowObstacle/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/ChannelFlowObstacle/CMakeLists.txt b/apps/gpu/GKS/ChannelFlowObstacle/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ChannelFlowObstacle/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/ChannelFlowObstacle/CMakePackage.cmake b/apps/gpu/GKS/ChannelFlowObstacle/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ChannelFlowObstacle/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/ChannelFlowObstacle/ChannelFlowObstacle.cpp b/apps/gpu/GKS/ChannelFlowObstacle/ChannelFlowObstacle.cpp
deleted file mode 100644
index 98af8e1245d5bd492d387ba580f4a0af628a8620..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ChannelFlowObstacle/ChannelFlowObstacle.cpp
+++ /dev/null
@@ -1,324 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void channelFlow( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 32+1;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    real L = 1.0;
-    real H = 1.0;
-
-    real dx = H / real(nx);
-
-    real Re  = 1.0e4;
-    real U  = 0.1;
-    real Ma = 0.1;
-    
-    real Pr  = 0.1;
-    real K   = 2.0;
-
-    real rho = 1.0;
-
-    real mu = U * rho * H / Re;
-
-    real cs = U / Ma;
-    real lambda = c1o2 * ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( cs * cs );
-
-    real g = eight * mu * U / ( H * H );
-
-    real p0 = c1o2 * rho / lambda;
-
-    real CFL = 0.25;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = g;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //gridBuilder->addCoarseGrid(-0.5*L, -0.5*H, -0.5*dx,  
-                                //0.5*L,  0.5*H,  0.5*dx, dx);
-
-    gridBuilder->addCoarseGrid(-0.5*L, -0.5*H, -0.5*H,  
-                                2.5*L,  0.5*H,  0.5*H, dx);
-
-    Cuboid cube1(-0.1, -0.1, -0.1, 0.2, 0.1, 0.1);
-    Cuboid cube2(-0.1, -0.1, -0.1, 0.2, 0.1, 0.1);
-
-    gridBuilder->setNumberOfLayers(10,6);
-    gridBuilder->addGrid( &cube1, 2);
-    //gridBuilder->addGrid( &cube2, 3);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    TriangularMesh* CubeSTL = TriangularMesh::make("F:/Work/Computations/inp/Cube.stl");
-
-    gridBuilder->addGeometry(CubeSTL);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setPeriodicBoundaryCondition(true, false, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<Pressure>( dataBase, p0 + g * 0.5 * L );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<Pressure>( dataBase, p0 - g * 0.5 * L );
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0, true );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0, true );
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*H; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0 );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, 0.0 );
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*dx; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*dx; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcCube = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), 0.8*lambda, 0.0, true );
-
-    bcCube->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return std::fabs(center.x) <  0.1 && 
-                                                                           std::fabs(center.y) <  0.1 && 
-                                                                           std::fabs(center.z) <  0.1; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    dataBase->boundaryConditions.push_back( bcCube );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    if( false )
-    {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
-
-            real rhoLocal = rho;// - cellCenter.x * two * lambda * g;
-
-            //real ULocal =0.0;//8.0 * ( ( 0.25 - cellCenter.y * cellCenter.y ) * ( 0.25 - cellCenter.z * cellCenter.z ) ) * U;
-
-            real ULocal = four * (0.25 - cellCenter.y * cellCenter.y) * U;
-
-            return toConservedVariables(PrimitiveVariables(rhoLocal, ULocal, 0.0, 0.0, lambda), parameters.K);
-        });
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-    }
-    else
-    {
-        Restart::readRestart(dataBase, path + simulationName + "_10000.rst", startIter );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string(startIter) + "_restart" );
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 2000000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( iter % 10000 == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-
-}
-
-int main( int argc, char* argv[])
-{
-    std::string path( "F:/Work/Computations/out/ChannelFlowObstacle/" );
-    //std::string path( "out/" );
-    std::string simulationName ( "ChannelFlowObstacle" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        channelFlow( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/ConcreteHeatFluxBCTest/3rdPartyLinking.cmake b/apps/gpu/GKS/ConcreteHeatFluxBCTest/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ConcreteHeatFluxBCTest/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/ConcreteHeatFluxBCTest/CMakeLists.txt b/apps/gpu/GKS/ConcreteHeatFluxBCTest/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ConcreteHeatFluxBCTest/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/ConcreteHeatFluxBCTest/CMakePackage.cmake b/apps/gpu/GKS/ConcreteHeatFluxBCTest/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ConcreteHeatFluxBCTest/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/ConcreteHeatFluxBCTest/ConcreteHeatFluxBCTest.cpp b/apps/gpu/GKS/ConcreteHeatFluxBCTest/ConcreteHeatFluxBCTest.cpp
deleted file mode 100644
index f23e9d74bb1027e5c3d66b9df289ca84cee21f15..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ConcreteHeatFluxBCTest/ConcreteHeatFluxBCTest.cpp
+++ /dev/null
@@ -1,361 +0,0 @@
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//   ||          ||  ||  ||||||  |||||||| ||    ||  ||||||||  ||
-//    ||        ||   ||  ||   ||    ||    ||    ||  ||    ||  ||
-//     ||      ||    ||  ||||||     ||    ||    ||  ||||||||  ||
-//      ||    ||     ||  ||   ||    ||     ||||||   ||    ||  ||||||    ||||||   ||   ||||||   ||||||   ||||||
-//       ||  ||                                                        ||       ||   ||   ||  ||      |||    ||
-//        ||||       |||||||||||||||||||||||||||||||||||||||||||||||||||||||   ||   ||||||   ||||||     |||
-//                                                                    ||      ||   ||   ||  ||       ||   |||
-//                    i R M B  @  T U  B r a u n s c h w e i g       ||      ||   ||   ||  ||||||   |||||||
-//
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-#include "GksGpu/FlowStateData/ThermalDependencies.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure2.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/HeatFlux.h"
-#include "GksGpu/BoundaryConditions/CreepingMassFlux.h"
-#include "GksGpu/BoundaryConditions/ConcreteHeatFlux.h"
-#include "GksGpu/BoundaryConditions/Open.h"
-
-#include "GksGpu/Communication/Communicator.h"
-#include "GksGpu/Communication/MpiUtility.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-#include "GksGpu/Analyzer/PointTimeseriesCollector.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real dx = 0.1;
-
-    real L = 1.0;
-
-    real Pr  = 1.0;
-    real K   = 2.0;
-    
-    real g   = 9.81;
-    real rho = 1.0;
-
-    PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
-    setLambdaFromT( prim, 12.0 );
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
-
-    real mu  = 1.0e-2;
-
-    real cp = 0.5 * ( K + 5 ) * R_U / M_A;
-
-    real k = mu / Pr * cp;
-
-    real dt  = 0.000025;
-
-    //real dt = 0.01 * dx*dx / ( 2.0 * mu );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    //*logging::out << logging::Logger::INFO_HIGH << "HRR = " << U * rhoFuel * LBurner * LBurner * (heatOfReaction * 100.0) / 0.016 / 1000.0 << " kW\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = prim.lambda;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L,  
-                                0.5 * L,  0.5 * L,  0.5 * L, dx);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "Grid_rank_" + std::to_string( rank ) + "_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "MeshFaces_rank_" + std::to_string( rank ) + ".vtk" );
-
-    //meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcWall1 = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), prim.lambda, false);
-
-    bcWall1->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    
-    SPtr<BoundaryCondition> bcWall2 = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false);
-
-    bcWall2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x > -0.5*L; } );
-
-    //SPtr<BoundaryCondition> bcWallHeatFlux = std::make_shared<ConcreteHeatFlux>( dataBase, 9,  0.1 * k / 1.0 / 50.0, 1.0, 50.0, 0.1, 3.0 );
-    //SPtr<BoundaryCondition> bcWallHeatFlux = std::make_shared<ConcreteHeatFlux>( dataBase, 9,  1.0 * k / 1.0 / 50.0, 1.0, 50.0, 0.1, 3.0 );
-    SPtr<BoundaryCondition> bcWallHeatFlux = std::make_shared<ConcreteHeatFlux>( dataBase, 9, 10.0 * k / 1.0 / 50.0, 1.0, 50.0, 0.1, 3.0 );
-
-    bcWallHeatFlux->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x > 0.5*L && std::abs(center.y) < 0.5*L && std::abs(center.z) < 0.5*L; } );
-
-    std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux)->init();
-
-    //////////////////////////////////////////////////////////////////////////
-
-
-    dataBase->boundaryConditions.push_back( bcWallHeatFlux );
-
-    dataBase->boundaryConditions.push_back( bcWall1 );
-    dataBase->boundaryConditions.push_back( bcWall2 );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-    
-    Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
-
-        PrimitiveVariables primLocal = prim;
-
-        return toConservedVariables(primLocal, parameters.K);
-    });
-
-    writeVtkXML(dataBase, parameters, 0, path + simulationName + "_0");
-
-    //std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux)->writeVTKFile(dataBase, parameters, path + simulationName + "_Solid_0");
-    writeConcreteHeatFluxVtkXML( dataBase, std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux), parameters, 0, path + simulationName + "_Solid_0" );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 10000 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_HIGH << "================================================================================\n";
-    *logging::out << logging::Logger::INFO_HIGH << "================================================================================\n";
-    *logging::out << logging::Logger::INFO_HIGH << "==================   S t a r t    T i m e    S t e p p i n g   =================\n";
-    *logging::out << logging::Logger::INFO_HIGH << "================================================================================\n";
-    *logging::out << logging::Logger::INFO_HIGH << "================================================================================\n";
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 10000000; iter++ )
-    {
-        //////////////////////////////////////////////////////////////////////////
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        //////////////////////////////////////////////////////////////////////////
-
-        cupsAnalyzer.run( iter, parameters.dt );
-
-        convergenceAnalyzer.run( iter );
-
-        //////////////////////////////////////////////////////////////////////////
-
-        int crashCellIndex = dataBase->getCrashCellIndex();
-        if( crashCellIndex >= 0 )
-        {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Simulation Crashed at CellIndex = " << crashCellIndex << "\n";
-            dataBase->copyDataDeviceToHost();
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-
-            //std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux)->writeVTKFile(dataBase, parameters, path + simulationName + "_Solid_" + std::to_string( iter ));
-            writeConcreteHeatFluxVtkXML( dataBase, std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux), parameters, 0, path + simulationName + "_Solid_" + std::to_string( iter ) );
-
-            break;
-        }
-
-        if( iter % 100000 == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-
-            //std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux)->writeVTKFile(dataBase, parameters, path + simulationName + "_Solid_" + std::to_string( iter ));
-            writeConcreteHeatFluxVtkXML( dataBase, std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux), parameters, 0, path + simulationName + "_Solid_" + std::to_string( iter ) );
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/ConcreteHeatFluxBCTest/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "ConcreteHeatFluxBCTest" );
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice( 0 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/ConfinedCombustion/3rdPartyLinking.cmake b/apps/gpu/GKS/ConfinedCombustion/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ConfinedCombustion/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/ConfinedCombustion/CMakeLists.txt b/apps/gpu/GKS/ConfinedCombustion/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ConfinedCombustion/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/ConfinedCombustion/CMakePackage.cmake b/apps/gpu/GKS/ConfinedCombustion/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ConfinedCombustion/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/ConfinedCombustion/ConfinedCombustion.cpp b/apps/gpu/GKS/ConfinedCombustion/ConfinedCombustion.cpp
deleted file mode 100644
index e903cb3a06e0250a8d13fbfcb40d619f87245d9c..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ConfinedCombustion/ConfinedCombustion.cpp
+++ /dev/null
@@ -1,430 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-#include "GksGpu/FlowStateData/ThermalDependencies.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 64;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-
-    real dx = L / real(nx);
-
-    real U = 0.1;
-
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 9.81;
-    real rho = 1.2;
-    
-    real mu = 5.0e-3;
-
-    PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
-
-    setLambdaFromT( prim, 3.0 );
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
-
-    real CFL = 0.5;
-
-    double dt  = CFL * ( dx / ( ( U + cs ) * ( c1o1 + ( c2o1 * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D  = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    parameters.enableReaction = true;
-
-    parameters.useHeatReleaseRateLimiter = true;
-    parameters.useTemperatureLimiter     = true;
-    parameters.usePassiveScalarLimiter   = true;
-    parameters.useSmagorinsky            = true;
-
-    parameters.reactionLimiter    = 1.0005;
-    parameters.temperatureLimiter = 1.0e-6;
-
-    parameters.useSpongeLayer = true;
-    parameters.spongeLayerIdx = 2;
-
-    parameters.forcingSchemeIdx = 2;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid(-0.5*L, -0.5*dx, -0.5*dx,  
-                                0.5*L,  0.5*dx,  0.5*dx, dx);
-
-    //gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, -0.5*L,  
-    //                            0.5*L,  0.5*L,  0.5*L, dx);
-
-    gridBuilder->setPeriodicBoundaryCondition(true, true, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(1);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold, false );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-
-    //bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.125*L; } );
-    //bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.125*L; } );
-
-    bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*dx; } );
-    bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*dx; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot, false );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, true );
-
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    
-    //bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.125*L; } );
-    //bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.125*L; } );
-
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*dx; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*dx; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        //PrimitiveVariables primFuel = prim;
-
-        //primFuel.S_1 = 1.0;
-
-        //////////////////////////////////////////////////////////////////////////
-
-        //PrimitiveVariables primAir = prim;
-
-        //////////////////////////////////////////////////////////////////////////
-
-        //real massFuel = 1.0; 
-        //real massAir  = 2.0 * 32.0/16.0 + 2.0 * 0.767 / 0.233 * 32.0/16.0;
-
-        //real volumeFuel = massFuel / primFuel.rho;
-        //real volumeAir  = massAir  / primAir.rho;
-        //
-        //real volumeRatioFuel = volumeFuel / ( volumeFuel + volumeAir );
-
-        //if(fabs(cellCenter.x) < 0.5 * volumeRatioFuel ) return toConservedVariables( primFuel, parameters.K );
-        //else                                            return toConservedVariables( primAir , parameters.K );
-
-        //////////////////////////////////////////////////////////////////////////
-
-        //PrimitiveVariables primMix = prim;
-
-        //primMix.S_1 = volumeRatioFuel;
-
-        //return toConservedVariables( primMix, parameters.K );
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if( nx == 1 )
-        {
-            // for stoichiometric mixture
-            double Y_F = ( rX * M_F ) / ( rX * M_F + 2.0 * M_A );
-
-            prim.S_1 = Y_F;
-
-            return toConservedVariables(prim, parameters.K);
-        }
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if( nx > 1 )
-        {
-            // for stoichiometric mixture
-            double Y_F = ( rX * M_F ) / ( rX * M_F + 2.0 * M_A );
-
-            prim.S_1 = Y_F;
-
-            //if (cellCenter.x < 0) prim.S_1 = 0.0;
-            //else                  prim.S_1 = 2.0 * Y_F;
-
-            return toConservedVariables(prim, parameters.K);
-        }
-
-        //////////////////////////////////////////////////////////////////////////
-
-        //if( nx > 1 )
-        //{
-        //    double X_F = 1.0;
-        //    double X_A = 1.0 - X_F;
-
-        //    double M = X_F * M_F + X_A * M_A;
-
-        //    double Y_F = X_F * M_F / M;
-        //    double Y_A = X_A * M_A / M;
-
-        //    if (cellCenter.x < 0) prim.S_1 = 0.0;
-        //    else                  prim.S_1 = Y_F;
-
-        //    if (cellCenter.x < 0) prim.rho = 1.2;
-        //    else                  prim.rho = 0.1;
-
-        //    return toConservedVariables(prim, parameters.K);
-        //}
-    });
-
-    //std::cout << toConservedVariables( PrimitiveVariables( rho, 0.0, 0.0, 0.0, lambdaHot, S_1, S_2 ), parameters.K ).rhoE << std::endl;
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, 0 );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 100000; iter++ )
-    {
-        //if( iter < 100000 )
-        //{
-        //    std::dynamic_pointer_cast<IsothermalWall>(bcMX)->lambda = lambdaCold + ( lambdaHot - lambdaCold ) * ( real(iter) / 100000.0 );
-        //}
-        //if( iter == 100000 )
-        //{
-        //    //std::dynamic_pointer_cast<IsothermalWall>(bcMX)->lambda = lambdaHot;
-        //    dataBase->boundaryConditions[4] = bcMX_2;
-        //}
-
-        cupsAnalyzer.run( iter, parameters.dt );
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            //( iter < 10       && iter % 1     == 0 ) ||
-            //( iter < 100      && iter % 10    == 0 ) ||
-            //( iter < 1000     && iter % 100   == 0 ) ||
-            //( iter < 10000    && iter % 1000  == 0 ) ||
-            //( iter < 1000000   && iter % 10000  == 0 )
-            //( iter < 10000000 && iter % 100000 == 0 )
-            //( iter <= 400000 && iter % 100 == 0 )
-            ( iter % 10000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        convergenceAnalyzer.run( iter );
-
-        //turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/ConfinedCombustion/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "ConfinedCombustion" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/DrivenCavity3D/3rdPartyLinking.cmake b/apps/gpu/GKS/DrivenCavity3D/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/DrivenCavity3D/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/DrivenCavity3D/CMakeLists.txt b/apps/gpu/GKS/DrivenCavity3D/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/DrivenCavity3D/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/DrivenCavity3D/CMakePackage.cmake b/apps/gpu/GKS/DrivenCavity3D/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/DrivenCavity3D/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/DrivenCavity3D/DrivenCavity3D.cpp b/apps/gpu/GKS/DrivenCavity3D/DrivenCavity3D.cpp
deleted file mode 100644
index 5bb85fa912165aa19087f5991d8d3db5d215c411..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/DrivenCavity3D/DrivenCavity3D.cpp
+++ /dev/null
@@ -1,272 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void drivenCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-
-    real dx = L / 128.0;
-
-    real Re  = 1.0e3;
-    real U  = 0.1;
-    real Ma = 0.1;
-    
-    real Pr  = 1.0;
-    real K   = 2.0;
-
-    real rho = 1.0;
-
-    real mu = U * rho * L / Re;
-
-    real cs = U / Ma;
-    real lambda = c1o2 * ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( cs * cs );
-
-    real CFL = 0.5;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( c1o1 + ( c2o1 * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //gridBuilder->addCoarseGrid(-0.5, -0.5, -0.5,  
-                                //0.5,  0.5,  0.5, dx);
-    gridBuilder->addCoarseGrid(-0.5, -0.5, -0.5*dx,  
-                                0.5,  0.5,  0.5*dx, dx);
-
-    //Cuboid refBox(-1.0, -1.0, 0.475, 1.0, 1.0, 0.55);
-    ////Cuboid refBox(-1.0, -1.0, -1.0, 1.0, 1.0, -0.475);
-
-    //gridBuilder->setNumberOfLayers(6,6);
-    //gridBuilder->addGrid( &refBox, 1);
-    
-    //gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-    gridBuilder->setPeriodicBoundaryCondition(false, false, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + simulationName + "_Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + simulationName + "_MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcPY   = std::make_shared<IsothermalWall>( dataBase, Vec3( U  , 0.0, 0.0 ), lambda, false );
-    SPtr<BoundaryCondition> bcWall = std::make_shared<IsothermalWall>( dataBase, Vec3( 0.0, 0.0, 0.0 ), lambda, false );
-
-    bcPY->findBoundaryCells  ( meshAdapter, true,  [&](Vec3 center){ return center.y > 0.5; } );
-    bcWall->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < 0.5; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*dx; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*dx; } );
-    
-    //dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPY );
-    dataBase->boundaryConditions.push_back( bcWall );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    //CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables {
-
-        //real uLocal = U * ( cellCenter.z + 0.5 );
-
-        //if( cellCenter.y )
-
-        real uLocal = 0.0;
-
-        return toConservedVariables( PrimitiveVariables( 1.0, uLocal, 0.0, 0.0, lambda ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, false, 60.0, true, 10000 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
-
-    auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 80000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    for( uint iter = 1; iter <= 1000000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( iter % 10000 == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter, parameters.dt );
-
-        turbulenceAnalyzer->run( iter, parameters );
-
-        convergenceAnalyzer.run( iter );
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    turbulenceAnalyzer->download();
-
-    writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-
-
-}
-
-int main( int argc, char* argv[])
-{
-    //std::string path( "E:/DrivenCavity/resultsGKS/" );
-    std::string path( "F:/Work/Computations/out/DrivenCavity/" );
-    //std::string path( "out/" );
-    std::string simulationName ( "DrivenCavity" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    
-    try
-    {
-        drivenCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/DrivenCavityMultiGPU/3rdPartyLinking.cmake b/apps/gpu/GKS/DrivenCavityMultiGPU/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/DrivenCavityMultiGPU/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/DrivenCavityMultiGPU/CMakeLists.txt b/apps/gpu/GKS/DrivenCavityMultiGPU/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/DrivenCavityMultiGPU/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/DrivenCavityMultiGPU/CMakePackage.cmake b/apps/gpu/GKS/DrivenCavityMultiGPU/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/DrivenCavityMultiGPU/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp b/apps/gpu/GKS/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
deleted file mode 100644
index 12320e5ee1f4133223177ef7231a7862640cbda2..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
+++ /dev/null
@@ -1,472 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-#include <thread>
-
-#include <mpi.h>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/Communication/Communicator.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void init( uint rank, SPtr<DataBase> dataBase, SPtr<Parameters> parameters, std::string path, std::string simulationName )
-{
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(rank % 4);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 512;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-
-    real dx = L / real(nx);
-
-    real Re = 10000.0;
-
-    real U  = 1.0;
-    real Ma = 0.1;
-    
-    real Pr  = 0.71;
-    real K   = 2.0;
-
-    real rho = 1.0;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real gamma = ( K + 5 ) / ( K + 3 );
-
-    real mu = U * rho * L / Re;
-
-    real cs = U / Ma;
-    real lambda = c1o2 * ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( cs * cs );
-
-    real CFL = 0.5;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    parameters->K  = K;
-    parameters->Pr = Pr;
-    parameters->mu = mu;
-
-    parameters->force.x = 0;
-    parameters->force.y = 0;
-    parameters->force.z = 0;
-
-    parameters->dt = dt;
-    parameters->dx = dx;
-
-    parameters->lambdaRef = lambda;
-
-    parameters->viscosityModel = ViscosityModel::sutherlandsLaw;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if( rank == 0 ) gridBuilder->addCoarseGrid(-0.5*L , -0.5*L , -0.5*L ,  
-                                                3.0*dx,  3.0*dx,  3.0*dx, dx);
-
-    if( rank == 1 ) gridBuilder->addCoarseGrid(-3.0*dx, -0.5*L , -0.5*L ,  
-                                                0.5*L ,  3.0*dx,  3.0*dx, dx);
-
-    if( rank == 2 ) gridBuilder->addCoarseGrid(-0.5*L , -3.0*dx, -0.5*L ,  
-                                                3.0*dx,  0.5*L ,  3.0*dx, dx);
-
-    if( rank == 3 ) gridBuilder->addCoarseGrid(-3.0*dx, -3.0*dx, -0.5*L ,  
-                                                0.5*L ,  0.5*L ,  3.0*dx, dx);
-
-    if( rank == 4 ) gridBuilder->addCoarseGrid(-0.5*L , -0.5*L , -3.0*dx,  
-                                                3.0*dx,  3.0*dx,  0.5*L , dx);
-
-    if( rank == 5 ) gridBuilder->addCoarseGrid(-3.0*dx, -0.5*L , -3.0*dx,  
-                                                0.5*L ,  3.0*dx,  0.5*L , dx);
-
-    if( rank == 6 ) gridBuilder->addCoarseGrid(-0.5*L , -3.0*dx, -3.0*dx,  
-                                                3.0*dx,  0.5*L ,  0.5*L , dx);
-
-    if( rank == 7 ) gridBuilder->addCoarseGrid(-3.0*dx, -3.0*dx, -3.0*dx,  
-                                                0.5*L ,  0.5*L ,  0.5*L , dx);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Cuboid cube( -0.1, -0.1, -0.1, 
-                  0.1,  0.1,  0.1 );
-    
-    gridBuilder->setNumberOfLayers(6,6);
-
-    //gridBuilder->addGrid(&cube, 1);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( rank == 0 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -1.0, 0.0, 
-                                                                                 -1.0, 0.0, 
-                                                                                 -1.0, 0.0 ) );
-
-    if( rank == 1 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.0, 1.0, 
-                                                                                 -1.0, 0.0, 
-                                                                                 -1.0, 0.0 ) );
-
-    if( rank == 2 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -1.0, 0.0, 
-                                                                                  0.0, 1.0, 
-                                                                                 -1.0, 0.0 ) );
-
-    if( rank == 3 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.0, 1.0, 
-                                                                                  0.0, 1.0, 
-                                                                                 -1.0, 0.0 ) );
-
-    if( rank == 4 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -1.0, 0.0, 
-                                                                                 -1.0, 0.0, 
-                                                                                  0.0, 1.0 ) );
-
-    if( rank == 5 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.0, 1.0, 
-                                                                                 -1.0, 0.0, 
-                                                                                  0.0, 1.0 ) );
-
-    if( rank == 6 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -1.0, 0.0, 
-                                                                                  0.0, 1.0, 
-                                                                                  0.0, 1.0 ) );
-
-    if( rank == 7 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.0, 1.0, 
-                                                                                  0.0, 1.0, 
-                                                                                  0.0, 1.0 ) );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-            
-    if( rank == 0 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PX, 1);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PY, 2);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PZ, 4);
-    }
-            
-    if( rank == 1 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MX, 0);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PY, 3);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PZ, 5);
-    }
-            
-    if( rank == 2 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PX, 3);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MY, 0);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PZ, 6);
-    }
-            
-    if( rank == 3 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MX, 2);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MY, 1);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PZ, 7);
-    }
-            
-    if( rank == 4 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PX, 5);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PY, 6);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MZ, 0);
-    }
-            
-    if( rank == 5 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MX, 4);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PY, 7);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MZ, 1);
-    }
-            
-    if( rank == 6 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PX, 7);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MY, 4);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MZ, 2);
-    }
-            
-    if( rank == 7 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MX, 6);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MY, 5);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MZ, 3);
-    }
-
-    gridBuilder->writeGridsToVtk(path + "Grid_" + std::to_string( rank ) + "_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    meshAdapter.getCommunicationIndices();
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces_" + std::to_string( threadIndex ) + ".vtk" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, false );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, false );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, false );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, false );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambda, false );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<IsothermalWall>( dataBase, Vec3(  U,   U, 0.0), lambda, false );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-        return toConservedVariables( PrimitiveVariables( rho, 0.0, 0.0, 0.0, lambda ), parameters->K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    //writeVtkXML( dataBase, *parameters, 0, path + simulationName + "_" + std::to_string( threadIndex ) + "_" + std::to_string( 0 ) );
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void run( uint rank, SPtr<DataBase> dataBase, SPtr<Parameters> parameters, std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice( rank % 4 );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    writeVtkXML( dataBase, *parameters, 0, path + simulationName + "_" + std::to_string( rank ) + "_" + std::to_string( 0 ) );
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 300.0, true, 1000 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 1000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 100000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, *parameters, 0);
-
-        if( 
-            //( iter < 10     && iter % 1     == 0 ) ||
-            //( iter < 100    && iter % 10    == 0 ) ||
-            //( iter < 1000   && iter % 100   == 0 ) ||
-            //( iter < 10000  && iter % 1000  == 0 ) 
-            ( iter < 10000000 && iter % 20000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, *parameters, 0, path + simulationName + "_" + std::to_string( rank ) + "_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-}
-
-
-
-int main( int argc, char* argv[])
-{
-    MPI_Init(&argc, &argv);
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //std::string path( "F:/Work/Computations/out/" );
-    std::string path( "out/" );
-    std::string simulationName ( "DrivenCavity" );
-            
-    std::ofstream logFile;
-            
-    logFile.open( path + simulationName + "_" + std::to_string(rank) + ".log" );
-
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::addStream(&std::cout);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-        auto parameters = std::make_shared<Parameters>();
-
-        init( rank, dataBase, parameters, path, simulationName);
-
-        run ( rank, dataBase, parameters, path, simulationName);
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    MPI_Finalize();
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/Flame7cm/CMakeLists.txt b/apps/gpu/GKS/Flame7cm/CMakeLists.txt
deleted file mode 100644
index 75ca5fa4b4e9c51724d32a3733559b9489ce7943..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Flame7cm/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-PROJECT(Flame7cm LANGUAGES CUDA CXX)
-
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics GridGenerator GksMeshAdapter GksVtkAdapter GksGpu MPI::MPI_CXX FILES Flame7cm.cpp )
-
-set_source_files_properties(Flame7cm.cpp PROPERTIES LANGUAGE CUDA)
diff --git a/apps/gpu/GKS/Flame7cm/Flame7cm.cpp b/apps/gpu/GKS/Flame7cm/Flame7cm.cpp
deleted file mode 100644
index 4323ce5ae3bf8486a2203adec470e0d1fdc05a70..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Flame7cm/Flame7cm.cpp
+++ /dev/null
@@ -1,494 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <iomanip>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/PassiveScalarDiriclet.h"
-#include "GksGpu/BoundaryConditions/InflowComplete.h"
-#include "GksGpu/BoundaryConditions/Open.h"
-#include "GksGpu/BoundaryConditions/Inflow.h"
-#include "GksGpu/BoundaryConditions/Symmetry.h"
-#include "GksGpu/BoundaryConditions/Pressure2.h"
-#include "GksGpu/BoundaryConditions/CreepingMassFlux.h"
-
-#include "GksGpu/Interface/Interface.h"
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex, uint _nx, bool _useTempLimiter, uint restartIter )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = _nx;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 0.15;
-    real H = 0.4;
-
-    real R = 0.5 * 0.071;
-
-    real dx = H / real(nx);
-
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 9.81;
-    real rho = 1.2;
-    
-    real mu = 1.8e-5;
-
-    real U = 0.0314;
-    real rhoFuel = 0.68;
-
-    GksGpu::PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
-
-    GksGpu::setLambdaFromT( prim, 3.0 );
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
-
-    //real CFL = 0.06125;
-    real CFL = 0.125;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( c1o1 + ( c2o1 * mu ) / ( U * dx * rho ) ) ) );
-
-    //real dh = 4192.0; // kJ / kmol  / T_FAKTOR
-    real dh = 8000.0; // kJ / kmol  / T_FAKTOR
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-    *logging::out << logging::Logger::INFO_HIGH << "Pr = " << Pr << "\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "HRR = " << U * rhoFuel * M_PI * R * R * ( dh * 100 ) / 0.016 / 1000.0 << " kW\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    GksGpu::Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = prim.lambda;
-
-    parameters.rhoRef    = rho;
-
-    parameters.heatOfReaction = dh;
-
-    parameters.viscosityModel = GksGpu::ViscosityModel::sutherlandsLaw;
-    //parameters.viscosityModel = GksGpu::ViscosityModel::constant;
-
-    parameters.enableReaction = true;
-
-    parameters.useHeatReleaseRateLimiter = true;
-    parameters.useTemperatureLimiter     = _useTempLimiter;
-    parameters.usePassiveScalarLimiter   = true;
-    parameters.useSmagorinsky            = true;
-
-    parameters.heatReleaseRateLimiter = 5000000.0;
-    parameters.temperatureLimiter     = 1.0e-8;
-
-    parameters.useSpongeLayer = true;
-    parameters.spongeLayerIdx = 0;
-
-    parameters.forcingSchemeIdx = 2;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    // auto gridFactory = GridFactory::make();
-    // gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    bool threeDimensional = true;
-
-    if( threeDimensional )
-    {
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, 0.0,
-                                    0.5*L,  0.5*L, H, dx);
-    }
-    else
-    {
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*dx, 0.0,
-                                    0.5*L,  0.5*dx, H, dx);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    VerticalCylinder cylinder1( 0.0, 0.0, 0.0, 2.1*R, 0.75*H );
-    VerticalCylinder cylinder2( 0.0, 0.0, 0.0, 1.5*R, 0.15*H );
-    
-    Conglomerate refRing;
-    refRing.add     ( new VerticalCylinder( 0.0, 0.0, 0.0, 1.2*R, 0.02 ) );
-    refRing.subtract( new VerticalCylinder( 0.0, 0.0, 0.0, 0.8*R, 1.0    ) );
-
-    gridBuilder->setNumberOfLayers(0,10);
-    
-    //gridBuilder->addGrid( &cylinder1 );
-    //gridBuilder->addGrid( &cylinder2 );
-    //gridBuilder->addGrid( &refRing );
-
-    if( threeDimensional ) gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-    else                   gridBuilder->setPeriodicBoundaryCondition(false, true,  false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    if( !threeDimensional )
-        meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksGpu::CudaUtility::setCudaDevice(_gpuIndex);
-
-    auto dataBase = std::make_shared<GksGpu::DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    real openBoundaryVelocityLimiter = 1.0;
-
-    SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
-    SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
-
-    SPtr<GksGpu::BoundaryCondition> bcMX_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'x' );
-    SPtr<GksGpu::BoundaryCondition> bcPX_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'x' );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    bcMX_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L && center.z > 0.9*H; } );
-    bcPX_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L && center.z > 0.9*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<GksGpu::BoundaryCondition> bcMY;
-    SPtr<GksGpu::BoundaryCondition> bcPY;
-
-    SPtr<GksGpu::BoundaryCondition> bcMY_2;
-    SPtr<GksGpu::BoundaryCondition> bcPY_2;
-
-    if( threeDimensional )
-    {
-        bcMY = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
-        bcPY = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
-
-        bcMY_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'y' );
-        bcPY_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'y' );
-
-        bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
-        bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-        bcMY_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L && center.z > 0.9*H; } );
-        bcPY_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L && center.z > 0.9*H; } );
-    }
-    else
-    {
-        bcMY = std::make_shared<GksGpu::Periodic>(dataBase);
-        bcPY = std::make_shared<GksGpu::Periodic>(dataBase);
-
-        bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < -0.5*dx; });
-        bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y >  0.5*dx; });
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, 0.0, prim.lambda, 0.0, 0.0) );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<Open>( dataBase );
-
-    SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<GksGpu::BoundaryCondition> burner = std::make_shared<GksGpu::CreepingMassFlux>( dataBase, rhoFuel, U, prim.lambda );
-
-    burner->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-        
-        if( threeDimensional )
-            return center.z < 0.0 && std::sqrt(center.x*center.x + center.y*center.y) < R;
-        else
-            return center.z < 0.0 && std::sqrt(center.x*center.x) < R && std::sqrt(center.y*center.y) < 0.5 * dx;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( burner );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX_2 );
-    dataBase->boundaryConditions.push_back( bcPX_2 );
-
-    if( threeDimensional ){
-        dataBase->boundaryConditions.push_back( bcMY_2 );
-        dataBase->boundaryConditions.push_back( bcPY_2 );
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    GksGpu::CudaUtility::printCudaMemoryUsage();
-    
-    if( restartIter == INVALID_INDEX )
-    {
-        GksGpu::Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> GksGpu::ConservedVariables {
-
-            GksGpu::PrimitiveVariables primLocal = prim;
-
-            return GksGpu::toConservedVariables(primLocal, parameters.K);
-        });
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-    }
-    else
-    {
-        GksGpu::Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" );
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    GksGpu::Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint iterPerSecond = uint( c1o1 / parameters.dt ) + 1;
-
-    *logging::out << logging::Logger::INFO_HIGH << "iterPerSecond = " << iterPerSecond << "\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 10000 );
-
-    GksGpu::ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
-
-    auto turbulenceAnalyzer = std::make_shared<GksGpu::TurbulenceAnalyzer>( dataBase, 10 * iterPerSecond );
-
-    turbulenceAnalyzer->collect_UU = true;
-    turbulenceAnalyzer->collect_VV = true;
-    turbulenceAnalyzer->collect_WW = true;
-
-    turbulenceAnalyzer->allocate();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 40 * iterPerSecond; iter++ )
-    {
-        cupsAnalyzer.run( iter, parameters.dt );
-
-        convergenceAnalyzer.run( iter );
-
-        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        int crashCellIndex = dataBase->getCrashCellIndex();
-
-        if( crashCellIndex >= 0 )
-        {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Simulation Crashed at CellIndex = " << crashCellIndex << "\n";
-            dataBase->copyDataDeviceToHost();
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-
-            break;
-        }
-
-        if( 
-            //( iter >= 39360 && iter % 1 == 0 ) || 
-            ( iter % 10000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        if( iter % 10000 == 0 /*|| iter == 39000*/)
-        {
-            dataBase->copyDataDeviceToHost();
-            GksGpu::Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
-        }
-
-        if( iter % 100000 == 0 )
-        {
-            turbulenceAnalyzer->download();
-
-            writeTurbulenceVtkXML( dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence_" + std::to_string( iter ) );
-        }
-
-        turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-    // Entry point of the flame application. All positional arguments are optional:
-    //   argv[1] gpuIndex, argv[2] nx, argv[3] useTempLimiter, argv[4] restartIter.
-    // Text is converted with atoi; a missing argument keeps the fallback given below.
-    // Caught exceptions are logged and the process still returns 0.
-
-    auto argumentOr = [&]( int position, uint fallback ) -> uint
-    {
-        return ( argc > position ) ? uint( atoi( argv[position] ) ) : fallback;
-    };
-
-    uint gpuIndex       = argumentOr( 1, 1 );
-    uint nx             = argumentOr( 2, 100 );
-    bool useTempLimiter = argumentOr( 3, 1 ) != 0;
-    uint restartIter    = argumentOr( 4, INVALID_INDEX );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    // Output directory: fixed on Windows, otherwise built from nx and the limiter flag.
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/Flame7cm/" );
-#else
-    std::string path = "out/nx_" + std::to_string(nx);
-    path += useTempLimiter ? "_withTempLimiter/" : "/";
-#endif
-
-    std::string simulationName ( "Flame" );
-
-    // Log to the console and to <path><simulationName>.log.
-    std::ofstream logFile( path + simulationName + ".log" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    // Report which precision the size of real corresponds to.
-    const char* precisionMessage = ( sizeof(real) == 4 ) ? "Using Single Precision\n" : "Using Double Precision\n";
-    *logging::out << logging::Logger::INFO_HIGH << precisionMessage;
-
-    try
-    {
-        thermalCavity( path, simulationName, gpuIndex, nx, useTempLimiter, restartIter );
-    }
-    catch (const std::bad_alloc& e)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (const std::exception& e)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/LiFuXu/3rdPartyLinking.cmake b/apps/gpu/GKS/LiFuXu/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/LiFuXu/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)  # presumably provides linkMPI -- confirm in the macro file
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake) # presumably provides linkCuda -- confirm in the macro file
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-# Metis (above) and JsonCpp (below) are commented out, so this file only runs the MPI and CUDA link steps.
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/LiFuXu/CMakeLists.txt b/apps/gpu/GKS/LiFuXu/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/LiFuXu/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR}) # presumably sets targetName from this folder's name -- confirm in the macro
-# Inputs handed to buildExe below: no extra link directories, five project libraries and their include roots.
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-# Glob the source files into MY_SRCS (done by CMakePackage.cmake, included relative to this folder)
-include(CMakePackage.cmake)
-# Create the executable from the collected sources and settings, then sort it into the ${gksAppFolder} group.
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/LiFuXu/CMakePackage.cmake b/apps/gpu/GKS/LiFuXu/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/LiFuXu/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-# File endings to collect: the C/C++ file types plus *.cu and *.cuh.
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-# Glob the source files into MY_SRCS; the variable is unset first so it starts out undefined.
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/LiFuXu/LiFuXu.cpp b/apps/gpu/GKS/LiFuXu/LiFuXu.cpp
deleted file mode 100644
index 209bd832ac2599bea187da887990cd83adfa9fa7..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/LiFuXu/LiFuXu.cpp
+++ /dev/null
@@ -1,390 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-#include "GksGpu/FlowStateData/ThermalDependencies.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-#include "GksGpu/Definitions/MemoryAccessPattern.h"
-
-real solution(Vec3 point, const double U, const double V, const double D, const double time)
-{
-    // Reference value at point: c1o4 times the product of one erf pair in x and one in y.
-    // The coordinates are shifted by U*time and V*time; both pairs share the denominator two*sqrt(D*time).
-    const double xShifted    = point.x - U * time;
-    const double yShifted    = point.y - V * time;
-    const double denominator = two * sqrt( D * time );
-    return c1o4 * ( erf( ( 0.225 - xShifted ) / denominator ) + erf( ( -0.175 + xShifted ) / denominator ) ) * ( erf( ( 0.225 - yShifted ) / denominator ) + erf( ( -0.175 + yShifted ) / denominator ) );
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-
-Vec3 cellCenter(std::shared_ptr<DataBase> dataBase, uint cellIdx)
-{
-    // Averages the x and y coordinates of the eight nodes referenced by
-    // dataBase->cellToNode[cellIdx]. Only x and y are accumulated; z keeps
-    // whatever value Vec3's default construction gives it.
-    // NOTE(review): the sums start from a default-constructed Vec3, which
-    // presumably is zero-initialised -- confirm in Core/VectorTypes.h.
-
-    const uint nodesPerCell = 8;
-
-    Vec3 center;
-
-    for( uint localNode = 0; localNode < nodesPerCell; localNode++ )
-    {
-        // Look the node up once and reuse it for both components.
-        const auto nodeIdx = dataBase->cellToNode[cellIdx][localNode];
-
-        // Each node contributes one eighth of its coordinate.
-        center.x += c1o8 * dataBase->nodeCoordinates[nodeIdx].x;
-        center.y += c1o8 * dataBase->nodeCoordinates[nodeIdx].y;
-    }
-
-    return center;
-}
-
-void printL_2Norm(const std::shared_ptr<DataBase> dataBase, const real U, const real V, const real D, const real time)
-{
-    // Prints the relative L2 error of the RHO_S_2 field against solution() over the level-0 bulk cells:
-    // sqrt( sum(err^2) / sum(analytic^2) ). U, V, D and time are forwarded unchanged to solution().
-    dataBase->copyDataDeviceToHost(); // the comparison below reads dataBase->dataHost
-
-    double l_2 = zero;
-    double sum = zero;
-
-    for (uint cellIdx = 0; cellIdx < dataBase->perLevelCount[0].numberOfBulkCells; cellIdx++)
-    {
-        Vec3 center = cellCenter(dataBase, cellIdx);
-
-        //double simulatedResult = dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ];
-        double simulatedResult = dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ];
-        double analyticResult  = solution(center, U, V, D, time);
-
-        // fabs always takes a double; a bare abs() can bind to int abs(int) when only the
-        // C declarations are visible, which would truncate the error to an integer.
-        double err = fabs(simulatedResult - analyticResult);
-
-        sum += analyticResult * analyticResult;
-        l_2 += err * err;
-    }
-    l_2 = sqrt( l_2/sum );
-    std::cout << std::endl << "The l2 norm is " << l_2 << std::endl;
-}
-
-void printL_MaxNorm(const std::shared_ptr<DataBase> dataBase, const real U, const real V, const real D, const real time)
-{
-    // Prints the largest absolute difference between the RHO_S_2 field and solution() over the
-    // level-0 bulk cells. U, V, D and time are forwarded unchanged to solution().
-    dataBase->copyDataDeviceToHost(); // the comparison below reads dataBase->dataHost
-
-    double max = zero;
-
-    for (uint cellIdx = 0; cellIdx < dataBase->perLevelCount[0].numberOfBulkCells; cellIdx++)
-    {
-        Vec3 center = cellCenter(dataBase, cellIdx);
-
-        //double simulatedResult = dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ];
-        double simulatedResult = dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ];
-        double analyticResult  = solution(center, U, V, D, time);
-
-        // fabs always takes a double; a bare abs() can bind to int abs(int) and truncate the error.
-        double err = fabs(simulatedResult - analyticResult);
-
-        if( err > max ) max = err;
-
-    }
-    std::cout << std::endl << "The max norm is " << max << std::endl;
-}
-
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    // Sets up a fully periodic, one-cell-thick grid, initialises S_2 from solution() at time 2e-3, runs maxIter
-    // time steps, writes VTK files to path + simulationName + "_<iter>" and prints the L2 and max norms.
-    uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-
-    real dx = L / real(nx);
-
-    real U = 100.0;
-
-    real Ma = 0.1;
-
-    real Pr  = 1.0;
-    real K   = 2.0;
-    
-    real rho = 1.0;
-    
-    real mu = 0.01;
-
-    real D = 1.5;
-
-    real cs = U / Ma;
-    PrimitiveVariables prim( rho, U, U, 0.0, ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * cs * cs ) );
-
-    real CFL = 0.25;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * D ) / ( U * dx * rho ) ) ) );
-
-    dt = 1.0e-6; // overrides the CFL-based estimate computed above
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D  = D;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid( 0.0, 0.0, -0.5*dx,  
-                                  L,   L,  0.5*dx, dx);
-
-    gridBuilder->setPeriodicBoundaryCondition(true, true, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(1);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-
-    bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
-    bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*dx; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*dx; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-        // Work on a per-cell copy: writing to the captured prim would leave the returned state without this cell's S_2.
-        PrimitiveVariables localPrim = prim;
-
-        //localPrim.S_1 = solution(cellCenter, U, U, D, 2e-3);
-        localPrim.S_2 = solution(cellCenter, U, U, D, 2e-3);
-
-        return toConservedVariables(localPrim, parameters.K);
-    });
-
-    //std::cout << toConservedVariables( PrimitiveVariables( rho, 0.0, 0.0, 0.0, lambdaHot, S_1, S_2 ), parameters.K ).rhoE << std::endl;
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level ); // run on every level, not only level 0
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    uint maxIter = 1000;
-
-    for( uint iter = 1; iter <= maxIter; iter++ )
-    {
-        cupsAnalyzer.run( iter );
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            ( iter % 100 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        convergenceAnalyzer.run( iter );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    std::cout << 2e-3 + maxIter * dt << std::endl; // time at which the norms below are evaluated
-
-    printL_2Norm  (dataBase, U, U, D, 2e-3 + maxIter * dt);
-    printL_MaxNorm(dataBase, U, U, D, 2e-3 + maxIter * dt);
-}
-
-int main( int argc, char* argv[])
-{
-    // Runs the LiFuXu test case. argc/argv are not evaluated; the output path is hard-coded.
-    std::string path( "F:/Work/Computations/out/LiFuXu/" ); // alternative: "out/"
-    std::string simulationName ( "LiFuXu" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-    // std::bad_alloc derives from std::exception, so its handler has to come first to be reachable.
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::bad_alloc& e)
-    {
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (const std::exception& e)
-    {
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/MethaneFlame/3rdPartyLinking.cmake b/apps/gpu/GKS/MethaneFlame/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MethaneFlame/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)  # presumably provides linkMPI -- confirm in the macro file
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake) # presumably provides linkCuda -- confirm in the macro file
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-# Metis (above) and JsonCpp (below) are commented out, so this file only runs the MPI and CUDA link steps.
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/MethaneFlame/CMakeLists.txt b/apps/gpu/GKS/MethaneFlame/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MethaneFlame/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR}) # presumably sets targetName from this folder's name -- confirm in the macro
-# Inputs handed to buildExe below: no extra link directories, five project libraries and their include roots.
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-# Glob the source files into MY_SRCS (done by CMakePackage.cmake, included relative to this folder)
-include(CMakePackage.cmake)
-# Create the executable from the collected sources and settings, then sort it into the ${gksAppFolder} group.
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/MethaneFlame/CMakePackage.cmake b/apps/gpu/GKS/MethaneFlame/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MethaneFlame/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-# File endings to collect: the C/C++ file types plus *.cu and *.cuh.
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-# Glob the source files into MY_SRCS; the variable is unset first so it starts out undefined.
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/MethaneFlame/MethaneFlame.cpp b/apps/gpu/GKS/MethaneFlame/MethaneFlame.cpp
deleted file mode 100644
index 42192f8db68dcc294926f7a6bcb770ad59043cdd..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MethaneFlame/MethaneFlame.cpp
+++ /dev/null
@@ -1,387 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/Extrapolation.h"
-#include "GksGpu/BoundaryConditions/Inflow.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 32;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    real L = 3.0;
-    real H = 1.0;
-
-    real dx = H / real(nx);
-
-    real Re  = 100.0;
-    real Ba  = 0.1;
-    real eps = 1.2;
-    real Pr  = 0.71;
-    real K   = 8.0;
-    
-    real U   = 1.0;
-
-    real g   = 9.81;
-    real rho = 1.2;
-
-    real S_1 = 0.0;
-    real S_2 = 0.5;
-
-    real R_Mixture = S_1               * 8.31445984848 / 16.04e-3      // O2
-				   + S_2               * 8.31445984848 / 32.00e-3      // CH4
-		           + (1.0 - S_1 - S_2) * 8.31445984848 / 28.00e-3;     // N2
-
-    real lambdaCold = 0.5 / ( R_Mixture *  300 ) * 1000.0;
-    real lambdaHot  = 0.5 / ( R_Mixture * 1200 ) * 1000.0;
-    
-    real mu = U * rho * 0.25 * H / Re;
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * lambdaCold ) );
-
-    real CFL = 0.025;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt   << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "Ma = " << U/cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu   << " kg/sm\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D  = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambdaCold;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //gridBuilder->addCoarseGrid( 0.0, -0.5*H, -0.5*dx,  
-    //                              L,  0.5*H,  0.5*dx, dx );
-
-    gridBuilder->addCoarseGrid( 0.0  , -0.5*H, -0.5*H,  
-                                2.0*L,  0.5*H,  0.5*H, dx );
-
-    Cuboid  box  (     -H, -H, -H,
-                    1.1*L,  H,  H );
-
-    Sphere  sphere( 0.0, 0.0, 0.0, 0.15 );
-
-    Sphere  sphere2( 0.0, 0.0, 0.0, 0.08 );
-    
-    TriangularMesh* refCylinder = TriangularMesh::make("F:/Work/Computations/out/MethaneFlame/refCylinder.stl");
-    //TriangularMesh* refCylinder = TriangularMesh::make("inp/refCylinder.stl");
-
-    gridBuilder->setNumberOfLayers(0,10);
-
-    //gridBuilder->addGrid( &box, 1 );
-
-    //gridBuilder->addGrid( refCylinder, 2 );
-
-    //gridBuilder->addGrid( &sphere, 3 );
-
-    //gridBuilder->addGrid( &sphere2, 4 );
-
-    gridBuilder->setEnableFixRefinementIntoTheWall(true);
-
-    gridBuilder->setPeriodicBoundaryCondition(false, true, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold, false, 1.0, 0.0 );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Extrapolation>( dataBase );
-
-
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < 0.0; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >   L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-
-    bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*H; } );
-    bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-
-    //bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*dx; } );
-    //bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*dx; } );
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*H; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcJetFuel   = std::make_shared<Inflow>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot, rho, 1.0, 0.0, -64.0, 1.0, 0.0 );
-    SPtr<BoundaryCondition> bcJetOxygen = std::make_shared<Inflow>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot, rho, 1.0, 0.0, -64.0, 0.0, 0.0 );
-
-
-
-    bcJetFuel->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-        return center.x < 0.0 &&
-               std::sqrt(center.y*center.y + center.z*center.z) < 0.125 / 4.0;
-    } );
-
-    bcJetOxygen->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-        return center.x < 0.0 &&
-               std::sqrt(center.y*center.y + center.z*center.z) < 0.125;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    //dataBase->boundaryConditions.push_back( bcJetOxygen );
-    //dataBase->boundaryConditions.push_back( bcJetFuel );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        real rhoLocal = rho;
-        real lambdaLocal = lambdaCold;
-        //real lambdaLocal = lambdaHot;
-
-        //real radius = sqrt( cellCenter.x*cellCenter.x + cellCenter.y*cellCenter.y + cellCenter.z*cellCenter.z );
-
-        //if( radius < 0.2 )
-        //{
-        //    lambdaLocal = lambdaHot;
-        //}
-
-        //lambdaLocal = lambdaCold + ( lambdaHot - lambdaCold ) * exp( - 10. * ( cellCenter.x*cellCenter.x + cellCenter.y*cellCenter.y + cellCenter.z*cellCenter.z ) );
-
-        //lambdaLocal = lambdaCold + ( lambdaHot - lambdaCold ) * ( 0.5 * M_PI + atan( - 1000.0 * ( radius - 0.1) ) ) / M_PI;
-
-        //rhoLocal = rho * lambdaLocal / lambdaCold;
-
-        //lambdaLocal = lambdaCold + ( lambdaHot - lambdaCold ) * exp( - 10. * ( (cellCenter.x-0.5)*(cellCenter.x-0.5) ) );
-
-        real radius = sqrt( cellCenter.y*cellCenter.y + cellCenter.z*cellCenter.z );
-        
-        real factor = 0.0;
-        //if( radius < 0.125 ) factor = ( 1.0 - 64.0 * radius * radius  );
-
-        return toConservedVariables( PrimitiveVariables( rhoLocal, factor * U, 0.0, 0.0, lambdaLocal, 0.0, 0.0 ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, 0 );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 1000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 100000; iter++ )
-    {
-        //uint T = 10000;
-        //if( iter <= T )
-        //{
-        //    std::dynamic_pointer_cast<Inflow>(bcJetFuel  )->lambda = lambdaCold + ( lambdaHot - lambdaCold ) * ( real(iter) / real(T) );
-        //    std::dynamic_pointer_cast<Inflow>(bcJetOxygen)->lambda = lambdaCold + ( lambdaHot - lambdaCold ) * ( real(iter) / real(T) );
-        //}
-        ////else if( iter <= 2*T )
-        ////{
-        ////    std::dynamic_pointer_cast<Inflow>(bcJetFuel  )->lambda = lambdaHot - ( lambdaHot - lambdaCold ) * ( real(iter-T) / real(T) );
-        ////    std::dynamic_pointer_cast<Inflow>(bcJetOxygen)->lambda = lambdaHot - ( lambdaHot - lambdaCold ) * ( real(iter-T) / real(T) );
-        ////}
-
-        //if( iter == T )
-        //{
-        //    std::dynamic_pointer_cast<Inflow>(bcJetFuel)->S_1 = 1.0;
-        //    std::dynamic_pointer_cast<Inflow>(bcJetFuel)->S_2 = 0.0;
-        //}
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            //( iter < 10       && iter % 1      == 0 ) ||
-            //( iter < 100      && iter % 10     == 0 ) ||
-            //( iter < 1000     && iter % 100    == 0 ) ||
-            //( iter < 10000    && iter % 1000   == 0 ) ||
-            ( iter < 1000000    && iter % 10000  == 0 ) ||
-            ( iter < 100000000  && iter % 100000 == 0 )
-            //( iter > 18400 && iter % 10 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-}
-
-int main( int argc, char* argv[])
-{
-    std::string path( "F:/Work/Computations/out/MethaneFlame/" );
-    //std::string path( "out/" );
-    std::string simulationName ( "MethaneFlame" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/MultiGPU/3rdPartyLinking.cmake b/apps/gpu/GKS/MultiGPU/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MultiGPU/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/MultiGPU/CMakeLists.txt b/apps/gpu/GKS/MultiGPU/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MultiGPU/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/MultiGPU/CMakePackage.cmake b/apps/gpu/GKS/MultiGPU/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MultiGPU/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/MultiGPU/MultiGPU.cpp b/apps/gpu/GKS/MultiGPU/MultiGPU.cpp
deleted file mode 100644
index 7b0c81fa700f0d1aa0260680617050a15496e326..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MultiGPU/MultiGPU.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <sstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/Communication/Communicator.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-#include "GksGpu/Communication/MpiUtility.h"
-
-//////////////////////////////////////////////////////////////////////////
-
-void performanceTest( std::string path, std::string simulationName, uint decompositionDimension, uint nx, bool strongScaling )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    //CudaUtility::setCudaDevice(rank % devicesPerNode);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real H = 1.0;
-
-    real L = 1.0;
-
-    if( strongScaling ) L = H / double( mpiWorldSize );
-
-    real dx = H / real(nx);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = 0;
-    parameters.Pr = 1;
-    parameters.mu = 0.01;
-
-    parameters.force.x = 0.1;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = 0.0001;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = 1.0e-2;
-    
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if( decompositionDimension == 1 && mpiWorldSize > 1 )
-    {
-        gridBuilder->addCoarseGrid( rank*L - 0.5*L - 5.0*dx, -0.5*H, -0.5*H,  
-                                    rank*L + 0.5*L + 5.0*dx,  0.5*H,  0.5*H, dx);
-
-        gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( rank*L - 0.5*L, rank*L + 0.5*L, 
-                                                                         -H        ,      H,
-                                                                         -H        ,      H ) );
-    }else
-    {
-        gridBuilder->addCoarseGrid( -0.5*H, -0.5*H, -0.5*H,  
-                                     0.5*H,  0.5*H,  0.5*H, dx);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //gridBuilder->setPeriodicBoundaryCondition(false, true, true);
-    gridBuilder->setPeriodicBoundaryCondition(true, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if( decompositionDimension == 1 && mpiWorldSize > 1 )
-    {
-        gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS );
-        gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, (rank + 1 + mpiWorldSize) % mpiWorldSize );
-
-        gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS );
-        gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, (rank - 1 + mpiWorldSize) % mpiWorldSize );
-    }
-    //if( decompositionDimension == 1 && mpiWorldSize > 1 && rank == 0 )
-    //{
-    //    gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS );
-    //    gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, (rank + 1 + mpiWorldSize) % mpiWorldSize );
-    //}
-    //else
-    //{
-    //    gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS );
-    //    gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, (rank - 1 + mpiWorldSize) % mpiWorldSize );
-    //}
-
-    //gridBuilder->writeGridsToVtk(path + "/Grid_rank_" + std::to_string(rank) + "_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    //meshAdapter.writeMeshFaceVTK(path + "/Faces_rank_" + std::to_string(rank) + ".vtk");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.1, 0.0), false );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcMY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*H; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*H; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    if( mpiWorldSize == 1 )
-    {
-        dataBase->boundaryConditions.push_back( bcMX );
-        dataBase->boundaryConditions.push_back( bcPX );
-    }
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_HIGH << "bcMX ==> " << bcMX->numberOfCellsPerLevel[0] << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "bcPX ==> " << bcPX->numberOfCellsPerLevel[0] << "\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "bcMY ==> " << bcMY->numberOfCellsPerLevel[0] << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "bcPY ==> " << bcPY->numberOfCellsPerLevel[0] << "\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "bcMZ ==> " << bcMZ->numberOfCellsPerLevel[0] << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "bcPZ ==> " << bcPZ->numberOfCellsPerLevel[0] << "\n";
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-    
-    //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0].size()) << "\n";
-    //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0][0].get()) << "\n";
-    //*logging::out << logging::Logger::WARNING << int(dataBase->communicators[0][1].get()) << "\n";
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables
-    {
-        return toConservedVariables( PrimitiveVariables( 1.0, 1.0, 0.0, 0.0, parameters.lambdaRef ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_0", mpiWorldSize );
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank) );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    const uint numberOfIterations = 1000;
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, numberOfIterations );
-
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= numberOfIterations; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-    }
-
-    cupsAnalyzer.run( numberOfIterations, parameters.dt );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_final_rank_" + std::to_string(rank) );
-    
-    //////////////////////////////////////////////////////////////////////////
-
-    int crashCellIndex = dataBase->getCrashCellIndex();
-    if( crashCellIndex >= 0 )
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "============= Simulation Crashed!!! =============\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-    }
-}
-
-int main( int argc, char* argv[])
-{
-    //////////////////////////////////////////////////////////////////////////
-
-    int rank = 0;
-    int mpiWorldSize = 1;
-#ifdef USE_CUDA_AWARE_MPI
-    int rank         = MpiUtility::getMpiRankBeforeInit();
-    int mpiWorldSize = MpiUtility::getMpiWorldSizeBeforeInit();
-#else
-    MPI_Init(&argc, &argv);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/MultiGPU/" );
-#else
-    //std::string path( "/home/stephan/Computations/out/" );
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "MultiGPU_np_" + std::to_string(mpiWorldSize) );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    bool strongScaling = false;
-    uint nx = 128;
-
-    if( argc > 1 ) path += argv[1]; path += "/";
-    if( argc > 2 ) nx = atoi( argv[2] );
-    if( argc > 3 ) strongScaling = true;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    // Important: for Cuda-Aware MPI the device must be set before MPI_Init()
-    int deviceCount = CudaUtility::getCudaDeviceCount();
-
-    if(deviceCount == 0)
-    {
-        std::stringstream msg;
-        msg << "No devices devices found!" << std::endl;
-        *logging::out << logging::Logger::WARNING << msg.str(); msg.str("");
-    }
-
-    CudaUtility::setCudaDevice( rank % deviceCount );
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef USE_CUDA_AWARE_MPI
-    MPI_Init(&argc, &argv);
-#endif
-    
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-
-    try
-    {
-        performanceTest( path, simulationName, 1, nx, strongScaling );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-
-    logFile.close();
-
-    MPI_Finalize();
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/MultiGPU_nD/3rdPartyLinking.cmake b/apps/gpu/GKS/MultiGPU_nD/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MultiGPU_nD/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/MultiGPU_nD/CMakeLists.txt b/apps/gpu/GKS/MultiGPU_nD/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MultiGPU_nD/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/MultiGPU_nD/CMakePackage.cmake b/apps/gpu/GKS/MultiGPU_nD/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MultiGPU_nD/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/MultiGPU_nD/MultiGPU_nD.cpp b/apps/gpu/GKS/MultiGPU_nD/MultiGPU_nD.cpp
deleted file mode 100644
index bc5a488b55592345de7814febd5fb2b6d5cdf129..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/MultiGPU_nD/MultiGPU_nD.cpp
+++ /dev/null
@@ -1,513 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <sstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/Communication/Communicator.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-#include "GksGpu/Communication/MpiUtility.h"
-
-//////////////////////////////////////////////////////////////////////////
-
-void performanceTest( std::string path, std::string simulationName, uint decompositionDimension, uint nx, bool strongScaling )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    //CudaUtility::setCudaDevice(rank % devicesPerNode);
-    
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    int sideLengthX, sideLengthY, sideLengthZ, rankX, rankY, rankZ;
-
-    if( decompositionDimension == 1 )
-    {
-        if      (mpiWorldSize == 1 ) { sideLengthX = 1 ; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 2 ) { sideLengthX = 2 ; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 4 ) { sideLengthX = 4 ; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 8 ) { sideLengthX = 8 ; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 16) { sideLengthX = 16; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 32) { sideLengthX = 32; sideLengthY = 1; sideLengthZ = 1; }
-
-        rankX = rank;
-        rankY = 0;
-        rankZ = 0;
-    }
-    else if( decompositionDimension == 2 )
-    {
-        if      (mpiWorldSize == 1 ) { sideLengthX = 1; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 2 ) { sideLengthX = 2; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 4 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 1; }
-        else if (mpiWorldSize == 8 ) { sideLengthX = 4; sideLengthY = 2; sideLengthZ = 1; }
-        else if (mpiWorldSize == 16) { sideLengthX = 4; sideLengthY = 4; sideLengthZ = 1; }
-        else if (mpiWorldSize == 32) { sideLengthX = 8; sideLengthY = 4; sideLengthZ = 1; }
-
-        rankX = rank % sideLengthX;
-        rankY = rank / sideLengthX;
-        rankZ = 0;
-    }
-    else if( decompositionDimension == 3 )
-    {
-        if      (mpiWorldSize == 1 ) { sideLengthX = 1; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 2 ) { sideLengthX = 2; sideLengthY = 1; sideLengthZ = 1; }
-        else if (mpiWorldSize == 4 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 1; }
-        else if (mpiWorldSize == 8 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 2; }
-        else if (mpiWorldSize == 16) { sideLengthX = 4; sideLengthY = 2; sideLengthZ = 2; }
-        else if (mpiWorldSize == 32) { sideLengthX = 4; sideLengthY = 4; sideLengthZ = 2; }
-
-        rankX =   rank %   sideLengthX;
-        rankY = ( rank % ( sideLengthX * sideLengthY ) ) /   sideLengthX;
-        rankZ =   rank                                   / ( sideLengthY * sideLengthX );
-    }
-
-    *logging::out << logging::Logger::INFO_HIGH << "SideLength = " << sideLengthX << " " << sideLengthY << " " << sideLengthZ << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "rank       = " << rankX << " " << rankY << " " << rankZ << "\n";
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L  = 1.0;
-
-    real LX = L;
-    real LY = L;
-    real LZ = L;
-
-    real dx = L / real(nx);
-
-    if( strongScaling )
-    {
-        if( decompositionDimension == 1 )
-        {
-            LX /= double(sideLengthX);
-        }
-        else if( decompositionDimension == 2 )
-        {
-            LX /= double(sideLengthX);
-            LY /= double(sideLengthY);
-        }
-        else if( decompositionDimension == 3 )
-        {
-            LX /= double(sideLengthX);
-            LY /= double(sideLengthY);
-            LZ /= double(sideLengthZ);
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    GksGpu::Parameters parameters;
-
-    parameters.K  = 0;
-    parameters.Pr = 1;
-    parameters.mu = 0.01;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = 0.0001 * ( double(128) / double(nx) );
-    parameters.dx = dx;
-
-    parameters.lambdaRef = 1.0e-2;
-    
-    parameters.forcingSchemeIdx = 2;
-
-    parameters.enableReaction = true;
-    
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real xOverlap = ( sideLengthX == 1 ) ? 0.0 : 5.0*dx;
-    real yOverlap = ( sideLengthY == 1 ) ? 0.0 : 5.0*dx;
-    real zOverlap = ( sideLengthZ == 1 ) ? 0.0 : 5.0*dx;
-
-    gridBuilder->addCoarseGrid(  rankX*LX    - 0.5*L - xOverlap,      rankY*LY    - 0.5*L - yOverlap,      rankZ*LZ    - 0.5*L - zOverlap,
-                                (rankX*LX+1) - 0.5*L + xOverlap,     (rankY*LY+1) - 0.5*L + yOverlap,     (rankZ*LZ+1) - 0.5*L + zOverlap, dx);
-
-    gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( rankX*LX - 0.5*L, (rankX+1)*LX - 0.5*L, 
-                                                                 rankY*LY - 0.5*L, (rankY+1)*LY - 0.5*L,
-                                                                 rankZ*LZ - 0.5*L, (rankZ+1)*LZ - 0.5*L  ) );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setPeriodicBoundaryCondition(sideLengthX == 1, sideLengthY == 1, sideLengthZ == 1);
-
-    *logging::out << logging::Logger::INFO_HIGH << "periodicity = " << (sideLengthX == 1) << " " << (sideLengthY == 1) << " " << (sideLengthZ == 1) << "\n";
-
-    gridBuilder->buildGrids(GKS, false);
-
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if( mpiWorldSize > 1 )
-    {
-        int rankPX = ( (rankX + 1 + sideLengthX) % sideLengthX ) +    rankY                                    * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankMX = ( (rankX - 1 + sideLengthX) % sideLengthX ) +    rankY                                    * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankPY =    rankX                                    + ( (rankY + 1 + sideLengthY) % sideLengthY ) * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankMY =    rankX                                    + ( (rankY - 1 + sideLengthY) % sideLengthY ) * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankPZ =    rankX                                    +    rankY                                    * sideLengthX + ( (rankZ + 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY;
-        int rankMZ =    rankX                                    +    rankY                                    * sideLengthX + ( (rankZ - 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY;
-
-        if( sideLengthX > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS );
-        if( sideLengthX > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rankPX);
-
-        if( sideLengthX > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS );
-        if( sideLengthX > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX);
-
-        if( sideLengthY > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS );
-        if( sideLengthY > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PY, rankPY);
-
-        if( sideLengthY > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS );
-        if( sideLengthY > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY);
-
-        if( sideLengthZ > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PZ, GKS );
-        if( sideLengthZ > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PZ, rankPZ);
-
-        if( sideLengthZ > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MZ, GKS );
-        if( sideLengthZ > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MZ, rankMZ);
-
-        *logging::out << logging::Logger::INFO_HIGH << "neighborRanks = " << rankPX << " " << rankMX << " " << rankPY << " " << rankMY << " " << rankPZ << " " << rankMZ << "\n";
-    }
-
-    //gridBuilder->writeGridsToVtk(path + "/Grid_rank_" + std::to_string(rank) + "_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<GksGpu::DataBase>("GPU");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    for ( int i = 0; i < rank % GksGpu::CudaUtility::getCudaDeviceCount(); i++ ) MPI_Barrier(MPI_COMM_WORLD);
-
-    {
-        GksMeshAdapter meshAdapter(gridBuilder);
-
-        meshAdapter.inputGrid();
-
-        if (sideLengthX == 1 || sideLengthY == 1 || sideLengthZ == 1) meshAdapter.findPeriodicBoundaryNeighbors();
-
-        gridBuilder->getGrid(0)->freeMemory();
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-        SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::Periodic>(dataBase);
-        SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::Periodic>(dataBase);
-
-        if (sideLengthX == 1) bcMX->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.x < -0.5*L; });
-        if (sideLengthX == 1) bcPX->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.x > 0.5*L; });
-
-        //////////////////////////////////////////////////////////////////////////
-
-        SPtr<GksGpu::BoundaryCondition> bcMY = std::make_shared<GksGpu::Periodic>(dataBase);
-        SPtr<GksGpu::BoundaryCondition> bcPY = std::make_shared<GksGpu::Periodic>(dataBase);
-
-        if (sideLengthY == 1) bcMY->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.y < -0.5*L; });
-        if (sideLengthY == 1) bcPY->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.y > 0.5*L; });
-
-        //////////////////////////////////////////////////////////////////////////
-
-        SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::Periodic>(dataBase);
-        SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::Periodic>(dataBase);
-
-        if (sideLengthZ == 1) bcMZ->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.z < -0.5*L; });
-        if (sideLengthZ == 1) bcPZ->findBoundaryCells(meshAdapter, true, [&](Vec3 center) { return center.z > 0.5*L; });
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if (sideLengthX == 1) dataBase->boundaryConditions.push_back(bcMX);
-        if (sideLengthX == 1) dataBase->boundaryConditions.push_back(bcPX);
-
-        if (sideLengthY == 1) dataBase->boundaryConditions.push_back(bcMY);
-        if (sideLengthY == 1) dataBase->boundaryConditions.push_back(bcPY);
-
-        if (sideLengthZ == 1) dataBase->boundaryConditions.push_back(bcMZ);
-        if (sideLengthZ == 1) dataBase->boundaryConditions.push_back(bcPZ);
-
-        //////////////////////////////////////////////////////////////////////////
-
-        *logging::out << logging::Logger::INFO_HIGH << "NumberOfBoundaryConditions = " << (int)dataBase->boundaryConditions.size() << "\n";
-
-        if (sideLengthX == 1) *logging::out << logging::Logger::INFO_HIGH << "bcMX ==> " << bcMX->numberOfCellsPerLevel[0] << "\n";
-        if (sideLengthX == 1) *logging::out << logging::Logger::INFO_HIGH << "bcPX ==> " << bcPX->numberOfCellsPerLevel[0] << "\n";
-
-        if (sideLengthY == 1) *logging::out << logging::Logger::INFO_HIGH << "bcMY ==> " << bcMY->numberOfCellsPerLevel[0] << "\n";
-        if (sideLengthY == 1) *logging::out << logging::Logger::INFO_HIGH << "bcPY ==> " << bcPY->numberOfCellsPerLevel[0] << "\n";
-
-        if (sideLengthZ == 1) *logging::out << logging::Logger::INFO_HIGH << "bcMZ ==> " << bcMZ->numberOfCellsPerLevel[0] << "\n";
-        if (sideLengthZ == 1) *logging::out << logging::Logger::INFO_HIGH << "bcPZ ==> " << bcPZ->numberOfCellsPerLevel[0] << "\n";
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-        dataBase->setMesh(meshAdapter);
-
-        dataBase->setCommunicators(meshAdapter);
-
-        GksGpu::CudaUtility::printCudaMemoryUsage();
-    }
-
-    for ( int i = 0; i < GksGpu::CudaUtility::getCudaDeviceCount() - rank % GksGpu::CudaUtility::getCudaDeviceCount(); i++ ) MPI_Barrier(MPI_COMM_WORLD);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksGpu::Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> GksGpu::ConservedVariables
-    {
-        real U = 0.1;
-
-        real ULocal =   0.1 + U * sin( 2.0 * M_PI * cellCenter.x ) * cos( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z );
-        real VLocal =   0.1 - U * cos( 2.0 * M_PI * cellCenter.x ) * sin( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z );
-        real WLocal =   0.1;
-
-        real rho = 1.0;
-
-        real p0 = 0.5 * rho / parameters.lambdaRef;
-
-        real pLocal = p0 + rho * U * U / 16.0 * ( cos( 2.0 * M_PI * 2.0 * cellCenter.x ) + cos( 2.0 * M_PI * 2.0 * cellCenter.y ) ) * ( 2.0 + cos( 2.0 * M_PI * 2.0 * cellCenter.z ) );
-
-        real rhoLocal = 2.0 * pLocal * parameters.lambdaRef;
-
-        //ULocal = cellCenter.x;
-        //VLocal = cellCenter.y;
-        //WLocal = cellCenter.z;
-
-        //rhoLocal = rank + 1;
-
-        return GksGpu::toConservedVariables( GksGpu::PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K );
-    });
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    GksGpu::Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    //if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_0", mpiWorldSize );
-
-    //writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank) );
-    
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    const uint numberOfIterations = 1000;
-
-    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );
-
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= numberOfIterations; iter++ )
-    {
-        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        cupsAnalyzer.run( iter, parameters.dt );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //dataBase->copyDataDeviceToHost();
-
-    //if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_final", mpiWorldSize );
-
-    //writeVtkXML( dataBase, parameters, 0, path + simulationName + "_final_rank_" + std::to_string(rank) );
-    
-    //////////////////////////////////////////////////////////////////////////
-
-    int crashCellIndex = dataBase->getCrashCellIndex();
-    if( crashCellIndex >= 0 )
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "============= Simulation Crashed!!! =============\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-    }
-}
-
-int main( int argc, char* argv[])
-{
-    //////////////////////////////////////////////////////////////////////////
-    
-    int rank = 0;
-    int mpiWorldSize = 1;
-#ifdef USE_CUDA_AWARE_MPI
-    int rank         = MpiUtility::getMpiRankBeforeInit();
-    int mpiWorldSize = MpiUtility::getMpiWorldSizeBeforeInit();
-#else
-    MPI_Init(&argc, &argv);
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/MultiGPU/" );
-#else
-    //std::string path( "/home/stephan/Computations/out/" );
-    std::string path( "out/" );
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-    bool strongScaling = false;
-    uint nx = 128;
-    uint decompositionDimension = 3;
-
-    if( argc > 1 ) nx = atoi( argv[1] );
-    if( argc > 2 ) decompositionDimension = atoi( argv[2] );
-    if( argc > 3 ) strongScaling = true;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    std::string simulationName ( "MultiGPU" );
-
-    if( strongScaling ) simulationName += "_strongScaling";
-    else                simulationName += "_weakScaling";
-
-    simulationName += "_D_" + std::to_string(decompositionDimension);
-
-    simulationName += "_nx_" + std::to_string(nx);
-
-    simulationName += "_np_" + std::to_string(mpiWorldSize);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    // Important: for Cuda-Aware MPI the device must be set before MPI_Init()
-    int deviceCount = GksGpu::CudaUtility::getCudaDeviceCount();
-
-    if(deviceCount == 0)
-    {
-        std::stringstream msg;
-        msg << "No devices devices found!" << std::endl;
-        *logging::out << logging::Logger::WARNING << msg.str(); msg.str("");
-    }
-
-    GksGpu::CudaUtility::setCudaDevice( rank % deviceCount );
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef USE_CUDA_AWARE_MPI
-    MPI_Init(&argc, &argv);
-#endif
-    
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-
-    try
-    {
-        performanceTest( path, simulationName, decompositionDimension, nx, strongScaling );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-
-    logFile.close();
-
-    MPI_Finalize();
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/PoolFire/3rdPartyLinking.cmake b/apps/gpu/GKS/PoolFire/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/PoolFire/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/PoolFire/CMakeLists.txt b/apps/gpu/GKS/PoolFire/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/PoolFire/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/PoolFire/CMakePackage.cmake b/apps/gpu/GKS/PoolFire/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/PoolFire/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/PoolFire/PoolFire.cpp b/apps/gpu/GKS/PoolFire/PoolFire.cpp
deleted file mode 100644
index 0db6adb4adb24b91624fc1fd457359f758cfc19d..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/PoolFire/PoolFire.cpp
+++ /dev/null
@@ -1,488 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure2.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/PassiveScalarDiriclet.h"
-#include "GksGpu/BoundaryConditions/InflowComplete.h"
-#include "GksGpu/BoundaryConditions/Open.h"
-#include "GksGpu/BoundaryConditions/Extrapolation.h"
-#include "GksGpu/BoundaryConditions/Symmetry.h"
-#include "GksGpu/BoundaryConditions/CreepingMassFlux.h"
-#include "GksGpu/BoundaryConditions/MassCompensation.h"
-
-#include "GksGpu/Interface/Interface.h"
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName, uint restartIter )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 4.0;
-    real H = 4.0;
-    real W = 0.125;
-
-    real R = 0.5;
-
-    real dx = H / real(nx);
-
-    real U = 0.0125;
-
-    real eps = 2.0;
-    real Pr  = 0.71;
-    real K   = 5.0;
-    
-    real g   = 9.81;
-    real rho = 1.2;
-    
-    real mu = 1.5e-5;
-
-    PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
-
-    setLambdaFromT( prim, 3.0 / T_FAKTOR );
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
-
-    real CFL = 0.125;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "HRR = " << U * rho * M_PI * R * R * 800000.0 / 0.016 / 1000.0 << " kW\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = prim.lambda;
-
-    parameters.rhoRef    = rho;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    parameters.enableReaction = true;
-
-    *logging::out << logging::Logger::INFO_HIGH << "Pr = " << parameters.Pr << "\n";
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    bool threeDimensional = false;
-
-    if( threeDimensional )
-    {
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, 0.0,
-                                    0.5*L,  0.5*L, H, dx);
-    }
-    else
-    {
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*dx, 0.0,
-                                    0.5*L,  0.5*dx, H, dx);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    //TriangularMesh* stl = TriangularMesh::make("F:/Work/Computations/inp/Unterzug.stl");
-    TriangularMesh* stl = TriangularMesh::make("F:/Work/Computations/inp/Ring.stl");
-#else
-    //TriangularMesh* stl = TriangularMesh::make("inp/Unterzug.stl");
-    TriangularMesh* stl = TriangularMesh::make("inp/Ring.stl");
-#endif
-
-    //gridBuilder->addGeometry(stl);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    VerticalCylinder cylinder( 0.0, 0.0, 0.0, 1.1, 4.0   );
-    VerticalCylinder cylinder2( 0.0, 0.0, 0.0, 0.6, 0.25   );
-    
-    Conglomerate refRing;
-
-    refRing.add     ( new VerticalCylinder( 0.0, 0.0, 0.0, 0.6, 0.125 ) );
-    refRing.subtract( new VerticalCylinder( 0.0, 0.0, 0.0, 0.4, 1.0    ) );
-    //refRing.add     ( new VerticalCylinder( 0.0, 0.0, 0.0, 0.15, 0.125 ) );
-    //refRing.subtract( new VerticalCylinder( 0.0, 0.0, 0.0, 0.05, 1.0    ) );
-
-    gridBuilder->setNumberOfLayers(0,20);
-
-    gridBuilder->addGrid( &cylinder,  1 );
-    gridBuilder->addGrid( &cylinder2, 3 );
-
-    gridBuilder->setNumberOfLayers(10,20);
-
-    //gridBuilder->addGrid( &refRing, 2 );
-    //gridBuilder->addGrid( stl, 2 );
-
-    if( threeDimensional ) gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-    else                   gridBuilder->setPeriodicBoundaryCondition(false, true,  false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    if( !threeDimensional )
-        meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    real openBoundaryVelocityLimiter = 1.0;
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<MassCompensation>( dataBase, rho, U, prim.lambda );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<MassCompensation>( dataBase, rho, U, prim.lambda );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, false );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, false );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //SPtr<BoundaryCondition> bcMX_2 = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, false );
-    //SPtr<BoundaryCondition> bcPX_2 = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, false );
-    SPtr<BoundaryCondition> bcMX_2 = std::make_shared<Symmetry>( dataBase, 'x' );
-    SPtr<BoundaryCondition> bcPX_2 = std::make_shared<Symmetry>( dataBase, 'x' );
-    //SPtr<BoundaryCondition> bcMX_2 = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-    //SPtr<BoundaryCondition> bcPX_2 = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-
-    bcMX_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L && center.z > H - 0.5; } );
-    bcPX_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L && center.z > H - 0.5; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMY;
-    SPtr<BoundaryCondition> bcPY;
-
-    if( threeDimensional )
-    {
-        //bcMY = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-        //bcPY = std::make_shared<Open>( dataBase, prim, openBoundaryVelocityLimiter );
-        bcMY = std::make_shared<Symmetry>( dataBase, 'y' );
-        bcPY = std::make_shared<Symmetry>( dataBase, 'y' );
-
-        bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
-        bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
-    }
-    else
-    {
-        bcMY = std::make_shared<Periodic>(dataBase);
-        bcPY = std::make_shared<Periodic>(dataBase);
-
-        bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < -0.5*dx; });
-        bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y >  0.5*dx; });
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), false );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, 0.0, prim.lambda, 0.0, 0.0) );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<Open>( dataBase );
-
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<Open>( dataBase, prim );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<Extrapolation>( dataBase );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //SPtr<BoundaryCondition> burner = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), 0.5*prim.lambda,  0.0, true );
-
-    //SPtr<BoundaryCondition> burner = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, U, prim.lambda, 1.0, 1.0) );
-    SPtr<BoundaryCondition> burner = std::make_shared<CreepingMassFlux>( dataBase, rho, U, prim.lambda );
-
-    burner->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-
-        if( threeDimensional )
-            return center.z < 0.0 && std::sqrt(center.x*center.x + center.y*center.y) < R;
-        else
-            return center.z < 0.0 && std::sqrt(center.x*center.x) < R && std::sqrt(center.y*center.y) < 0.5 * dx;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( burner );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX_2 );
-    dataBase->boundaryConditions.push_back( bcPX_2 );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-    
-    if( restartIter == INVALID_INDEX )
-    {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
-
-            PrimitiveVariables primLocal = prim;
-
-            //primLocal.rho = rho * std::exp( - ( 2.0 * g * H * prim.lambda ) * cellCenter.z / H );
-
-            real r = sqrt(cellCenter.x * cellCenter.x /*+ cellCenter.y * cellCenter.y*/ + cellCenter.z * cellCenter.z);
-
-            //if( r < 0.6 ) primLocal.S_1 = 1.0 - r;
-
-            //if( r < 0.5 ) prim.lambda /= (two - four*r*r);
-
-            return toConservedVariables(primLocal, parameters.K);
-        });
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-    }
-    else
-    {
-        Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" );
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 1000 );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 2000000; iter++ )
-    {
-        uint runUpTime = 10000;
-
-        if( iter < runUpTime )
-        {
-            //std::dynamic_pointer_cast<InflowComplete>(burner)->prim.S_1 =       1.0 * ( real(iter) / 20000.0 );
-            //std::dynamic_pointer_cast<InflowComplete>(burner)->prim.S_2 = 1.0 - 1.0 * ( real(iter) / 20000.0 );
-
-            //std::dynamic_pointer_cast<InflowComplete>(burner)->prim.W = U * ( real(iter) / 20000.0 );
-
-            //std::dynamic_pointer_cast<CreepingMassFlux>(burner)->velocity = U * ( real(iter) / runUpTime );
-
-            //parameters.mu = mu + 10.0 * mu * ( 1.0 - ( real(iter) / 20000.0 ) );
-
-            //parameters.dt = 0.2 * dt + ( dt - 0.2 * dt ) * ( real(iter) / 40000.0 );
-        }
-
-        //if( iter == 5001 )
-        //{
-        //    parameters.enableReaction = false;
-        //    std::dynamic_pointer_cast<CreepingMassFlux>(burner)->velocity = -1.0;
-        //}
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            //( iter >= 100 && iter % 10 == 0 ) || 
-            ( iter % 400 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        if( iter % 4000 == 0 )
-        {
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
-        }
-
-        //turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/PoolFire/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "PoolFire" );
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        uint restartIter = INVALID_INDEX;
-        //uint restartIter = 33000;
-
-        if( argc > 1 ) restartIter = atoi( argv[1] );
-
-        thermalCavity( path, simulationName, restartIter );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/PropaneFlame/3rdPartyLinking.cmake b/apps/gpu/GKS/PropaneFlame/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/PropaneFlame/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/PropaneFlame/CMakeLists.txt b/apps/gpu/GKS/PropaneFlame/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/PropaneFlame/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/PropaneFlame/CMakePackage.cmake b/apps/gpu/GKS/PropaneFlame/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/PropaneFlame/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/PropaneFlame/PropaneFlame.cpp b/apps/gpu/GKS/PropaneFlame/PropaneFlame.cpp
deleted file mode 100644
index bdee594864925933f6afe462e45b473f7d921ed9..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/PropaneFlame/PropaneFlame.cpp
+++ /dev/null
@@ -1,319 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/Inflow.h"
-#include "GksGpu/BoundaryConditions/Extrapolation.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real dx = 0.2;
-
-    real L = 33.4;
-    real W = 20.0;
-
-    real H = dx;
-
-    real Re  = 1.0e1;
-    real U  = 0.1;
-    real Ma = 0.1;
-    
-    real Pr  = 1.0;
-    real K   = 2.0;
-
-    real rho = 1.0;
-
-    real mu = U * rho * L / Re;
-
-    real cs = U / Ma;
-    real lambda = c1o2 * ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( cs * cs );
-
-    real CFL = 0.5;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid(-0.5*L, -0.5*W, -0.5*H,  
-                                0.5*L,  0.5*W,  0.5*H, dx);
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Pressure>( dataBase, 0.5 * rho / lambda );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Pressure>( dataBase, 0.5 * rho / lambda );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < - 0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >   0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMY = std::make_shared<Inflow>( dataBase, Vec3(0.0,   U, 0.0), lambda, rho, 0.0, 1.0, 0.0, 0.0 );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<Inflow>( dataBase, Vec3(0.0, - U, 0.0), lambda, rho, 0.0, 1.0, 0.0, 0.0 );
-
-    bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < - 0.5*W; } );
-    bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >   0.5*W; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*H; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        return toConservedVariables( PrimitiveVariables( rho, 0.0, 0.0, 0.0, lambda ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-    {
-        for (SPtr<BoundaryCondition> bc : dataBase->boundaryConditions) {
-            bc->runBoundaryConditionKernel(dataBase, parameters, level);
-        }
-    }
-
-    dataBase->copyDataDeviceToHost();
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    convergenceAnalyzer.setConvergenceThreshold( ConservedVariables( 1.0e-6, 1.0e-6, 1.0e-6, 1.0e6, 1.0e-6 ) );
-
-
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 1000000; iter++ )
-    {
-        if( iter == 100000 )
-        {
-            parameters.mu = U * rho * L / 3.0e1;
-        }
-        if( iter == 200000 )
-        {
-            parameters.mu = U * rho * L / 1.0e2;
-        }
-        if( iter == 300000 )
-        {
-            parameters.mu = U * rho * L / 3.0e2;
-        }
-        if( iter == 400000 )
-        {
-            parameters.mu = U * rho * L / 1.0e3;
-        }
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, nullptr, 0);
-
-        if( 
-            //( iter < 10     && iter % 1     == 0 ) ||
-            //( iter < 100    && iter % 10    == 0 ) ||
-            //( iter < 1000   && iter % 100   == 0 ) ||
-            //( iter < 100000  && iter % 1000  == 0 ) ||
-            ( iter < 10000000 && iter % 10000 == 0 )
-          )
-        {
-            for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            {
-                for (SPtr<BoundaryCondition> bc : dataBase->boundaryConditions) {
-                    bc->runBoundaryConditionKernel(dataBase, parameters, level);
-                }
-            }
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        if( convergenceAnalyzer.run( iter ) ) break;
-
-        //turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-    std::string path( "F:/Work/Computations/out/" );
-    //std::string path( "out/" );
-    std::string simulationName ( "PropaneFlame" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/RayleighBenardMultiGPU/3rdPartyLinking.cmake b/apps/gpu/GKS/RayleighBenardMultiGPU/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RayleighBenardMultiGPU/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/RayleighBenardMultiGPU/CMakeLists.txt b/apps/gpu/GKS/RayleighBenardMultiGPU/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RayleighBenardMultiGPU/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/RayleighBenardMultiGPU/CMakePackage.cmake b/apps/gpu/GKS/RayleighBenardMultiGPU/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RayleighBenardMultiGPU/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/RayleighBenardMultiGPU/RayleighBenardMultiGPU.cpp b/apps/gpu/GKS/RayleighBenardMultiGPU/RayleighBenardMultiGPU.cpp
deleted file mode 100644
index 811872820c0d9fce485279d086d9b907199230ae..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RayleighBenardMultiGPU/RayleighBenardMultiGPU.cpp
+++ /dev/null
@@ -1,636 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-#include <thread>
-
-#include <mpi.h>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/Communication/Communicator.h"
-#include "GksGpu/Communication/MpiUtility.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-#include "GksGpu/Analyzer/PointTimeSeriesCollector.h"
-#include "GksGpu/Analyzer/HeatFluxAnalyzer.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-//uint deviceMap [2] = {2,3};
-uint deviceMap [2] = {0,1};
-
-void simulation( std::string path, std::string simulationName, bool fine, uint restartIter )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    int sideLengthX, sideLengthY, sideLengthZ, rankX, rankY, rankZ;
-
-    if      (mpiWorldSize == 1 ) { sideLengthX = 1; sideLengthY = 1; sideLengthZ = 1; }
-    else if (mpiWorldSize == 2 ) { sideLengthX = 1; sideLengthY = 1; sideLengthZ = 2; }
-    else if (mpiWorldSize == 4 ) { sideLengthX = 1; sideLengthY = 2; sideLengthZ = 2; }
-    else if (mpiWorldSize == 8 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 2; }
-    else
-    {
-        throw std::runtime_error( "This number of processes is not supported for this target!" );
-    }
-
-    rankZ =   rank %   sideLengthZ;
-    rankY = ( rank % ( sideLengthZ * sideLengthY ) ) /   sideLengthZ;
-    rankX =   rank                                   / ( sideLengthY * sideLengthZ );
-
-    *logging::out << logging::Logger::INFO_HIGH << "SideLength = " << sideLengthX << " " << sideLengthY << " " << sideLengthZ << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "rank       = " << rankX << " " << rankY << " " << rankZ << "\n";
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 64;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-
-    real dx = L / real(nx);
-
-    real Ra = 3.0e6;
-    //real Ra = 1.0e2;
-
-    real Ba  = 0.1;
-    real eps = 0.8;
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 1.0;
-    real rho = 1.0;
-
-    real lambda     = Ba / ( 2.0 * g * L );
-    real lambdaHot  = lambda / ( 1.0 + eps * 0.5 );
-    real lambdaCold = lambda / ( 1.0 - eps * 0.5 );
-    
-    real mu = sqrt( Pr * eps * g * L * L * L / Ra ) * rho ;
-
-    real cs  = sqrt( ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( 2.0 * lambda ) );
-    real U   = sqrt( Ra ) * mu / ( rho * L );
-
-    real CFL = 0.5;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( c1o1 + ( c2o1 * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    GksGpu::Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = GksGpu::ViscosityModel::sutherlandsLaw2;
-    //parameters.viscosityModel = ViscosityModel::constant;
-
-    parameters.forcingSchemeIdx = 0;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                M e s h    G e n e r a t i o n
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real LX = L / double(sideLengthX);
-    real LY = L / double(sideLengthY);
-    real LZ = L / double(sideLengthZ);
-
-    real xOverlap = ( sideLengthX == 1 ) ? 0.0 : 5.0*dx;
-    real yOverlap = ( sideLengthY == 1 ) ? 0.0 : 5.0*dx;
-    real zOverlap = ( sideLengthZ == 1 ) ? 0.0 : 5.0*dx;
-
-    real startX, endX;
-    real startY, endY;
-    real startZ, endZ;
-
-    if( sideLengthX > 1 && rankX == 1 ) startX = -3.0 * dx;
-    else                                startX = -0.5 * L;
-    if( sideLengthX > 1 && rankX == 0 ) endX   =  3.0 * dx;
-    else                                endX   =  0.5 * L;
-
-    if( sideLengthY > 1 && rankY == 1 ) startY = -3.0 * dx;
-    else                                startY = -0.5 * L;
-    if( sideLengthY > 1 && rankY == 0 ) endY   =  3.0 * dx;
-    else                                endY   =  0.5 * L;
-
-    if( sideLengthZ > 1 && rankZ == 1 ) startZ = -3.0 * dx;
-    else                                startZ = -0.5 * L;
-    if( sideLengthZ > 1 && rankZ == 0 ) endZ   =  3.0 * dx;
-    else                                endZ   =  0.5 * L;
-
-    gridBuilder->addCoarseGrid(startX, startY, startZ,  
-                               endX  , endY  , endZ  , dx);
-
-    std::cout << __LINE__ << std::endl;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real refL[4] = { 0.05, 0.02, 0.025, 0.005 };
-
-    if( fine )
-    {
-        refL[1] = 0.1;
-        refL[2] = 0.05;
-    }
-
-    gridBuilder->setNumberOfLayers(6,6);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Conglomerate coarseRefLevel;
-
-    if( sideLengthX == 1 || rankX == 0 ) coarseRefLevel.add( new Cuboid (-100.0, -100.0,           -100.0, 
-                                                                          100.0, -0.5*L + refL[0],  100.0 ) );
-    if( sideLengthX == 1 || rankX == 1 ) coarseRefLevel.add( new Cuboid (-100.0,  0.5*L - refL[0], -100.0, 
-                                                                          100.0,  100.0,            100.0 ) );
-
-    if( sideLengthY == 1 || rankY == 0 ) coarseRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0, 
-                                                                         -0.5*L + refL[0],  100.0,  100.0 ) );
-    if( sideLengthY == 1 || rankY == 1 ) coarseRefLevel.add( new Cuboid ( 0.5*L - refL[0], -100.0, -100.0, 
-                                                                          100.0,            100.0,  100.0  ) );
-
-    if( sideLengthZ == 1 || rankZ == 0 ) coarseRefLevel.add( new Cuboid (-100.0, -100.0, -100.0, 
-                                                                          100.0,  100.0, -0.5*L + refL[0] ) );
-    if( sideLengthZ == 1 || rankZ == 1 ) coarseRefLevel.add( new Cuboid (-100.0, -100.0,  0.5*L - refL[0], 
-                                                                          100.0,  100.0,  100.0           ) );
-
-    gridBuilder->addGrid( &coarseRefLevel, 1);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Conglomerate firstRefLevel;
-
-    if( sideLengthZ == 1 || rankZ == 0 ) firstRefLevel.add( new Cuboid (-100.0, -100.0, -100.0, 
-                                                                         100.0,  100.0, -0.5*L + refL[1] ) );
-    if( sideLengthZ == 1 || rankZ == 1 ) firstRefLevel.add( new Cuboid (-100.0, -100.0,  0.5*L - refL[1], 
-                                                                         100.0,  100.0,  100.0           ) );
-
-    gridBuilder->addGrid( &firstRefLevel, 2);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //Conglomerate secondRefLevel;
-
-    //if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0, 
-    //                                                    -0.5*L + refL[2],  100.0,  100.0 ) );
-    //else                secondRefLevel.add( new Cuboid ( 0.5*L - refL[2], -100.0, -100.0, 
-    //                                                     100.0,            100.0,  100.0 ) );
-
-    //if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0,   
-    //                                                    -0.5*L + refL[0],  100.0, -0.5*H + refL[2] ) );
-    //else                secondRefLevel.add( new Cuboid ( 0.5*L - refL[0], -100.0, -100.0,   
-    //                                                     100.0,            100.0, -0.5*H + refL[2] ) );
-
-    //if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,           -100.0,  0.5*H - refL[2], 
-    //                                                    -0.5*L + refL[0],  100.0,  100.0   ) );
-    //else                secondRefLevel.add( new Cuboid ( 0.5*L - refL[0], -100.0,  0.5*H - refL[2], 
-    //                                                     100.0,            100.0,  100.0   ) );
-
-    //gridBuilder->addGrid( &secondRefLevel, 3);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //Conglomerate thirdRefLevel;
-
-    //if( rank % 2 == 0 ) thirdRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0, 
-    //                                                   -0.5*L + refL[3],  100.0,  100.0 ) );
-    //else                thirdRefLevel.add( new Cuboid ( 0.5*L - refL[3], -100.0, -100.0, 
-    //                                                    100.0,            100.0,  100.0 ) );
-
-    //if( fine ) gridBuilder->addGrid( &thirdRefLevel, 4);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sideLengthX > 1 && rankX == 1 ) startX =    0.0;
-    else                                startX = -100.0;
-    if( sideLengthX > 1 && rankX == 0 ) endX   =    0.0;
-    else                                endX   =  100.0;
-
-    if( sideLengthY > 1 && rankY == 1 ) startY =    0.0;
-    else                                startY = -100.0;
-    if( sideLengthY > 1 && rankY == 0 ) endY   =    0.0;
-    else                                endY   =  100.0;
-
-    if( sideLengthZ > 1 && rankZ == 1 ) startZ =    0.0;
-    else                                startZ = -100.0;
-    if( sideLengthZ > 1 && rankZ == 0 ) endZ   =    0.0;
-    else                                endZ   =  100.0;
-
-    auto subDomainBox = std::make_shared<BoundingBox>( startX, endX, 
-                                                       startY, endY, 
-                                                       startZ, endZ );
-
-    if( mpiWorldSize > 1 ) gridBuilder->setSubDomainBox( subDomainBox );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-            
-    //gridBuilder->writeGridsToVtk( path + simulationName + "_0" + "_rank_" + std::to_string(rank) + "_lev_" );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( mpiWorldSize > 1 )
-    {
-        int rankPX = ( (rankX + 1 + sideLengthX) % sideLengthX ) +    rankY                                    * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankMX = ( (rankX - 1 + sideLengthX) % sideLengthX ) +    rankY                                    * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankPY =    rankX                                    + ( (rankY + 1 + sideLengthY) % sideLengthY ) * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankMY =    rankX                                    + ( (rankY - 1 + sideLengthY) % sideLengthY ) * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankPZ =    rankX                                    +    rankY                                    * sideLengthX + ( (rankZ + 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY;
-        int rankMZ =    rankX                                    +    rankY                                    * sideLengthX + ( (rankZ - 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY;
-
-        if( sideLengthX > 1 && rankX == 0 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS );
-        if( sideLengthX > 1 && rankX == 0 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rankPX);
-
-        if( sideLengthX > 1 && rankX == 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS );
-        if( sideLengthX > 1 && rankX == 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX);
-
-        if( sideLengthY > 1 && rankY == 0 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS );
-        if( sideLengthY > 1 && rankY == 0 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PY, rankPY);
-
-        if( sideLengthY > 1 && rankY == 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS );
-        if( sideLengthY > 1 && rankY == 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY);
-
-        if( sideLengthZ > 1 && rankZ == 0 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PZ, GKS );
-        if( sideLengthZ > 1 && rankZ == 0 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PZ, rankPZ);
-
-        if( sideLengthZ > 1 && rankZ == 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MZ, GKS );
-        if( sideLengthZ > 1 && rankZ == 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MZ, rankMZ);
-
-        *logging::out << logging::Logger::INFO_HIGH << "neighborRanks = " << rankPX << " " << rankMX << " " << rankPY << " " << rankMY << " " << rankPZ << " " << rankMZ << "\n";
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //if( mpiWorldSize == 2 ) meshAdapter.findPeriodicBoundaryNeighbors();    
-
-    //meshAdapter.writeMeshFaceVTK( path + simulationName + "_0" + "_rank_" + std::to_string(rank) + ".vtk" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<GksGpu::DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                 B o u n d a r y    C o n d i t i o n s
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::AdiabaticWall>( dataBase, Vec3(0,0,0), false );
-    SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::AdiabaticWall>( dataBase, Vec3(0,0,0), false );
-
-    SPtr<GksGpu::BoundaryCondition> bcMY = std::make_shared<GksGpu::AdiabaticWall>( dataBase, Vec3(0,0,0), false );
-    SPtr<GksGpu::BoundaryCondition> bcPY = std::make_shared<GksGpu::AdiabaticWall>( dataBase, Vec3(0,0,0), false );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot , false );
-    SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold, false );
-
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                 I n i t i a l    C o n d i t i o n s
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-
-    GksGpu::CudaUtility::printCudaMemoryUsage();
-
-    if( restartIter == INVALID_INDEX )
-    {
-        GksGpu::Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> GksGpu::ConservedVariables {
-
-            //real Th = 1.0 / lambdaHot;
-            //real Tc = 1.0 / lambdaCold;
-            //real T = Th - (Th - Tc)*((cellCenter.x + 0.5 * L) / L);
-            //real lambdaLocal = 1.0 / T;
-
-            return GksGpu::toConservedVariables(GksGpu::PrimitiveVariables(rho, 0.0, 0.0, 0.0, lambda), parameters.K);
-        });
-
-        if (rank == 0) writeVtkXMLParallelSummaryFile(dataBase, parameters, path + simulationName + "_0", mpiWorldSize);
-
-        writeVtkXML(dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank));
-    }
-    else
-    {
-        GksGpu::Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ) + "_rank_" + std::to_string(rank), startIter );
-
-        if (rank == 0) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( restartIter ) + "_restart", mpiWorldSize );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" + "_rank_" + std::to_string(rank) );
-
-
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    GksGpu::Initializer::initializeDataUpdate(dataBase);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                  R u n
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, true, 300.0 );
-
-    GksGpu::ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 0 );
-    auto turbulenceAnalyzer = std::make_shared<GksGpu::TurbulenceAnalyzer>( dataBase, 50000 );
-
-    turbulenceAnalyzer->collect_UU = true;
-    turbulenceAnalyzer->collect_VV = true;
-    turbulenceAnalyzer->collect_WW = true;
-    turbulenceAnalyzer->collect_UV = true;
-    turbulenceAnalyzer->collect_UW = true;
-    turbulenceAnalyzer->collect_VW = true;
-
-    turbulenceAnalyzer->allocate();
-
-    if( restartIter != INVALID_INDEX )
-        turbulenceAnalyzer->readRestartFile( path + simulationName + "_Turbulence_" + std::to_string( restartIter ) + "_rank_" + std::to_string(rank) );
-
-    //auto pointTimeSeriesCollector = std::make_shared<PointTimeSeriesCollector>();
-
-    //for( real y = 0.5 * W; y < real( mpiWorldSize / 2 ) * W; y += W )
-    //{
-    //    if( subDomainBox->isInside( -0.485, y, -0.3*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( -0.485, y, -0.3*H ), 'W', 10000 );
-    //    if( subDomainBox->isInside( -0.485, y, -0.1*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( -0.485, y, -0.1*H ), 'W', 10000 );
-    //    if( subDomainBox->isInside( -0.485, y,  0.1*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( -0.485, y,  0.1*H ), 'W', 10000 );
-    //    if( subDomainBox->isInside( -0.485, y,  0.3*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( -0.485, y,  0.3*H ), 'W', 10000 );
-    //    
-    //    if( subDomainBox->isInside(  0.485, y, -0.3*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3(  0.485, y, -0.3*H ), 'W', 10000 );
-    //    if( subDomainBox->isInside(  0.485, y, -0.1*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3(  0.485, y, -0.1*H ), 'W', 10000 );
-    //    if( subDomainBox->isInside(  0.485, y,  0.1*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3(  0.485, y,  0.1*H ), 'W', 10000 );
-    //    if( subDomainBox->isInside(  0.485, y,  0.3*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3(  0.485, y,  0.3*H ), 'W', 10000 );
-    //}
-
-    GksGpu::HeatFluxAnalyzer heatFluxAnalyzerPZ(dataBase, bcPZ, 100, 10000, lambdaHot, lambdaCold, L);
-    GksGpu::HeatFluxAnalyzer heatFluxAnalyzerMZ(dataBase, bcMZ, 100, 10000, lambdaHot, lambdaCold, L);
-    //HeatFluxAnalyzer heatFluxAnalyzer(dataBase, bcPZ);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 100000000; iter++ )
-    {
-        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        cupsAnalyzer.run( iter, parameters.dt );
-
-        convergenceAnalyzer.run( iter );
-
-        turbulenceAnalyzer->run( iter, parameters );
-
-        if(rankZ == 1) heatFluxAnalyzerPZ.run( iter, parameters );
-        if(rankZ == 0) heatFluxAnalyzerMZ.run( iter, parameters );
-
-        if( iter % 10000 == 0 )
-        //if( iter % 25 == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( iter ), mpiWorldSize );
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-
-            if(rankZ == 1) heatFluxAnalyzerPZ.writeToFile( path + simulationName + "_Nu_top_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-            if(rankZ == 0) heatFluxAnalyzerMZ.writeToFile( path + simulationName + "_Nu_bot_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        //pointTimeSeriesCollector->run(iter, parameters);
-
-        if( iter > 50000 && iter % 10000 == 0 )
-        {
-            turbulenceAnalyzer->download();
-        
-            if( rank == 0 ) writeTurbulenceVtkXMLParallelSummaryFile( dataBase, turbulenceAnalyzer, parameters, path + simulationName + "_Turbulence_" + std::to_string( iter ), mpiWorldSize );
-        
-            writeTurbulenceVtkXML( dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        if( iter % 10000 == 0 )
-        {
-            GksGpu::Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank), iter );
-
-            turbulenceAnalyzer->writeRestartFile( path + simulationName + "_Turbulence_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        //if( iter % 1000000 == 0 )
-        //{
-        //    pointTimeSeriesCollector->writeToFile(path + simulationName + "_TimeSeries_" + std::to_string( iter ) + "_rank_" + std::to_string(rank));
-        //}
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-}
-
-
-
-int main( int argc, char* argv[])
-{
-    //////////////////////////////////////////////////////////////////////////
-
-    bool fine = false;
-
-    bool highAspect = true;
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    MPI_Init(&argc, &argv);
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-#else
-    int rank         = GksGpu::MpiUtility::getMpiRankBeforeInit();
-    int mpiWorldSize = GksGpu::MpiUtility::getMpiWorldSizeBeforeInit();
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/RayleighBenardMultiGPU/test/" );
-    //std::string path( "F:/Work/Computations/out/RayleighBenardMultiGPU/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "ThermalCavity3D" );
-
-    if(fine) simulationName += "_fine";
-    else     simulationName += "_coarse";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    // Important: for Cuda-Aware MPI the device must be set before MPI_Init()
-    int deviceCount = GksGpu::CudaUtility::getCudaDeviceCount();
-
-    if(deviceCount == 0)
-    {
-        std::stringstream msg;
-        msg << "No devices devices found!" << std::endl;
-        *logging::out << logging::Logger::WARNING << msg.str(); msg.str("");
-    }
-
-    GksGpu::CudaUtility::setCudaDevice( rank % deviceCount );
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifndef _WIN32
-    MPI_Init(&argc, &argv);
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        uint restartIter = INVALID_INDEX;
-
-        if( argc > 1 ) restartIter = atoi( argv[1] );
-
-        simulation(path, simulationName, fine, restartIter);
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    MPI_Finalize();
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/Room/3rdPartyLinking.cmake b/apps/gpu/GKS/Room/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Room/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/Room/CMakeLists.txt b/apps/gpu/GKS/Room/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Room/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/Room/CMakePackage.cmake b/apps/gpu/GKS/Room/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Room/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/Room/Room.cpp b/apps/gpu/GKS/Room/Room.cpp
deleted file mode 100644
index cfd245afe481b1ed2e860bca00c164fec2a74d59..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/Room/Room.cpp
+++ /dev/null
@@ -1,340 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 4.0;
-    real H = 3.0;
-
-    real dx = H / real(nx);
-
-
-    real Ra = 1.0e9;
-
-    real Ba  = 0.1;
-    real eps = 1.2;
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 9.81;
-    real rho = 1.2;
-
-    real lambda     = Ba / ( 2.0 * g * H );
-    real lambdaHot  = lambda / ( 1.0 + eps * 0.5 );
-    real lambdaCold = lambda / ( 1.0 - eps * 0.5 );
-    
-    real mu = sqrt( Pr * eps * g * H * H * H / Ra ) * rho;
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * lambda ) );
-    real U   = sqrt( Ra ) * mu / ( rho * L );
-
-    real CFL = 0.25;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, -0.5*H,  
-    //                            0.5*L,  0.5*L,  0.5*H, dx);
-
-    gridBuilder->addCoarseGrid(-0.5*L, -0.5*L,  0.0,  
-                                0.5*L,  0.5*L,  H  , dx);
-
-    Sphere           sphere  ( 0.0, 0.0, 0.0, 0.6 );
-    VerticalCylinder cylinder( 0.0, 0.0, 0.0, 0.6, 2.0*H );
-
-    //gridBuilder->addGrid( &refRegion_1, 1);
-    //gridBuilder->addGrid( &refRegion_2, 2);
-    //gridBuilder->addGrid( &refRegion_3, 3);
-    //gridBuilder->addGrid( &refRegion_4, 4);
-
-    gridBuilder->setNumberOfLayers(0,10);
-
-    gridBuilder->addGrid( &sphere, 2 );
-    //gridBuilder->addGrid( &cylinder, 2 );
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    //meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-
-    bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
-    bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, true );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, true );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> hotPlate = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot, true );
-
-    hotPlate->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-        //return center.z < 0.0 && 
-        //       std::fabs(center.x) < 0.5 && 
-        //       std::fabs(center.y) < 0.5; 
-
-        return center.z < 0.0 && std::sqrt(center.x*center.x + center.y*center.y) < 0.5;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( hotPlate );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        real rhoLocal = rho * std::exp( - ( 2.0 * g * H * lambdaCold ) * cellCenter.z / H );
-
-        return toConservedVariables( PrimitiveVariables( rhoLocal, 0.0, 0.0, 0.0, lambdaCold ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, 0 );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 100000000; iter++ )
-    {
-        if( iter < 20000 )
-        {
-            std::dynamic_pointer_cast<IsothermalWall>(hotPlate)->lambda = lambdaCold + ( lambdaHot - lambdaCold ) * ( real(iter) / 20000.0 );
-        }
-        else
-        {
-            std::dynamic_pointer_cast<IsothermalWall>(hotPlate)->lambda = lambdaHot;
-        }
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            //( iter < 10     && iter % 1     == 0 ) ||
-            //( iter < 100    && iter % 10    == 0 ) ||
-            //( iter < 1000   && iter % 100   == 0 ) ||
-            //( iter < 10000  && iter % 1000  == 0 ) ||
-            //( iter < 10000000 && iter % 100000 == 0 )
-            ( iter >= 10000 && iter % 100000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-
-        //turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-    //std::string path( "F:/Work/Computations/out/" );
-    std::string path( "out/" );
-    std::string simulationName ( "Room" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/RoomFire/3rdPartyLinking.cmake b/apps/gpu/GKS/RoomFire/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomFire/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/RoomFire/CMakeLists.txt b/apps/gpu/GKS/RoomFire/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomFire/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/RoomFire/CMakePackage.cmake b/apps/gpu/GKS/RoomFire/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomFire/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/RoomFire/RoomFire.cpp b/apps/gpu/GKS/RoomFire/RoomFire.cpp
deleted file mode 100644
index 814c8fcfe40bb17775897f10eb971d0a463e847a..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomFire/RoomFire.cpp
+++ /dev/null
@@ -1,489 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-#include "GksGpu/FlowStateData/ThermalDependencies.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure2.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/HeatFlux.h"
-#include "GksGpu/BoundaryConditions/CreepingMassFlux.h"
-#include "GksGpu/BoundaryConditions/Open.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName, uint restartIter )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 3.0;
-    real H = 3.0;
-    real W = 0.125;
-
-    real LBurner = 1.0;
-
-    real HBurner = 0.5;
-
-    real dx = H / real(nx);
-
-    real Pr  = 0.71;
-    real K   = 5.0;
-    
-    real g   = 9.81;
-    real rho = 1.2;
-
-    PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
-    setLambdaFromT( prim, 3.0 );
-    
-    real mu = 1.5e-4;
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
-    real U   = 0.0125;
-
-    real CFL = 0.125;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "HRR = " << U * rho * LBurner * LBurner * 800000.0 / 0.016 / 1000.0 << " kW\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = prim.lambda;
-
-    //parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    parameters.enableReaction = true;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    bool threeDimensional = true;
-
-    if( threeDimensional )
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*L,  0.0,  
-                                    0.5*L,  0.5*L,  H  , dx);
-    else
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*dx,  0.0,  
-                                    0.5*L,  0.5*dx,  H  , dx);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-//#ifdef _WIN32
-//    //TriangularMesh* stl = TriangularMesh::make("F:/Work/Computations/inp/Unterzug.stl");
-//    TriangularMesh* stl = TriangularMesh::make("F:/Work/Computations/inp/UnterzugObstacle.stl");
-//#else
-//    //TriangularMesh* stl = TriangularMesh::make("inp/Unterzug.stl");
-//    TriangularMesh* stl = TriangularMesh::make("inp/UnterzugObstacle.stl");
-//#endif
-
-    //gridBuilder->addGeometry(stl);
-    
-    Cuboid box( -0.5 * LBurner, -0.5 * LBurner, -HBurner, 
-                 0.5 * LBurner,  0.5 * LBurner,  HBurner );
-    Cuboid beam( -0.15, -10.0, 2.6, 0.15, 10.0, 10.0 );
-
-    Conglomerate solid;
-
-    solid.add(&box);
-    solid.add(&beam);
-
-    gridBuilder->addGeometry(&solid);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    Cuboid boxRefCoarse1 ( -0.8 * LBurner, -0.8 * LBurner, -100.0, 
-                            0.8 * LBurner,  0.8 * LBurner,  100.0 );
-    Cuboid boxRefCoarse2 ( -0.8 * LBurner, -100,    2.3, 
-                            0.8 * LBurner,  100,  100.0 );
-
-    Conglomerate refRegionCoarse;
-
-    refRegionCoarse.add( &boxRefCoarse1 );
-    refRegionCoarse.add( &boxRefCoarse2 );
-
-    gridBuilder->setNumberOfLayers(0,20);
-
-    gridBuilder->addGrid( &refRegionCoarse, 1 );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    Cuboid boxRef ( -0.5 * LBurner, -0.5 * LBurner, -HBurner, 
-                     0.5 * LBurner,  0.5 * LBurner,  HBurner );
-    Cuboid beamRef( -0.15, -10.0, 2.6, 0.15, 10.0, 10.0 );
-
-    boxRef.scale (0.1);
-    beamRef.scale(0.02);
-
-    Conglomerate refRegion1;
-
-    refRegion1.add( &boxRef );
-    refRegion1.add( &beamRef );
-
-    gridBuilder->setNumberOfLayers(0,20);
-
-    gridBuilder->addGrid( &refRegion1, 3 );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if( threeDimensional )
-        gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-    else
-        gridBuilder->setPeriodicBoundaryCondition(false, true, false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    if( !threeDimensional )
-        meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMY;
-    SPtr<BoundaryCondition> bcPY;
-
-    if( threeDimensional )
-    {
-        bcMY = std::make_shared<AdiabaticWall>(dataBase, Vec3(0.0, 0.0, 0.0), false);
-        bcPY = std::make_shared<AdiabaticWall>(dataBase, Vec3(0.0, 0.0, 0.0), false);
-
-        bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < -0.5*L; });
-        bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y >  0.5*L; });
-    }
-    else
-    {
-        bcMY = std::make_shared<Periodic>(dataBase);
-        bcPY = std::make_shared<Periodic>(dataBase);
-        
-        bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*dx; } );
-        bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*dx; } );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, true );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, true );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.5; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //SPtr<BoundaryCondition> bcBurner = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), 0.5*prim.lambda,  0.0, true );
-    //SPtr<BoundaryCondition> bcBurner = std::make_shared<HeatFlux>( dataBase, 100.0 );
-    SPtr<BoundaryCondition> bcBurner = std::make_shared<CreepingMassFlux>( dataBase, rho, U, prim.lambda );
-
-    bcBurner->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-
-        if( threeDimensional )
-            return center.z > HBurner - 0.125 * dx && center.z < HBurner && std::sqrt(center.x*center.x) < 0.5 * LBurner - dx && std::sqrt(center.y*center.y) < 0.5 * LBurner - dx;
-        else
-            return center.z > HBurner - 0.125 * dx && center.z < HBurner && std::sqrt(center.x*center.x) < 0.5 * LBurner - dx && std::sqrt(center.y*center.y) < 0.5 * dx;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcSolid = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-
-    bcSolid->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-
-        return center.z > 2.5 && std::sqrt(center.x*center.x) < 0.15;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcWindowOpen = std::make_shared<Open>( dataBase, prim, 1.0 );
-
-    bcWindowOpen->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-
-        return center.z > 1.0 && center.z < 2.0 && std::sqrt(center.x*center.x) > 1.5 && std::sqrt(center.y*center.y) < 1.0;
-    } );
-
-    SPtr<BoundaryCondition> bcWindowPressure = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-
-    bcWindowPressure->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-
-        return center.z > 2.0 && center.z < 2.8 && std::sqrt(center.x*center.x) > 1.5 && std::sqrt(center.y*center.y) < 1.0;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcBurner );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcSolid );
-
-    dataBase->boundaryConditions.push_back( bcWindowOpen     );
-    dataBase->boundaryConditions.push_back( bcWindowPressure );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-    
-    if( restartIter == INVALID_INDEX )
-    {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
-
-            //real rhoLocal = rho * std::exp(-(2.0 * g * H * prim.lambda) * cellCenter.z / H);
-
-            //prim.rho = rhoLocal;
-
-            //real r = sqrt(cellCenter.x * cellCenter.x + cellCenter.y * cellCenter.y + cellCenter.z * cellCenter.z);
-
-            //if( r < 0.55 ) prim.S_2 = 1.0;
-
-            return toConservedVariables(prim, parameters.K);
-        });
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-    }
-    else
-    {
-        Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" );
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 1000 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 1000 );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 100000000; iter++ )
-    {
-        if( iter < 10000 )
-        {
-            //std::dynamic_pointer_cast<PassiveScalarDiriclet>(burner)->S_1 = 10.0 * ( real(iter) / 20000.0 );
-
-            //parameters.mu = mu + 10.0 * mu * ( 1.0 - ( real(iter) / 10000.0 ) );
-
-            //parameters.dt = 0.2 * dt + ( dt - 0.2 * dt ) * ( real(iter) / 40000.0 );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            //( iter >= 34920 && iter % 1 == 0 ) ||
-            //( iter >= 35900 && iter % 10 == 0 ) ||
-            ( iter % 1000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        if( iter % 1000 == 0 )
-        {
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
-        }
-
-        //turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/RoomFire/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "RoomFire" );
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        uint restartIter = INVALID_INDEX;
-        //uint restartIter = 35000;
-
-        if( argc > 1 ) restartIter = atoi( argv[1] );
-
-        thermalCavity( path, simulationName, restartIter );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/RoomFireExtended/3rdPartyLinking.cmake b/apps/gpu/GKS/RoomFireExtended/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomFireExtended/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/RoomFireExtended/CMakeLists.txt b/apps/gpu/GKS/RoomFireExtended/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomFireExtended/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/RoomFireExtended/CMakePackage.cmake b/apps/gpu/GKS/RoomFireExtended/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomFireExtended/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/RoomFireExtended/RoomFireExtended.cpp b/apps/gpu/GKS/RoomFireExtended/RoomFireExtended.cpp
deleted file mode 100644
index 77b8c2bc53721a5f455254ebbec58a47ce36eca6..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomFireExtended/RoomFireExtended.cpp
+++ /dev/null
@@ -1,795 +0,0 @@
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//   ||          ||  ||  ||||||  |||||||| ||    ||  ||||||||  ||
-//    ||        ||   ||  ||   ||    ||    ||    ||  ||    ||  ||
-//     ||      ||    ||  ||||||     ||    ||    ||  ||||||||  ||
-//      ||    ||     ||  ||   ||    ||     ||||||   ||    ||  ||||||    ||||||   ||   ||||||   ||||||   ||||||
-//       ||  ||                                                        ||       ||   ||   ||  ||      |||    ||
-//        ||||       |||||||||||||||||||||||||||||||||||||||||||||||||||||||   ||   ||||||   ||||||     |||
-//                                                                    ||      ||   ||   ||  ||       ||   |||
-//                    i R M B  @  T U  B r a u n s c h w e i g       ||      ||   ||   ||  ||||||   |||||||
-//
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-#include <algorithm>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-#include "GksGpu/FlowStateData/ThermalDependencies.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure2.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/HeatFlux.h"
-#include "GksGpu/BoundaryConditions/CreepingMassFlux.h"
-#include "GksGpu/BoundaryConditions/ConcreteHeatFlux.h"
-#include "GksGpu/BoundaryConditions/Open.h"
-
-#include "GksGpu/Communication/Communicator.h"
-#include "GksGpu/Communication/MpiUtility.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-#include "GksGpu/Analyzer/PointTimeSeriesCollector.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-real getHRR( real t );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// from https://stackoverflow.com/questions/865668/how-to-parse-command-line-arguments-in-c
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-char* getCmdOption(char ** begin, char ** end, const std::string & option)
-{
-    char ** itr = std::find(begin, end, option);
-    if (itr != end && ++itr != end)
-    {
-        return *itr;
-    }
-    return 0;
-}
-
-bool cmdOptionExists(char** begin, char** end, const std::string& option)
-{
-    return std::find(begin, end, option) != end;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void thermalCavity( std::string path, std::string simulationName, uint windowIndex, uint restartIter, bool useConreteHeatFluxBC )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    int sideLengthX, sideLengthY, sideLengthZ, rankX, rankY, rankZ;
-
-    if      (mpiWorldSize == 1 ) { sideLengthX = 1; sideLengthY = 1; sideLengthZ = 1; }
-    else if (mpiWorldSize == 2 ) { sideLengthX = 2; sideLengthY = 1; sideLengthZ = 1; }
-    else if (mpiWorldSize == 4 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 1; }
-    else if (mpiWorldSize == 8 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 2; }
-
-    rankX =   rank %   sideLengthX;
-    rankY = ( rank % ( sideLengthX * sideLengthY ) ) /   sideLengthX;
-    rankZ =   rank                                   / ( sideLengthY * sideLengthX );
-
-    *logging::out << logging::Logger::INFO_HIGH << "SideLength = " << sideLengthX << " " << sideLengthY << " " << sideLengthZ << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "rank       = " << rankX << " " << rankY << " " << rankZ << "\n";
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real dx = 0.1;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 4.0;
-    real B = 3.0;
-
-    real LBurner = 1.0;
-    real HBurner = 0.5;
-
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 9.81;
-    real rho = 1.2;
-
-    PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
-    setLambdaFromT( prim, 3.0 );
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
-
-    real mu      = 1.8e-5;
-    //real U       = 0.025;       // 750 kW on top
-    //real U       = 0.015;       // 900 kW on top
-    //real U       = 0.005;       // 900 kW all around
-    real rhoFuel = 0.5405;
-
-    real heatOfReaction = real(8000.0); // J / mol 
-
-    real specificHeatOfReaction = heatOfReaction / 0.016;
-
-    real HRR = 750.0; // kW
-
-    real U = HRR * 1000.0 / ( rhoFuel * LBurner * LBurner * (specificHeatOfReaction * 100.0) );
-
-    real CFL = 0.125;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( c1o1 + ( c2o1 * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    //*logging::out << logging::Logger::INFO_HIGH << "HRR = " << U * rhoFuel * LBurner * LBurner * (heatOfReaction * 100.0) / 0.016 / 1000.0 << " kW\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = prim.lambda;
-
-    parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-    //parameters.viscosityModel = ViscosityModel::constant;
-
-    parameters.enableReaction = true;
-
-    parameters.heatOfReaction = heatOfReaction;
-
-    parameters.useHeatReleaseRateLimiter = true;
-    parameters.useTemperatureLimiter     = true;
-    parameters.usePassiveScalarLimiter   = true;
-    parameters.useSmagorinsky            = true;
-
-    parameters.reactionLimiter    = 1.0005;
-    parameters.temperatureLimiter = 1.0e-3;
-
-    parameters.useSpongeLayer = true;
-    parameters.spongeLayerIdx = 2;
-
-    parameters.forcingSchemeIdx = 2;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid(-2.1, -1.6, -0.1,  
-                                2.1,  6.0,  5.0, dx);
-    //gridBuilder->addCoarseGrid(-1.1, -1.2, -0.1,  
-                                //1.1,  1.2,  2.2, dx);
-    //gridBuilder->addCoarseGrid(-2.1, -1.6, -0.1,  
-                                //2.1,  1.6,  3.1, dx);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-//#ifdef _WIN32
-//    TriangularMesh* stl = TriangularMesh::make("F:/Work/Computations/inp/RoomExtended7.stl");
-//#else
-//    //TriangularMesh* stl = TriangularMesh::make("inp/Unterzug.stl");
-//    TriangularMesh* stl = TriangularMesh::make("inp/RoomExtended4.stl");
-//#endif
-//
-//    gridBuilder->addGeometry(stl);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    Conglomerate flowDomain;
-
-    flowDomain.add( new Cuboid( -2.0, -1.5, 0.0, 2.0,  1.5, 3.0 ) );      // Room 
-    flowDomain.add( new Cuboid( -2.0,  1.8, 0.0, 2.0,  5.0, 5.0 ) );      // Outside
-    //flowDomain.add( new Cuboid( -0.5, -1.8, 0.0, 0.5, -1.0, 2.0 ) );      // Door
-    flowDomain.subtract( new Cuboid( -0.5, -0.5, -1.0, 0.5, 0.5, 0.5 ) ); // Fire
-    flowDomain.subtract( new Cuboid( -3.0, -0.1,  2.6, 3.0, 0.1, 4.0 ) ); // Beam
-
-    if( windowIndex == 0 ) flowDomain.add( new Cuboid( -1.0 ,  1.0,  1.0,    1.0 ,  3.0,  2.4 ) );      // Window large
-    if( windowIndex == 1 ) flowDomain.add( new Cuboid( -0.5 ,  1.0,  1.0,    0.5 ,  3.0,  2.4 ) );      // Window medium
-    if( windowIndex == 2 ) flowDomain.add( new Cuboid( -0.25,  1.0,  1.5,    0.25,  3.0,  2.0 ) );      // Window small
-    if( windowIndex == 3 ) flowDomain.add( new Cuboid( -1.0 ,  1.0,  1.0,    1.0 ,  3.0,  2.0 ) );      // Window low
-
-    Conglomerate solidDomain;
-
-    solidDomain.add( new Cuboid(-2.2, -1.7, -0.2, 2.2,  6.1,  5.1) );
-    solidDomain.subtract( &flowDomain );
-
-    gridBuilder->addGeometry( &solidDomain );
-    //gridBuilder->addGeometry( &flowDomain );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    Cuboid boxCoarse ( -2.0, -3.0, -0.5, 
-                        3.0,  3.0,  3.5 );
-
-    gridBuilder->addGrid( &boxCoarse, 1 );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real startX = -1e99;
-    real startY = -1e99;
-    real startZ = -1e99;
-    real endX   =  1e99;
-    real endY   =  1e99;
-    real endZ   =  1e99;
-
-    if( mpiWorldSize == 2 )
-    {
-        if( rank == 0 ) { endX   = 0.0; }
-        if( rank == 1 ) { startX = 0.0; }
-    }
-    if( mpiWorldSize == 4 )
-    {
-        if( rank == 0 ) { endX   = 0.0; endY   = 0.0; }
-        if( rank == 1 ) { startX = 0.0; endY   = 0.0; }
-        if( rank == 2 ) { endX   = 0.0; startY = 0.0; }
-        if( rank == 3 ) { startX = 0.0; startY = 0.0; }
-    }
-    if( mpiWorldSize == 8 )
-    {
-        if( rank == 0 ) { endX   = 0.0; endY   = 0.0; endZ   = 1.9; }
-        if( rank == 1 ) { startX = 0.0; endY   = 0.0; endZ   = 1.9; }
-        if( rank == 2 ) { endX   = 0.0; startY = 0.0; endZ   = 1.9; }
-        if( rank == 3 ) { startX = 0.0; startY = 0.0; endZ   = 1.9; }
-        if( rank == 4 ) { endX   = 0.0; endY   = 0.0; startZ = 1.9; }
-        if( rank == 5 ) { startX = 0.0; endY   = 0.0; startZ = 1.9; }
-        if( rank == 6 ) { endX   = 0.0; startY = 0.0; startZ = 1.9; }
-        if( rank == 7 ) { startX = 0.0; startY = 0.0; startZ = 1.9; }
-    }
-
-    auto subDomainBox = std::make_shared<BoundingBox>( startX, endX, 
-                                                       startY, endY, 
-                                                       startZ, endZ );
-
-    gridBuilder->setSubDomainBox( subDomainBox );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    Cuboid roomRef( -2.1, -1.8, -1.0, 
-                     2.1,  1.7, 10.0 );
-    
-    Cuboid windowRef( -1.1,  1.6,  0.9, 
-                       1.1,  2.0,  3.0 );
-
-    Conglomerate refRegion1;
-
-    refRegion1.add( &roomRef );
-    refRegion1.add( &windowRef );
-
-    gridBuilder->setNumberOfLayers(0,22);
-
-    //gridBuilder->addGrid( &refRegion1, 2 );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    Cuboid boxRef ( -0.6 * LBurner, -0.6 * LBurner, -1.0, 
-                     0.6 * LBurner,  0.6 * LBurner, 10.0 );
-    Cuboid beamRef( -10.0, -0.25, 2.4, 10.0, 0.25, 10.0 );
-
-    Conglomerate refRegion2;
-
-    refRegion2.add( &boxRef );
-    refRegion2.add( &beamRef );
-
-    gridBuilder->setNumberOfLayers(0,22);
-    
-    gridBuilder->addGrid( &refRegion2, 2 );
-    //gridBuilder->addGrid( &refRegion2, 3 );
-
-    uint maxLevel = gridBuilder->getNumberOfGridLevels() - 1;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    //gridBuilder->writeGridsToVtk(path + "Grid_rank_" + std::to_string( rank ) + "_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    
-    if( mpiWorldSize > 1 )
-    {
-        int rankPX = ( (rankX + 1 + sideLengthX) % sideLengthX ) +    rankY                                    * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankMX = ( (rankX - 1 + sideLengthX) % sideLengthX ) +    rankY                                    * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankPY =    rankX                                    + ( (rankY + 1 + sideLengthY) % sideLengthY ) * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankMY =    rankX                                    + ( (rankY - 1 + sideLengthY) % sideLengthY ) * sideLengthX +    rankZ                                    * sideLengthX * sideLengthY;
-        int rankPZ =    rankX                                    +    rankY                                    * sideLengthX + ( (rankZ + 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY;
-        int rankMZ =    rankX                                    +    rankY                                    * sideLengthX + ( (rankZ - 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY;
-
-        if( sideLengthX > 1 && rankX < sideLengthX-1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS );
-        if( sideLengthX > 1 && rankX < sideLengthX-1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rankPX);
-
-        if( sideLengthX > 1 && rankX > 0             ) gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS );
-        if( sideLengthX > 1 && rankX > 0             ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX);
-
-        if( sideLengthY > 1 && rankY < sideLengthY-1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS );
-        if( sideLengthY > 1 && rankY < sideLengthY-1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PY, rankPY);
-
-        if( sideLengthY > 1 && rankY > 0             ) gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS );
-        if( sideLengthY > 1 && rankY > 0             ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY);
-
-        if( sideLengthZ > 1 && rankZ < sideLengthZ-1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PZ, GKS );
-        if( sideLengthZ > 1 && rankZ < sideLengthZ-1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PZ, rankPZ);
-
-        if( sideLengthZ > 1 && rankZ > 0             ) gridBuilder->findCommunicationIndices( CommunicationDirections::MZ, GKS );
-        if( sideLengthZ > 1 && rankZ > 0             ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MZ, rankMZ);
-
-        *logging::out << logging::Logger::INFO_HIGH << "neighborRanks = " << rankPX << " " << rankMX << " " << rankPY << " " << rankMY << " " << rankPZ << " " << rankMZ << "\n";
-    }
-
-    //gridBuilder->writeGridsToVtk(path + "Grid_rank_" + std::to_string( rank ) + "_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "MeshFaces_rank_" + std::to_string( rank ) + ".vtk" );
-
-    //meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //CudaUtility::setCudaDevice( rank % CudaUtility::getCudaDeviceCount() );
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcWall = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-
-    bcWall->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return true; } );
-    
-    SPtr<BoundaryCondition> bcWallHeatFlux = std::make_shared<ConcreteHeatFlux>( dataBase, 64, 1.0e-6, 2400.0, 880, 0.1, 3.0 );
-
-    bcWallHeatFlux->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return (center.z >  3.0 && center.y <  1.6)
-                                                                                || (center.x >  2.0 && center.y > -1.5 && center.y < 1.5 & center.z < 3.0 && center.z > 0.0)
-                                                                                || (center.x < -2.0 && center.y > -1.5 && center.y < 1.5 & center.z < 3.0 && center.z > 0.0)
-                                                                                || (center.y < -1.5)
-                                                                                || (center.y >  1.5 && center.y < 1.6); } );
-
-    std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux)->init();
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcOpen = std::make_shared<Open>( dataBase, prim, 1.0 );
-
-    bcOpen->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -6.0 || center.y > 1.7; } );
-
-    ////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcPressure = std::make_shared<Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-
-    bcPressure->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y > 1.7 && center.z > 5.0; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcBurner = std::make_shared<CreepingMassFlux>( dataBase, rhoFuel, U, prim.lambda );
-
-    bcBurner->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-
-        return center.z < HBurner && 
-            std::sqrt(center.x*center.x) < 0.5 * LBurner - dx * std::pow(0.5, maxLevel) && 
-            std::sqrt(center.y*center.y) < 0.5 * LBurner - dx * std::pow(0.5, maxLevel);
-        //return center.z < HBurner && std::sqrt(center.x*center.x) < 0.5 * LBurner && std::sqrt(center.y*center.y) < 0.5 * LBurner;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcBurner );
-
-    if( useConreteHeatFluxBC )
-        dataBase->boundaryConditions.push_back( bcWallHeatFlux );
-
-    dataBase->boundaryConditions.push_back( bcWall );
-
-    dataBase->boundaryConditions.push_back( bcOpen );
-
-    dataBase->boundaryConditions.push_back( bcPressure );
-
-    *logging::out << logging::Logger::INFO_HIGH << "Number of cells bcBurner = "   << bcBurner->numberOfCells   << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "Number of cells bcWall = "     << bcWall->numberOfCells     << "\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "Number of cells bcOpen = "     << bcOpen->numberOfCells     << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "Number of cells bcPressure = " << bcPressure->numberOfCells << "\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    auto pointTimeSeriesCollector = std::make_shared<PointTimeSeriesCollector>();
-
-    for( real x = 0.0002; x < 2; x += 0.4449 )
-    {
-        if( subDomainBox->isInside( x, -1.4999, 2.9999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x, -1.4999, 2.9999 ), 'T' );
-        if( subDomainBox->isInside( x, -1.0,    2.9999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x, -1.0,    2.9999 ), 'T' );
-        if( subDomainBox->isInside( x, -0.5,    2.9999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x, -0.5,    2.9999 ), 'T' );
-        if( subDomainBox->isInside( x, -0.2001, 2.9999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x, -0.2001, 2.9999 ), 'T' );
-
-        if( subDomainBox->isInside( x, -0.2001, 2.5999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x, -0.2001, 2.5999 ), 'T' );
-        if( subDomainBox->isInside( x,  0.0001, 2.5999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x,  0.0001, 2.5999 ), 'T' );
-        if( subDomainBox->isInside( x,  0.2001, 2.5999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x,  0.2001, 2.5999 ), 'T' );
-        
-        if( subDomainBox->isInside( x,  0.2001, 2.9999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x,  0.2001, 2.9999 ), 'T' );
-        if( subDomainBox->isInside( x,  0.5,    2.9999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x,  0.5,    2.9999 ), 'T' );
-        if( subDomainBox->isInside( x,  1.0,    2.9999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x,  1.0,    2.9999 ), 'T' );
-        if( subDomainBox->isInside( x,  1.4999, 2.9999 ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( x,  1.4999, 2.9999 ), 'T' );
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-    
-    if( restartIter == INVALID_INDEX )
-    {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
-
-            PrimitiveVariables primLocal = prim;
-
-            //if( cellCenter.x > 0 ) primLocal.rho = 1.21;
-
-            primLocal.lambda *= 0.5;
-
-            return toConservedVariables(primLocal, parameters.K);
-        });
-
-        if (rank == 0) writeVtkXMLParallelSummaryFile(dataBase, parameters, path + simulationName + "_0", mpiWorldSize);
-
-        writeVtkXML(dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank));
-
-        if( useConreteHeatFluxBC )
-            writeConcreteHeatFluxVtkXML( dataBase, std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux), parameters, 0, path + simulationName + "_Solid_0" );
-    }
-    else
-    {
-        Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ) + "_rank_" + std::to_string(rank), startIter );
-
-        if (rank == 0) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( restartIter ) + "_restart", mpiWorldSize );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" + "_rank_" + std::to_string(rank) );
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 10000 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_HIGH << "================================================================================\n";
-    *logging::out << logging::Logger::INFO_HIGH << "================================================================================\n";
-    *logging::out << logging::Logger::INFO_HIGH << "==================   S t a r t    T i m e    S t e p p i n g   =================\n";
-    *logging::out << logging::Logger::INFO_HIGH << "================================================================================\n";
-    *logging::out << logging::Logger::INFO_HIGH << "================================================================================\n";
-
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 100000000; iter++ )
-    {
-        real currentHRR = getHRR( iter * parameters.dt );
-
-        //*logging::out << logging::Logger::LOGGER_ERROR << "HRR(t=" << iter * parameters.dt << ") = " << currentHRR << "\n";
-
-        std::dynamic_pointer_cast<CreepingMassFlux>(bcBurner)->velocity = currentHRR * 1000.0 / ( rhoFuel * LBurner * LBurner * (specificHeatOfReaction * 100.0) );
-
-        //////////////////////////////////////////////////////////////////////////
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        //////////////////////////////////////////////////////////////////////////
-
-        cupsAnalyzer.run( iter, parameters.dt );
-
-        convergenceAnalyzer.run( iter );
-
-        //////////////////////////////////////////////////////////////////////////
-
-        pointTimeSeriesCollector->run(iter, parameters);
-
-        int crashCellIndex = dataBase->getCrashCellIndex();
-        if( crashCellIndex >= 0 )
-        {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Simulation Crashed at CellIndex = " << crashCellIndex << "\n";
-            dataBase->copyDataDeviceToHost();
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-
-            break;
-        }
-
-        if( iter % 1000 == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( iter ), mpiWorldSize );
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-
-            if( useConreteHeatFluxBC )
-                //std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux)->writeVTKFile(dataBase, parameters, path + simulationName + "_Solid_" + std::to_string( iter ));
-                writeConcreteHeatFluxVtkXML( dataBase, std::dynamic_pointer_cast<ConcreteHeatFlux>(bcWallHeatFlux), parameters, 0, path + simulationName + "_Solid_" + std::to_string( iter ) );
-        }
-
-        if( iter % 10000 == 0 )
-        {
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank), iter );
-        }
-
-        if( iter % 100000 == 0 )
-        {
-            pointTimeSeriesCollector->writeToFile(path + simulationName + "_TimeSeries_" + std::to_string( iter ) + "_rank_" + std::to_string(rank));
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-// Entry point of the RoomFire application.
-//
-// Initializes MPI, parses the command line, sets up logging and the CUDA device and then runs
-// thermalCavity(). Recognized command line options:
-//   -w <index>              window index that is handed to thermalCavity() (default: 2)
-//   -r <iter>               iteration to restart from (default: INVALID_INDEX, i.e. a fresh initialization)
-//   --useConcreteHeatFlux   request the concrete heat flux boundary condition
-//
-// Exceptions escaping thermalCavity() are logged and swallowed; the function always returns 0.
-int main( int argc, char* argv[])
-{
-    MPI_Init(&argc, &argv);
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    //////////////////////////////////////////////////////////////////////////
-    // defaults, may be overridden on the command line below
-
-    uint restartIter = INVALID_INDEX;
-    //uint restartIter = 140000;
-
-    uint windowIndex = 2;
-
-    // NOTE(review): this is already true by default, so "--useConcreteHeatFlux" currently has no effect
-    //               and the "_adiabatic" branch below cannot be reached from the command line.
-    bool useConcreteHeatFluxBC = true;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( cmdOptionExists(argv, argv+argc, "-w" ) )
-        windowIndex = atoi( getCmdOption(argv, argv+argc, "-w") );
-
-    if( cmdOptionExists(argv, argv+argc, "--useConcreteHeatFlux" ) )
-        useConcreteHeatFluxBC = true;
-
-    if( cmdOptionExists(argv, argv+argc, "-r" ) )
-        restartIter = atoi( getCmdOption(argv, argv+argc, "-r") );
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/RoomFireExtended/" );
-#else
-    std::string path( "out/" );
-
-    //if( argc > 1 ){
-    //    path += "Window_";
-    //    path += argv[1];
-    //    path += "/";
-    //}
-#endif
-
-    // the simulation name (and with it every output file name) encodes the wall treatment
-    std::string simulationName ( "RoomFire" );
-
-    if( useConcreteHeatFluxBC ) simulationName += "_heatFlux";
-    else                        simulationName += "_adiabatic";
-
-    // log to the console and to one log file per rank
-    logging::Logger::addStream(&std::cout);
-
-    std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    // NOTE(review): the original remark at this place stated that for Cuda-Aware MPI the device must be
-    //               set before MPI_Init(), yet MPI_Init() has already been called at the top of main().
-    //               Confirm which order is actually required.
-    int deviceCount = CudaUtility::getCudaDeviceCount();
-
-    if( deviceCount <= 0 )
-        *logging::out << logging::Logger::WARNING << "No devices found!\n";
-
-    // With a single rank, or if no device was reported, device 0 is requested.
-    // The guard on deviceCount avoids evaluating "rank % 0", which is undefined behavior.
-    if( mpiWorldSize == 1 || deviceCount <= 0 ) CudaUtility::setCudaDevice( 0 );
-    else                                        CudaUtility::setCudaDevice( rank % deviceCount );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    try
-    {
-        thermalCavity( path, simulationName, windowIndex, restartIter, useConcreteHeatFluxBC );
-    }
-    // std::bad_alloc derives from std::exception, hence its handler has to come first to be reachable
-    catch (const std::bad_alloc& e)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (const std::exception& e)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    MPI_Finalize();
-
-    return 0;
-}
-
-
-
-
-
-
-// Returns the heat release rate (HRR) at time t by piecewise linear interpolation of a tabulated curve.
-//
-// t is divided by 60 before it is looked up in tInMin_table, i.e. t is presumably given in seconds and
-// the table abscissa in minutes (TODO confirm the units; the origin of the data is not documented here).
-//
-// Times at or before the first sample yield the first table value, times after the last sample yield
-// the last table value.
-real getHRR( real t )
-{
-    // sample times, strictly increasing
-    static const real tInMin_table [] =
-    { 0.0,
-      1.2998404845645375,
-      1.8225528293767326,
-      2.3411883091040355,
-      3.690242379336123,
-      5.8053588126615825,
-      8.481044195158887,
-      9.683816463416616,
-      10.268361262016242,
-      11.2607867055371,
-      13.013838692038146,
-      14.302516331727396,
-      17.240382966469404,
-      20.679801074868717,
-      22.9733288897661 };
-
-    // HRR values belonging to the sample times above
-    static const real HRR_table [] =
-    { 0.0,
-      658.3729425582654,
-      590.0388596425946,
-      480.1207528610856,
-      440.4722692284047,
-      414.659889148097,
-      406.6507906206217,
-      374.9279268493922,
-      337.28487256561004,
-      260.02439647836513,
-      141.15465878904172,
-      85.66658361941495,
-      51.906257987905406,
-      33.97096366089556,
-      27.954675614199346 };
-
-    // derive the last index from the table instead of hard-coding it
-    const uint last = static_cast<uint>( sizeof( tInMin_table ) / sizeof( tInMin_table[0] ) ) - 1;
-
-    const auto tInMin = t / 60.0;
-
-    // beyond the tabulated range: hold the last value
-    if( tInMin > tInMin_table[last] ) return HRR_table[last];
-
-    // first sample that is not smaller than the requested time
-    uint upper = 0;
-    while( upper < last && tInMin_table[upper] < tInMin ) upper++;
-
-    // t at or before the first sample (or NaN): there is no lower neighbor. Without this guard
-    // "upper - 1" wraps around on the unsigned index and both tables are read out of bounds.
-    if( upper == 0 ) return HRR_table[0];
-
-    const uint lower = upper - 1;
-
-    const auto weight = ( tInMin - tInMin_table[lower] )/( tInMin_table[upper] - tInMin_table[lower] );
-
-    return HRR_table[lower] + weight * ( HRR_table[upper] - HRR_table[lower] );
-}
\ No newline at end of file
diff --git a/apps/gpu/GKS/RoomMultiGPU/3rdPartyLinking.cmake b/apps/gpu/GKS/RoomMultiGPU/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomMultiGPU/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-# Third-party linking for the target ${targetName}.
-# Each Link.cmake is included directly before the link macro is called; presumably that file defines
-# the macro (linkMPI / linkCuda) -- TODO confirm against the files in ${cmakeMacroPath}.
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-# Metis linking is disabled:
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-# JsonCpp linking is disabled:
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/RoomMultiGPU/CMakeLists.txt b/apps/gpu/GKS/RoomMultiGPU/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomMultiGPU/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Build description of the RoomMultiGPU application.
-# setTargetNameToFolderName is a project macro that is not visible here; presumably it derives
-# ${targetName} (used below) from the name of this folder -- TODO confirm.
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-# Inputs for buildExe: no extra link directories, the project libraries to link and their include paths.
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-# Glob the source files of this folder into MY_SRCS (done by CMakePackage.cmake).
-include(CMakePackage.cmake)
-
-# Create the executable from the collected sources and sort it into the folder ${gksAppFolder}.
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Link the third-party libraries; this has to follow buildExe because it refers to ${targetName}.
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/RoomMultiGPU/CMakePackage.cmake b/apps/gpu/GKS/RoomMultiGPU/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomMultiGPU/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-# Collects the source files of this application into MY_SRCS.
-# Expects ${targetName} to be set by the including CMakeLists.txt.
-
-# File endings to collect: start from an empty list, then add the C/C++ endings and the CUDA
-# endings *.cu / *.cuh (the helper macros are project macros that are not visible here).
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-# Glob the source files below this directory into MY_SRCS; the variable is cleared first so that
-# no entries of a previously processed package remain.
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/RoomMultiGPU/RoomMultiGPU.cpp b/apps/gpu/GKS/RoomMultiGPU/RoomMultiGPU.cpp
deleted file mode 100644
index 2c92c5f737612167e1f092944c8b32a87f9d7215..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/RoomMultiGPU/RoomMultiGPU.cpp
+++ /dev/null
@@ -1,522 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/Communication/Communicator.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-//////////////////////////////////////////////////////////////////////////
-// prescribed parameters
-//////////////////////////////////////////////////////////////////////////
-
-// Grid resolution: the cell size is computed as dx = 1.0 / nx.
-uint nx = 64;
-
-// Extents used for the coarse grids: +-0.5*L in x and y, 0 to H in z.
-real L = 4.0;
-real H = 3.0;
-
-// Enters the viscosity mu and the velocity U (presumably the Rayleigh number -- TODO confirm).
-real Ra = 1.0e10;
-
-// Ba sets the reference lambda = Ba / ( 2 g H ); eps scales lambdaHot / lambdaCold around it and enters mu.
-// Pr and K are copied into Parameters (Pr additionally enters mu, K the speed of sound cs).
-real Ba  = 0.1;
-real eps = 1.2;
-real Pr  = 0.71;
-real K   = 2.0;
-    
-// g is applied as volume force in negative z direction; rho scales mu and enters the time step.
-real g   = 9.81;
-real rho = 1.2;
-
-//////////////////////////////////////////////////////////////////////////
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    int mpiWorldSize;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    uint gpuPerNode = 2;
-    CudaUtility::setCudaDevice(rank % gpuPerNode);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real dx = 1.0 / real(nx);
-
-    real lambda     = Ba / ( 2.0 * g * H );
-    real lambdaHot  = lambda / ( 1.0 + eps * 0.5 );
-    real lambdaCold = lambda / ( 1.0 - eps * 0.5 );
-    
-    real mu = sqrt( Pr * eps * g * H * H * H / Ra ) * rho;
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * lambda ) );
-    real U   = sqrt( Ra ) * mu / ( rho * L );
-
-    real CFL = 0.25;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //                                          <--X-->   <--Y-->    <------Z----->
-    if( rank == 0 ) gridBuilder->addCoarseGrid( -0.5*L ,  -0.5*L ,           0.0   ,  
-                                                 3.0*dx,   3.0*dx,   0.5*H + 3.0*dx, dx);
-
-    if( rank == 1 ) gridBuilder->addCoarseGrid( -3.0*dx,  -0.5*L ,           0.0   ,  
-                                                 0.5*L ,   3.0*dx,   0.5*H + 3.0*dx, dx);
-
-    if( rank == 2 ) gridBuilder->addCoarseGrid( -0.5*L ,  -3.0*dx,           0.0   ,  
-                                                 3.0*dx,   0.5*L ,   0.5*H + 3.0*dx, dx);
-
-    if( rank == 3 ) gridBuilder->addCoarseGrid( -3.0*dx,  -3.0*dx,           0.0   ,  
-                                                 0.5*L ,   0.5*L ,   0.5*H + 3.0*dx, dx);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //                                          <--X-->   <--Y-->    <------Z----->
-    if( rank == 4 ) gridBuilder->addCoarseGrid( -0.5*L ,  -0.5*L ,   0.5*H - 3.0*dx,  
-                                                 3.0*dx,   3.0*dx,       H         , dx);
-
-    if( rank == 5 ) gridBuilder->addCoarseGrid( -3.0*dx,  -0.5*L ,   0.5*H - 3.0*dx,  
-                                                 0.5*L ,   3.0*dx,       H         , dx);
-
-    if( rank == 6 ) gridBuilder->addCoarseGrid( -0.5*L ,  -3.0*dx,   0.5*H - 3.0*dx,  
-                                                 3.0*dx,   0.5*L ,       H         , dx);
-
-    if( rank == 7 ) gridBuilder->addCoarseGrid( -3.0*dx,  -3.0*dx,   0.5*H - 3.0*dx,  
-                                                 0.5*L ,   0.5*L ,       H         , dx);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    Sphere           sphere  ( 0.0, 0.0, 0.0, 0.6 );
-    VerticalCylinder cylinder( 0.0, 0.0, 0.0, 0.6, 2.0*H );
-
-    gridBuilder->setNumberOfLayers(6,10);
-
-    //gridBuilder->addGrid( &sphere, 2 );
-    gridBuilder->addGrid( &cylinder, 2 );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if( rank == 0 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -2.0*L, 0.0, 
-                                                                                 -2.0*L, 0.0, 
-                                                                                 -2.0*H, 0.5*H ) );
-
-    if( rank == 1 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.0  , 2.0*L, 
-                                                                                 -2.0*L, 0.0, 
-                                                                                 -2.0*H, 0.5*H ) );
-
-    if( rank == 2 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -2.0*L, 0.0, 
-                                                                                  0.0  , 2.0*L, 
-                                                                                 -2.0*H, 0.5*H ) );
-
-    if( rank == 3 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.0  , 2.0*L, 
-                                                                                  0.0  , 2.0*L, 
-                                                                                 -2.0*H, 0.5*H ) );
-
-    if( rank == 4 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -2.0*L, 0.0, 
-                                                                                 -2.0*L, 0.0, 
-                                                                                  0.5*H, 2.0*H ) );
-
-    if( rank == 5 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.0  , 2.0*L, 
-                                                                                 -2.0*L, 0.0, 
-                                                                                  0.5*H, 2.0*H ) );
-
-    if( rank == 6 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -2.0*L, 0.0, 
-                                                                                  0.0  , 2.0*L, 
-                                                                                  0.5*H, 2.0*H ) );
-
-    if( rank == 7 ) gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.0  , 2.0*L, 
-                                                                                  0.0  , 2.0*L, 
-                                                                                  0.5*H, 2.0*H ) );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-            
-    if( rank == 0 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PX, 1);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PY, 2);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PZ, 4);
-    }
-            
-    if( rank == 1 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MX, 0);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PY, 3);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PZ, 5);
-    }
-            
-    if( rank == 2 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PX, 3);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MY, 0);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PZ, 6);
-    }
-            
-    if( rank == 3 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MX, 2);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MY, 1);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PZ, 7);
-    }
-            
-    if( rank == 4 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PX, 5);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PY, 6);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MZ, 0);
-    }
-            
-    if( rank == 5 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MX, 4);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PY, 7);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MZ, 1);
-    }
-            
-    if( rank == 6 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::PX, 7);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MY, 4);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MZ, 2);
-    }
-            
-    if( rank == 7 ){
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MX, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MX, 6);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MY, 5);
-
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, GKS);
-        gridBuilder->setCommunicationProcess (CommunicationDirections::MZ, 3);
-    }
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_rank_" + std::to_string(rank) + "_lev_");
-
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    meshAdapter.getCommunicationIndices();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    //meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), false );
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, false );
-
-    bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
-    bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0), true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, true );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold,  0.0, true );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> hotPlate = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot, true );
-
-    hotPlate->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-        //return center.z < 0.0 && 
-        //       std::fabs(center.x) < 0.5 && 
-        //       std::fabs(center.y) < 0.5; 
-
-        return center.z < 0.0 && std::sqrt(center.x*center.x + center.y*center.y) < 0.5;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( hotPlate );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        real rhoLocal = rho * std::exp( - ( 2.0 * g * H * lambdaCold ) * cellCenter.z / H );
-
-        return toConservedVariables( PrimitiveVariables( rhoLocal, 0.0, 0.0, 0.0, lambdaCold ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_0", mpiWorldSize );
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank) );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 100000000; iter++ )
-    {
-        if( iter < 20000 )
-        {
-            std::dynamic_pointer_cast<IsothermalWall>(hotPlate)->lambda = lambdaCold + ( lambdaHot - lambdaCold ) * ( real(iter) / 20000.0 );
-        }
-        else
-        {
-            std::dynamic_pointer_cast<IsothermalWall>(hotPlate)->lambda = lambdaHot;
-        }
-
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            //( iter < 10     && iter % 1     == 0 ) ||
-            //( iter < 100    && iter % 10    == 0 ) ||
-            //( iter < 1000   && iter % 100   == 0 ) ||
-            //( iter < 10000  && iter % 1000  == 0 ) ||
-            //( iter < 10000000 && iter % 100000 == 0 )
-            ( iter >= 10000 && iter % 10000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( iter ), mpiWorldSize );
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-
-        //turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    //turbulenceAnalyzer->download();
-
-    //writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-    MPI_Init(&argc, &argv);
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    //std::string path( "F:/Work/Computations/out/RoomMultiGPU/" );
-    std::string path( "out/" );
-    std::string simulationName ( "Room" );
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    MPI_Finalize();
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/SalinasVazquez/3rdPartyLinking.cmake b/apps/gpu/GKS/SalinasVazquez/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SalinasVazquez/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/SalinasVazquez/CMakeLists.txt b/apps/gpu/GKS/SalinasVazquez/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SalinasVazquez/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/SalinasVazquez/CMakePackage.cmake b/apps/gpu/GKS/SalinasVazquez/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SalinasVazquez/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/SalinasVazquez/SalinasVazquez.cpp b/apps/gpu/GKS/SalinasVazquez/SalinasVazquez.cpp
deleted file mode 100644
index 1f8f352cac757c7138981d331fd7f7130b9f58a6..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SalinasVazquez/SalinasVazquez.cpp
+++ /dev/null
@@ -1,564 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-#include <thread>
-#include <sstream>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/SalinasVazquez.h"
-
-#include "GksGpu/Communication/Communicator.h"
-#include "GksGpu/Communication/MpiUtility.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-//uint deviceMap [2] = {2,3};
-uint deviceMap [2] = {0,1};
-
-void simulation( std::string path, std::string simulationName, uint restartIter )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-    real H = 0.25;
-
-    real dx = L / real(nx);
-
-    real Ra = 1.58e9;
-
-    real Ba  = 0.1;
-    real eps = 0.132;
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 1.0;
-    real rho = 1.0;
-
-    real lambda     = Ba / ( 2.0 * g * L );
-    real lambdaHot  = lambda / ( 1.0 + eps * 0.5 );
-    real lambdaCold = lambda / ( 1.0 - eps * 0.5 );
-    
-    real mu = sqrt( Pr * eps * g * L * L * L / Ra ) * rho ;
-
-    real cs  = sqrt( ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( 2.0 * lambda ) );
-    real U   = sqrt( Ra ) * mu / ( rho * L );
-
-    real CFL = 0.25;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( c1o1 + ( c2o1 * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.rhoRef    = rho;
-
-    parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-
-    parameters.useReactionLimiter      = false;
-    parameters.useTemperatureLimiter   = false;
-    parameters.usePassiveScalarLimiter = false;
-    parameters.useSmagorinsky          = true;
-
-    parameters.forcingSchemeIdx = 0;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                M e s h    G e n e r a t i o n
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real startX, endX;
-    real startY, endY;
-    real startZ, endZ;
-
-    if( rank % 2 == 0 ) startX = -0.5 * L;
-    else                startX = -3.0 * dx;
-    if( rank % 2 == 0 ) endX   =  3.0 * dx;
-    else                endX   =  0.5 * L;
-
-    if( mpiWorldSize == 2 )
-    {
-        startY = 0.0;
-        endY   = H;
-    }
-    else
-    {
-        startY =  rank / 2        * H - 3.0 * dx;
-        endY   = (rank / 2 + 1.0) * H + 3.0 * dx;
-    }
-
-    startZ = -0.5 * L;
-    endZ   =  0.5 * L;
-
-    gridBuilder->addCoarseGrid(startX, startY, startZ,  
-                               endX  , endY  , endZ  , dx);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //real refL[4] = { 0.30, 0.45, 0.49, 0.4975  };
-    real refL[4] = { 0.30, 0.45, 0.475, 0.495  };
-
-    gridBuilder->setNumberOfLayers(6,6);
-
-    Conglomerate coarseRefLevel;
-    
-
-    if( rank % 2 == 0 ) coarseRefLevel.add( new Cuboid (-100.0,   -100.0, -100.0, 
-                                                        -refL[0],  100.0,  100.0 ) );
-    else                coarseRefLevel.add( new Cuboid ( refL[0], -100.0, -100.0, 
-                                                         100.0,    100.0,  100.0 ) );
-
-    coarseRefLevel.add( new Cuboid (-100.0, -100.0, -100.0,   
-                                     100.0,  100.0, -refL[0] ) );
-    coarseRefLevel.add( new Cuboid (-100.0, -100.0,  refL[0], 
-                                     100.0,  100.0,  100.0   ) );
-
-    gridBuilder->addGrid( &coarseRefLevel, 1);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Conglomerate firstRefLevel;
-
-    if( rank % 2 == 0 ) firstRefLevel.add( new Cuboid (-100.0,   -100.0, -100.0, 
-                                                       -refL[1],  100.0,  100.0 ) );
-    else                firstRefLevel.add( new Cuboid ( refL[1], -100.0, -100.0, 
-                                                        100.0,    100.0,  100.0 ) );
-
-    firstRefLevel.add( new Cuboid (-100.0, -100.0, -100.0,   
-                                    100.0,  100.0, -refL[1] ) );
-    firstRefLevel.add( new Cuboid (-100.0, -100.0,  refL[1], 
-                                    100.0,  100.0,  100.0   ) );
-
-    gridBuilder->addGrid( &firstRefLevel, 2);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Conglomerate secondRefLevel;
-
-    if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,   -100.0, -100.0, 
-                                                        -refL[2],  100.0,  100.0 ) );
-    else                secondRefLevel.add( new Cuboid ( refL[2], -100.0, -100.0, 
-                                                         100.0,    100.0,  100.0 ) );
-
-    if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,   -100.0, -100.0,   
-                                                        -refL[0],  100.0, -refL[2] ) );
-    else                secondRefLevel.add( new Cuboid ( refL[0], -100.0, -100.0,   
-                                                         100.0,    100.0, -refL[2] ) );
-
-    if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,   -100.0,  refL[2], 
-                                                        -refL[0],  100.0,  100.0   ) );
-    else                secondRefLevel.add( new Cuboid ( refL[0], -100.0,  refL[2], 
-                                                         100.0,    100.0,  100.0   ) );
-
-    gridBuilder->addGrid( &secondRefLevel, 3);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //uint numberOfRefinements = 3;
-
-    //for( uint ref = 2; ref < numberOfRefinements; ref++ )
-    //{
-    //    Cuboid* refRegion;
-
-    //    if( rank % 2 == 0 ) refRegion = new Cuboid (-100.0,     -100.0, -100.0, 
-    //                                                -refL[ref],  100.0,  100.0 );
-    //    else                refRegion = new Cuboid ( refL[ref], -100.0, -100.0, 
-    //                                                 100.0,      100.0,  100.0 );
-
-    //    gridBuilder->addGrid( refRegion, ref + 1);
-    //}
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( rank % 2 == 0 ) startX = -100.0;
-    else                startX =    0.0;
-    if( rank % 2 == 0 ) endX   =    0.0;
-    else                endX   =  100.0;
-
-    if( mpiWorldSize == 2 )
-    {
-        startY = -100.0;
-        endY   =  100.0;
-    }
-    else
-    {
-        startY =   real(rank/2)         * H;
-        endY   = ( real(rank/2) + 1.0 ) * H;
-    }
-
-    startZ = -100.0;
-    endZ   =  100.0;
-
-    gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( startX, endX, 
-                                                                 startY, endY, 
-                                                                 startZ, endZ ) );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( mpiWorldSize == 2 ) gridBuilder->setPeriodicBoundaryCondition(false, true,  false);
-    else                    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-            
-    //////////////////////////////////////////////////////////////////////////
-
-    if( rank%2 == 0 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS );
-    else              gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS );
-
-    if( rank%2 == 0 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rank + 1 );
-    else              gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rank - 1 );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    if( mpiWorldSize > 2 )
-    {
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-
-        gridBuilder->setCommunicationProcess(CommunicationDirections::PY, (rank + 2 + mpiWorldSize) % mpiWorldSize);
-        gridBuilder->setCommunicationProcess(CommunicationDirections::MY, (rank - 2 + mpiWorldSize) % mpiWorldSize);
-    }
-
-    //gridBuilder->writeGridsToVtk(path + "/Grid_rank_" + std::to_string(rank) + "_lev_");     
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    if( mpiWorldSize == 2 ) meshAdapter.findPeriodicBoundaryNeighbors();    
-
-    meshAdapter.getCommunicationIndices();
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces_" + std::to_string( threadIndex ) + ".vtk" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                 B o u n d a r y    C o n d i t i o n s
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot , false );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold, false );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<SalinasVazquez>( dataBase, lambdaHot, lambdaCold, 0.3371, -0.2642,  0.5301, -2.6438, true );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<SalinasVazquez>( dataBase, lambdaHot, lambdaCold, 0.6559, -0.2037, -0.5420, -2.7318, true );
-
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( mpiWorldSize == 2 )
-    {
-        SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>(dataBase);
-        SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>(dataBase);
-
-        bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < 0; });
-        bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y > H; });
-
-        dataBase->boundaryConditions.push_back(bcMY);
-        dataBase->boundaryConditions.push_back(bcPY);
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                 I n i t i a l    C o n d i t i o n s
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    if( restartIter == INVALID_INDEX )
-    {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
-
-            real Th = 1.0 / lambdaHot;
-            real Tc = 1.0 / lambdaCold;
-            real T = Th - (Th - Tc)*((cellCenter.x + 0.5 * L) / L);
-            real lambdaLocal = 1.0 / T;
-
-            return toConservedVariables(PrimitiveVariables(rho, 0.0, 0.0, 0.0, lambda), parameters.K);
-        });
-
-        if (rank == 0) writeVtkXMLParallelSummaryFile(dataBase, parameters, path + simulationName + "_0", mpiWorldSize);
-
-        writeVtkXML(dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank));
-    }
-    else
-    {
-        Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ) + "_rank_" + std::to_string(rank), startIter );
-
-        if (rank == 0) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( restartIter ) + "_restart", mpiWorldSize );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" + "_rank_" + std::to_string(rank) );
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                  R u n
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 300.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 500000 );
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 200 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 10000000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( 
-            //( iter < 10     && iter % 1     == 0 ) ||
-            //( iter < 100    && iter % 10    == 0 ) ||
-            //( iter < 1000   && iter % 100   == 0 ) ||
-            //( iter < 10000  && iter % 1000  == 0 ) 
-            ( iter < 10000000 && iter % 50000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( iter ), mpiWorldSize );
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        cupsAnalyzer.run( iter, dt );
-
-        convergenceAnalyzer.run( iter );
-
-        turbulenceAnalyzer->run( iter, parameters );
-
-        if( iter > 500000 && iter % 100000 == 0 )
-        {
-            turbulenceAnalyzer->download();
-
-            if( rank == 0 ) writeTurbulenceVtkXMLParallelSummaryFile( dataBase, turbulenceAnalyzer, parameters, path + simulationName + "_Turbulence_" + std::to_string( iter ), mpiWorldSize );
-
-            writeTurbulenceVtkXML( dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        if( iter % 100000 == 0 )
-        {
-            turbulenceAnalyzer->download();
-
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank), iter );
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-}
-
-int main( int argc, char* argv[])
-{
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    MPI_Init(&argc, &argv);
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-#else
-    int rank         = MpiUtility::getMpiRankBeforeInit();
-    int mpiWorldSize = MpiUtility::getMpiWorldSizeBeforeInit();
-#endif
-
-    if( mpiWorldSize < 2 || mpiWorldSize%2 != 0 )
-    {
-        std::cerr << "Error: MpiWolrdSize must be multiple of 2!\n";
-        return 1;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/SalinasVazquez/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "SalinasVazquez" );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    // Important: for Cuda-Aware MPI the device must be set before MPI_Init()
-    int deviceCount = CudaUtility::getCudaDeviceCount();
-
-    if(deviceCount == 0)
-    {
-        std::stringstream msg;
-        msg << "No devices devices found!" << std::endl;
-        *logging::out << logging::Logger::WARNING << msg.str(); msg.str("");
-    }
-
-    CudaUtility::setCudaDevice( rank % deviceCount );
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifndef _WIN32
-    MPI_Init(&argc, &argv);
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        uint restartIter = INVALID_INDEX;
-
-        if( argc > 1 ) restartIter = atoi( argv[1] );
-
-        simulation(path, simulationName, restartIter);
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    MPI_Finalize();
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/SandiaFlame_1m/3rdPartyLinking.cmake b/apps/gpu/GKS/SandiaFlame_1m/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SandiaFlame_1m/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/SandiaFlame_1m/CMakeLists.txt b/apps/gpu/GKS/SandiaFlame_1m/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SandiaFlame_1m/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/SandiaFlame_1m/CMakePackage.cmake b/apps/gpu/GKS/SandiaFlame_1m/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SandiaFlame_1m/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/SandiaFlame_1m/SandiaFlame_1m.cpp b/apps/gpu/GKS/SandiaFlame_1m/SandiaFlame_1m.cpp
deleted file mode 100644
index 8c29bbb4f73109deb41e9dd4ab7402241d3079cf..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SandiaFlame_1m/SandiaFlame_1m.cpp
+++ /dev/null
@@ -1,576 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <iomanip>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-#include "Core/buildInfo.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-#include "GksGpu/BoundaryConditions/PassiveScalarDiriclet.h"
-#include "GksGpu/BoundaryConditions/InflowComplete.h"
-#include "GksGpu/BoundaryConditions/Open.h"
-#include "GksGpu/BoundaryConditions/Inflow.h"
-#include "GksGpu/BoundaryConditions/Symmetry.h"
-#include "GksGpu/BoundaryConditions/Pressure2.h"
-#include "GksGpu/BoundaryConditions/CreepingMassFlux.h"
-
-#include "GksGpu/Interface/Interface.h"
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-#include "GksGpu/Analyzer/PointTimeSeriesAnalyzer.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex, uint _testIndex, uint _nx, uint restartIter )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //uint nx = 64;
-    uint nx = _nx;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 3.0;
-    real H = 4.0;
-
-    real R = 0.5;
-
-    real dx = H / real(nx);
-
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 9.81;
-    real rho = 1.2;
-
-    real mu = 1.8e-5;
-
-    real U = 0.0;
-    real rhoFuel = 0.0;
-
-    if( _testIndex == 14 ) { U = 0.074; rhoFuel = 0.5405; }    // Test 14      low flow rates
-    if( _testIndex == 24 ) { U = 0.097; rhoFuel = 0.5464; }    // Test 24      medium flow rate
-    if( _testIndex == 17 ) { U = 0.117; rhoFuel = 0.5641; }    // Test 17      high flow rate
-
-    GksGpu::PrimitiveVariables prim( rho, 0.0, 0.0, 0.0, -1.0 );
-
-    GksGpu::setLambdaFromT( prim, 2.85 );
-
-    real cs  = sqrt( ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( 2.0 * prim.lambda ) );
-
-    //real CFL = 0.06125;
-    real CFL = 0.125;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( c1o1 + ( c2o1 * mu ) / ( U * dx * rho ) ) ) );
-
-    real dh = 8000.0; // kJ / kmol  / T_FAKTOR
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "cs = " << cs << " m/s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " kg/sm\n";
-    *logging::out << logging::Logger::INFO_HIGH << "Pr = " << Pr << "\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "HRR = " << U * rhoFuel * M_PI * R * R * ( dh * 100 ) / 0.016 / 1000.0 << " kW\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    GksGpu::Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.D = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = prim.lambda;
-
-    parameters.rhoRef    = rho;
-
-    parameters.heatOfReaction = dh;
-
-    parameters.viscosityModel = GksGpu::ViscosityModel::sutherlandsLaw;
-    //parameters.viscosityModel = GksGpu::ViscosityModel::constant;
-
-    parameters.enableReaction = true;
-
-    parameters.useHeatReleaseRateLimiter = true;
-    parameters.useReactionLimiter        = true;
-    parameters.useTemperatureLimiter     = true;
-    parameters.usePassiveScalarLimiter   = true;
-    parameters.useSmagorinsky            = true;
-
-    parameters.reactionLimiter    = 1.0005;
-    parameters.temperatureLimiter = 1.0e-3;
-
-    parameters.useSpongeLayer = true;
-    parameters.spongeLayerIdx = 1;
-
-    parameters.forcingSchemeIdx = 2;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    bool threeDimensional = true;
-
-    if( threeDimensional )
-    {
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, 0.0,
-                                    0.5*L,  0.5*L, H, dx);
-    }
-    else
-    {
-        gridBuilder->addCoarseGrid(-0.5*L, -0.5*dx, 0.0,
-                                    0.5*L,  0.5*dx, H, dx);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    VerticalCylinder cylinder1( 0.0, 0.0, 0.0, 1.5*R, 0.25*H );
-    VerticalCylinder cylinder2( 0.0, 0.0, 0.0, 1.1*R, 0.05*H );
-    
-    Conglomerate refRing;
-    refRing.add     ( new VerticalCylinder( 0.0, 0.0, 0.0, 1.2*R, 0.1 ) );
-    refRing.subtract( new VerticalCylinder( 0.0, 0.0, 0.0, 0.8*R, 1.0    ) );
-
-    gridBuilder->setNumberOfLayers(0,10);
-    
-    //gridBuilder->addGrid( &cylinder1 );
-    //gridBuilder->addGrid( &cylinder2 );
-    //gridBuilder->addGrid( &refRing );
-
-    if( threeDimensional ) gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-    else                   gridBuilder->setPeriodicBoundaryCondition(false, true,  false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    if( !threeDimensional )
-        meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksGpu::CudaUtility::setCudaDevice(_gpuIndex);
-
-    auto dataBase = std::make_shared<GksGpu::DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    real openBoundaryVelocityLimiter = 1.0;
-
-    SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
-    SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
-
-    SPtr<GksGpu::BoundaryCondition> bcMX_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'x' );
-    SPtr<GksGpu::BoundaryCondition> bcPX_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'x' );
-
-    bcMX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    bcMX_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x < -0.5*L && center.z > 0.9*H; } );
-    bcPX_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.x >  0.5*L && center.z > 0.9*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<GksGpu::BoundaryCondition> bcMY;
-    SPtr<GksGpu::BoundaryCondition> bcPY;
-
-    SPtr<GksGpu::BoundaryCondition> bcMY_2;
-    SPtr<GksGpu::BoundaryCondition> bcPY_2;
-
-    if( threeDimensional )
-    {
-        bcMY = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
-        bcPY = std::make_shared<GksGpu::Open>( dataBase, prim, openBoundaryVelocityLimiter );
-
-        bcMY_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'y' );
-        bcPY_2 = std::make_shared<GksGpu::Symmetry>( dataBase, 'y' );
-
-        bcMY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L; } );
-        bcPY->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-        bcMY_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y < -0.5*L && center.z > 0.9*H; } );
-        bcPY_2->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.y >  0.5*L && center.z > 0.9*H; } );
-    }
-    else
-    {
-        bcMY = std::make_shared<GksGpu::Periodic>(dataBase);
-        bcPY = std::make_shared<GksGpu::Periodic>(dataBase);
-
-        bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < -0.5*dx; });
-        bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y >  0.5*dx; });
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::AdiabaticWall>( dataBase, Vec3(0, 0, 0), true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<IsothermalWall>( dataBase, Vec3(0, 0, 0), prim.lambda, true );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<InflowComplete>( dataBase, PrimitiveVariables(rho, 0.0, 0.0, 0.0, prim.lambda, 0.0, 0.0) );
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<Open>( dataBase );
-
-    SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::Pressure2>( dataBase, c1o2 * prim.rho / prim.lambda );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < 0.0; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z > H  ; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<GksGpu::BoundaryCondition> burner = std::make_shared<GksGpu::CreepingMassFlux>( dataBase, rhoFuel, U, prim.lambda );
-    //SPtr<BoundaryCondition> burner = std::make_shared<Inflow>( dataBase, Vec3(0,0,U), prim.lambda, rhoFuel, 1, 0, 0, 1.0 );
-
-    burner->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ 
-        
-        if( threeDimensional )
-            return center.z < 0.0 && std::sqrt(center.x*center.x + center.y*center.y) < R;
-        else
-            return center.z < 0.0 && std::sqrt(center.x*center.x) < R && std::sqrt(center.y*center.y) < 0.5 * dx;
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( burner );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX_2 );
-    dataBase->boundaryConditions.push_back( bcPX_2 );
-
-    if( threeDimensional ){
-        dataBase->boundaryConditions.push_back( bcMY_2 );
-        dataBase->boundaryConditions.push_back( bcPY_2 );
-    }
-
-    //dataBase->boundaryConditions.push_back( burner );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    auto pointTimeSeriesAnalyzerU_P1 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'U' );
-    auto pointTimeSeriesAnalyzerV_P1 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'V' );
-    auto pointTimeSeriesAnalyzerW_P1 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 0.505), 'W' );
-
-    auto pointTimeSeriesAnalyzerU_P2 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'U' );
-    auto pointTimeSeriesAnalyzerV_P2 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'V' );
-    auto pointTimeSeriesAnalyzerW_P2 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 0.505), 'W' );
-
-    auto pointTimeSeriesAnalyzerU_P3 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'U' );
-    auto pointTimeSeriesAnalyzerV_P3 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'V' );
-    auto pointTimeSeriesAnalyzerW_P3 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.0, 0.0, 2.0), 'W' );
-
-    auto pointTimeSeriesAnalyzerU_P4 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'U' );
-    auto pointTimeSeriesAnalyzerV_P4 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'V' );
-    auto pointTimeSeriesAnalyzerW_P4 = std::make_shared<GksGpu::PointTimeSeriesAnalyzer>( dataBase, meshAdapter, Vec3(0.5, 0.0, 2.0), 'W' );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    GksGpu::CudaUtility::printCudaMemoryUsage();
-    
-    if( restartIter == INVALID_INDEX )
-    {
-        GksGpu::Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> GksGpu::ConservedVariables {
-
-            GksGpu::PrimitiveVariables primLocal = prim;
-
-            return GksGpu::toConservedVariables(primLocal, parameters.K);
-        });
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-    }
-    else
-    {
-        GksGpu::Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ), startIter );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" );
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    GksGpu::Initializer::initializeDataUpdate(dataBase);
-
-    dataBase->copyDataDeviceToHost();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint iterPerSecond = uint( c1o1 / parameters.dt ) + 1;
-
-    *logging::out << logging::Logger::INFO_HIGH << "iterPerSecond = " << iterPerSecond << "\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0, true, 10000 );
-
-    GksGpu::ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
-
-    auto turbulenceAnalyzer = std::make_shared<GksGpu::TurbulenceAnalyzer>( dataBase, 10 * iterPerSecond );
-
-    turbulenceAnalyzer->collect_UU = true;
-    turbulenceAnalyzer->collect_VV = true;
-    turbulenceAnalyzer->collect_WW = true;
-
-    turbulenceAnalyzer->allocate();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-    
-    for( uint iter = startIter + 1; iter <= 40 * iterPerSecond; iter++ )
-    {
-        cupsAnalyzer.run( iter, parameters.dt );
-
-        convergenceAnalyzer.run( iter );
-
-        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        pointTimeSeriesAnalyzerU_P1->run(iter, parameters);
-        pointTimeSeriesAnalyzerV_P1->run(iter, parameters);
-        pointTimeSeriesAnalyzerW_P1->run(iter, parameters);
-
-        pointTimeSeriesAnalyzerU_P2->run(iter, parameters);
-        pointTimeSeriesAnalyzerV_P2->run(iter, parameters);
-        pointTimeSeriesAnalyzerW_P2->run(iter, parameters);
-
-        pointTimeSeriesAnalyzerU_P3->run(iter, parameters);
-        pointTimeSeriesAnalyzerV_P3->run(iter, parameters);
-        pointTimeSeriesAnalyzerW_P3->run(iter, parameters);
-
-        pointTimeSeriesAnalyzerU_P4->run(iter, parameters);
-        pointTimeSeriesAnalyzerV_P4->run(iter, parameters);
-        pointTimeSeriesAnalyzerW_P4->run(iter, parameters);
-
-        int crashCellIndex = dataBase->getCrashCellIndex();
-
-        if( crashCellIndex >= 0 )
-        {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Simulation Crashed at CellIndex = " << crashCellIndex << "\n";
-            dataBase->copyDataDeviceToHost();
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-
-            break;
-        }
-
-        if( 
-            //( iter >= 39360 && iter % 1 == 0 ) || 
-            ( iter % 10000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        if( iter % 10000 == 0 /*|| iter == 39000*/)
-        {
-            dataBase->copyDataDeviceToHost();
-            GksGpu::Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ), iter );
-        }
-
-        if( iter % 100000 == 0 )
-        {
-            turbulenceAnalyzer->download();
-
-            writeTurbulenceVtkXML( dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence_" + std::to_string( iter ) );
-        }
-
-        if( iter % 100000 == 0 )
-        {
-            pointTimeSeriesAnalyzerU_P1->writeToFile(path + simulationName + "_P1_TimeSeries_" + pointTimeSeriesAnalyzerU_P1->quantity + "_" + std::to_string( iter ));
-            pointTimeSeriesAnalyzerV_P1->writeToFile(path + simulationName + "_P1_TimeSeries_" + pointTimeSeriesAnalyzerV_P1->quantity + "_" + std::to_string( iter ));
-            pointTimeSeriesAnalyzerW_P1->writeToFile(path + simulationName + "_P1_TimeSeries_" + pointTimeSeriesAnalyzerW_P1->quantity + "_" + std::to_string( iter ));
-
-            pointTimeSeriesAnalyzerU_P2->writeToFile(path + simulationName + "_P2_TimeSeries_" + pointTimeSeriesAnalyzerU_P2->quantity + "_" + std::to_string( iter ));
-            pointTimeSeriesAnalyzerV_P2->writeToFile(path + simulationName + "_P2_TimeSeries_" + pointTimeSeriesAnalyzerV_P2->quantity + "_" + std::to_string( iter ));
-            pointTimeSeriesAnalyzerW_P2->writeToFile(path + simulationName + "_P2_TimeSeries_" + pointTimeSeriesAnalyzerW_P2->quantity + "_" + std::to_string( iter ));
-
-            pointTimeSeriesAnalyzerU_P3->writeToFile(path + simulationName + "_P3_TimeSeries_" + pointTimeSeriesAnalyzerU_P3->quantity + "_" + std::to_string( iter ));
-            pointTimeSeriesAnalyzerV_P3->writeToFile(path + simulationName + "_P3_TimeSeries_" + pointTimeSeriesAnalyzerV_P3->quantity + "_" + std::to_string( iter ));
-            pointTimeSeriesAnalyzerW_P3->writeToFile(path + simulationName + "_P3_TimeSeries_" + pointTimeSeriesAnalyzerW_P3->quantity + "_" + std::to_string( iter ));
-
-            pointTimeSeriesAnalyzerU_P4->writeToFile(path + simulationName + "_P4_TimeSeries_" + pointTimeSeriesAnalyzerU_P4->quantity + "_" + std::to_string( iter ));
-            pointTimeSeriesAnalyzerV_P4->writeToFile(path + simulationName + "_P4_TimeSeries_" + pointTimeSeriesAnalyzerV_P4->quantity + "_" + std::to_string( iter ));
-            pointTimeSeriesAnalyzerW_P4->writeToFile(path + simulationName + "_P4_TimeSeries_" + pointTimeSeriesAnalyzerW_P4->quantity + "_" + std::to_string( iter ));
-        }
-
-        turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    turbulenceAnalyzer->download();
-
-    writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence_final");
-
-    pointTimeSeriesAnalyzerU_P1->writeToFile(path + simulationName + "_P1_TimeSeries_" + pointTimeSeriesAnalyzerU_P1->quantity + "_final");
-    pointTimeSeriesAnalyzerV_P1->writeToFile(path + simulationName + "_P1_TimeSeries_" + pointTimeSeriesAnalyzerV_P1->quantity + "_final");
-    pointTimeSeriesAnalyzerW_P1->writeToFile(path + simulationName + "_P1_TimeSeries_" + pointTimeSeriesAnalyzerW_P1->quantity + "_final");
-
-    pointTimeSeriesAnalyzerU_P2->writeToFile(path + simulationName + "_P2_TimeSeries_" + pointTimeSeriesAnalyzerU_P2->quantity + "_final");
-    pointTimeSeriesAnalyzerV_P2->writeToFile(path + simulationName + "_P2_TimeSeries_" + pointTimeSeriesAnalyzerV_P2->quantity + "_final");
-    pointTimeSeriesAnalyzerW_P2->writeToFile(path + simulationName + "_P2_TimeSeries_" + pointTimeSeriesAnalyzerW_P2->quantity + "_final");
-
-    pointTimeSeriesAnalyzerU_P3->writeToFile(path + simulationName + "_P3_TimeSeries_" + pointTimeSeriesAnalyzerU_P3->quantity + "_final");
-    pointTimeSeriesAnalyzerV_P3->writeToFile(path + simulationName + "_P3_TimeSeries_" + pointTimeSeriesAnalyzerV_P3->quantity + "_final");
-    pointTimeSeriesAnalyzerW_P3->writeToFile(path + simulationName + "_P3_TimeSeries_" + pointTimeSeriesAnalyzerW_P3->quantity + "_final");
-
-    pointTimeSeriesAnalyzerU_P4->writeToFile(path + simulationName + "_P4_TimeSeries_" + pointTimeSeriesAnalyzerU_P4->quantity + "_final");
-    pointTimeSeriesAnalyzerV_P4->writeToFile(path + simulationName + "_P4_TimeSeries_" + pointTimeSeriesAnalyzerV_P4->quantity + "_final");
-    pointTimeSeriesAnalyzerW_P4->writeToFile(path + simulationName + "_P4_TimeSeries_" + pointTimeSeriesAnalyzerW_P4->quantity + "_final");
-}
-
-int main( int argc, char* argv[])
-{
-    uint restartIter = INVALID_INDEX;
-    //uint restartIter = 90000;
-        
-    uint gpuIndex = 1;
-    uint testIndex = 24;
-    uint nx = 64;
-
-    if( argc > 1 ) gpuIndex    = atoi( argv[1] );
-
-    if( argc > 2 ) testIndex   = atoi( argv[2] );
-
-    if( argc > 3 ) nx          = atoi( argv[3] );
-
-    if( argc > 4 ) restartIter = atoi( argv[4] );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/SandiaFlame_1m/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    path += "Test_" + std::to_string(testIndex) + "_" + std::to_string(nx) + "/";
-
-    std::string simulationName ( "Flame" );
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        thermalCavity( path, simulationName, gpuIndex, testIndex, nx, restartIter );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/ShearWave/3rdPartyLinking.cmake b/apps/gpu/GKS/ShearWave/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ShearWave/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/ShearWave/CMakeLists.txt b/apps/gpu/GKS/ShearWave/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ShearWave/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/ShearWave/CMakePackage.cmake b/apps/gpu/GKS/ShearWave/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ShearWave/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/ShearWave/ShearWave.cpp b/apps/gpu/GKS/ShearWave/ShearWave.cpp
deleted file mode 100644
index e94d921fbdbb141a89eeaad46b5d4b95d174be9e..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ShearWave/ShearWave.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/FlowStateData/AccessDeviceData.cuh"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void channelFlow( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-    real H = 1.5;
-
-    real dx = L / real(nx);
-
-    real Re  = 1.0e2;
-    real U  = 0.1;
-    real Ma = 0.1;
-    
-    real Pr  = 0.1;
-    real K   = 2.0;
-
-    real rho = 1.0;
-
-    real mu = U * rho * L / Re;
-
-    real cs = U / Ma;
-    real lambda = c1o2 * ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( cs * cs );
-
-    real g = eight * mu * U / ( H * H );
-
-    real p0 = c1o2 * rho / lambda;
-
-    real CFL = 0.25;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-
-    dt = 0.001 * ( 32.0 / real(nx) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = g;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid(-0.5*L, -0.5*H, -0.5*dx,  
-                                0.5*L,  0.5*H,  0.5*dx, dx);
-
-    //Cuboid cube(-1.0, -1.0, 0.45, 1.0, 1.0, 0.55);
-
-    //gridBuilder->setNumberOfLayers(6,6);
-    //gridBuilder->addGrid( &cube, 1);
-
-    gridBuilder->setPeriodicBoundaryCondition(true, true, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*H; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*dx; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*dx; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        real ULocal = 0.0;
-        real VLocal = 0.0;
-        real WLocal = U * sin( 2.0 * M_PI * cellCenter.x / L ) * cos( 4.0 / 3.0 * M_PI * cellCenter.y / L );
-
-        return toConservedVariables( PrimitiveVariables( rho, ULocal, VLocal, WLocal, lambda, 0.0 ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + ".0." + std::to_string( nx ) );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 10000 * ( nx / 32 ); iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, nullptr, 0);
-
-        //if( iter % ( 100 * ( nx / 32 ) ) == 0 )
-        //{
-        //    dataBase->copyDataDeviceToHost();
-
-        //    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        //}
-
-        cupsAnalyzer.run( iter );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + ".Result." + std::to_string( nx ) );
-
-
-    {
-        real sum1 = 0.0;
-        real sum2 = 0.0;
-    
-        for( uint cellIdx = 0; cellIdx < dataBase->perLevelCount[0].numberOfBulkCells; cellIdx++ )
-        {
-            real velocity = dataBase->dataHost[ RHO_W(cellIdx, dataBase->numberOfCells) ] / dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells) ];
-
-            sum1 += velocity * U * sin( 2.0 * M_PI * dataBase->getCellCenter(cellIdx).x / L ) * cos( 4.0 / 3.0 * M_PI * dataBase->getCellCenter(cellIdx).y / L ); 
-            sum2 +=            U * sin( 2.0 * M_PI * dataBase->getCellCenter(cellIdx).x / L ) * cos( 4.0 / 3.0 * M_PI * dataBase->getCellCenter(cellIdx).y / L )
-                  *            U * sin( 2.0 * M_PI * dataBase->getCellCenter(cellIdx).x / L ) * cos( 4.0 / 3.0 * M_PI * dataBase->getCellCenter(cellIdx).y / L ); 
-        }
-
-        *logging::out << logging::Logger::INFO_HIGH << sum1 / sum2 << "\n";
-    }
-}
-
-int main( int argc, char* argv[])
-{
-    std::string path( "F:/Work/Computations/out/" );
-    std::string simulationName ( "ShearWave" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        channelFlow( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/SingleGPU/3rdPartyLinking.cmake b/apps/gpu/GKS/SingleGPU/3rdPartyLinking.cmake
deleted file mode 100644
index f8902be1dd38d4f48cfebd807b2b4a6d1ae9e793..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SingleGPU/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,13 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/OpenMP/Link.cmake)
-linkOpenMP(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/SingleGPU/CMakeLists.txt b/apps/gpu/GKS/SingleGPU/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SingleGPU/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/SingleGPU/CMakePackage.cmake b/apps/gpu/GKS/SingleGPU/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SingleGPU/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/SingleGPU/SingleGPU.cpp b/apps/gpu/GKS/SingleGPU/SingleGPU.cpp
deleted file mode 100644
index 2fed78f38419af983d9818ff3f15f2e01f0c5f69..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/SingleGPU/SingleGPU.cpp
+++ /dev/null
@@ -1,335 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <iomanip>
-#include <exception>
-#include <fstream>
-#include <sstream>
-#include <memory>
-#include <omp.h>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Sphere/Sphere.h"
-#include "GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/Communication/Communicator.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-#include "GksGpu/Communication/MpiUtility.h"
-
-//////////////////////////////////////////////////////////////////////////
-
-real performanceTest( std::string path, std::string simulationName, uint nx )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L  = 1.0;
-
-    real LX = L;
-    real LY = L;
-    real LZ = L;
-
-    real dx = L / real(nx);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    GksGpu::Parameters parameters;
-
-    parameters.K  = 0;
-    parameters.Pr = 1;
-    parameters.mu = 0.01;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = 0.0001 * ( double(128) / double(nx) );
-    parameters.dx = dx;
-
-    parameters.lambdaRef = 1.0e-2;
-    
-    parameters.forcingSchemeIdx = 2;
-
-    parameters.enableReaction = true;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-    gridBuilder->addCoarseGrid( - 0.5*L, - 0.5*L, - 0.5*L,
-                                  0.5*L,   0.5*L,   0.5*L, dx);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setPeriodicBoundaryCondition(true,true,true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<GksGpu::DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    SPtr<GksGpu::BoundaryCondition> bcMX = std::make_shared<GksGpu::Periodic>( dataBase );
-    SPtr<GksGpu::BoundaryCondition> bcPX = std::make_shared<GksGpu::Periodic>( dataBase );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<GksGpu::BoundaryCondition> bcMY = std::make_shared<GksGpu::Periodic>( dataBase );
-    SPtr<GksGpu::BoundaryCondition> bcPY = std::make_shared<GksGpu::Periodic>( dataBase );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<GksGpu::BoundaryCondition> bcMZ = std::make_shared<GksGpu::Periodic>( dataBase );
-    SPtr<GksGpu::BoundaryCondition> bcPZ = std::make_shared<GksGpu::Periodic>( dataBase );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_HIGH << "NumberOfBoundaryConditions = " << (int)dataBase->boundaryConditions.size() << "\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "bcMX ==> " << bcMX->numberOfCellsPerLevel[0] << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "bcPX ==> " << bcPX->numberOfCellsPerLevel[0] << "\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "bcMY ==> " << bcMY->numberOfCellsPerLevel[0] << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "bcPY ==> " << bcPY->numberOfCellsPerLevel[0] << "\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "bcMZ ==> " << bcMZ->numberOfCellsPerLevel[0] << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "bcPZ ==> " << bcPZ->numberOfCellsPerLevel[0] << "\n";
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-
-    GksGpu::CudaUtility::printCudaMemoryUsage();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksGpu::Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> GksGpu::ConservedVariables
-    {
-        real U = 0.1;
-
-        real ULocal =   0.1 + U * sin( 2.0 * M_PI * cellCenter.x ) * cos( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z );
-        real VLocal =   0.1 - U * cos( 2.0 * M_PI * cellCenter.x ) * sin( 2.0 * M_PI * cellCenter.y ) * cos( 2.0 * M_PI * cellCenter.z );
-        real WLocal =   0.1;
-
-        real rho = 1.0;
-
-        real p0 = 0.5 * rho / parameters.lambdaRef;
-
-        real pLocal = p0 + rho * U * U / 16.0 * ( cos( 2.0 * M_PI * 2.0 * cellCenter.x ) + cos( 2.0 * M_PI * 2.0 * cellCenter.y ) ) * ( 2.0 + cos( 2.0 * M_PI * 2.0 * cellCenter.z ) );
-
-        real rhoLocal = 2.0 * pLocal * parameters.lambdaRef;
-
-        return GksGpu::toConservedVariables( GksGpu::PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, parameters.lambdaRef ), parameters.K );
-    });
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataHostToDevice();
-
-    for( auto bc : dataBase->boundaryConditions ) 
-        for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-            bc->runBoundaryConditionKernel( dataBase, parameters, level );
-
-    GksGpu::Initializer::initializeDataUpdate(dataBase);
-
-    //dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    const uint numberOfIterations = 1000;
-
-    GksGpu::CupsAnalyzer cupsAnalyzer( dataBase, false, 30.0, true, numberOfIterations );
-
-    real CUPS = 0;
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= numberOfIterations; iter++ )
-    {
-        GksGpu::TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        CUPS = cupsAnalyzer.run( iter, parameters.dt );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + simulationName + "_final" );
-    
-    //////////////////////////////////////////////////////////////////////////
-
-    int crashCellIndex = dataBase->getCrashCellIndex();
-    if( crashCellIndex >= 0 )
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "============= Simulation Crashed!!! =============\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-        *logging::out << logging::Logger::LOGGER_ERROR << "=================================================\n";
-    }
-
-    return CUPS;
-}
-
-int main( int argc, char* argv[])
-{
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/SingleGPU/" );
-#else
-    //std::string path( "/home/stephan/Computations/out/" );
-    std::string path( "out/" );
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-    try
-    {
-        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-        logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-        std::string simulationName ( "SingleGPU" );
-
-        std::ofstream file;
-        file.open( path + simulationName + ".dat" );
-
-        //std::vector<uint> nxList = {32,64,128,256};
-        std::vector<uint> nxList = {128};
-
-        for( auto nx : nxList )
-        {
-            logging::Logger::addStream(&std::cout);
-    
-            std::ofstream logFile( path + simulationName + "_nx_" + std::to_string(nx) + ".log" );
-            logging::Logger::addStream(&logFile);
-
-            GksGpu::CudaUtility::setCudaDevice( 0 );
-    
-            //////////////////////////////////////////////////////////////////////////
-
-            if( sizeof(real) == 4 )
-                *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-            else
-                *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-            real CUPS = performanceTest( path, simulationName + "_nx_" + std::to_string(nx), nx );
-
-            file << std::setw(5) << nx <<std::setw(20) << CUPS << std::endl;
-
-            logFile.close();
-            
-            logging::Logger::resetStreams();
-        }
-
-        file.close();
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/TaylorGreen3D/3rdPartyLinking.cmake b/apps/gpu/GKS/TaylorGreen3D/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/TaylorGreen3D/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/TaylorGreen3D/CMakeLists.txt b/apps/gpu/GKS/TaylorGreen3D/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/TaylorGreen3D/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/TaylorGreen3D/CMakePackage.cmake b/apps/gpu/GKS/TaylorGreen3D/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/TaylorGreen3D/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/TaylorGreen3D/TaylorGreen3D.cpp b/apps/gpu/GKS/TaylorGreen3D/TaylorGreen3D.cpp
deleted file mode 100644
index 1fdd80bd207cbd688601f47de3d2ea54814112f3..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/TaylorGreen3D/TaylorGreen3D.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/KineticEnergyAnalyzer.h"
-#include "GksGpu/Analyzer/EnstrophyAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void writeVelocityFile( SPtr<DataBase> dataBase, std::string filename );
-
-//////////////////////////////////////////////////////////////////////////
-real Re = 1.6e3;
-
-uint dtPerL = 500;
-
-uint nx = 64;
-uint gpuIndex = 0;
-//////////////////////////////////////////////////////////////////////////
-
-void gksTest( std::string path )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice( gpuIndex );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-
-    real dx = 2.0 * M_PI * L / real(nx);
-
-    real U  = 1.0;
-    real Ma = 0.1;
-    
-    real Pr  = 0.71;
-    real K   = 2.0;
-
-    real rho = 1.0;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real gamma = ( K + 5 ) / ( K + 3 );
-
-    real mu = U * rho * L / Re;
-
-    real cs = U / Ma;
-    real lambda = c1o2 * ( ( K + 5.0 ) / ( K + 3.0 ) ) / ( cs * cs );
-
-    real CFL = 0.5;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt(CFL=0.5) = " << dt << " s\n";
-
-    dt = L / U /  dtPerL * ( 64.0 / real(nx) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt          = " << dt << " s\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "mu          = " << mu << "\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid(-M_PI*L, -M_PI*L, -M_PI*L,  
-                                M_PI*L,  M_PI*L,  M_PI*L, dx);
-
-    //gridBuilder->addCoarseGrid(-2.0 * dx, -0.5*L*2.0*M_PI, -0.5*L*2.0*M_PI,  
-    //                            2.0 * dx,  0.5*L*2.0*M_PI,  0.5*L*2.0*M_PI, dx);
-
-    //Cuboid cube(-1.0, -1.0, 0.45, 1.0, 1.0, 0.55);
-
-    //gridBuilder->setNumberOfLayers(6,6);
-    //gridBuilder->addGrid( &cube, 1);
-
-    gridBuilder->setPeriodicBoundaryCondition(true, true, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "out/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-    dataBase->setMesh( meshAdapter );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*L; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        real ULocal =   U * sin( cellCenter.x ) * cos( cellCenter.y ) * cos( cellCenter.z );
-        real VLocal = - U * cos( cellCenter.x ) * sin( cellCenter.y ) * cos( cellCenter.z );
-        real WLocal =   0.0;
-
-        real p0 = 0.5 * rho / lambda;
-
-        real pLocal = p0 + rho * U * U / 16.0 * ( cos( 2.0 * cellCenter.x ) + cos( 2.0 * cellCenter.y ) ) * ( 2.0 + cos( 2.0 * cellCenter.z ) );
-
-        real rhoLocal = 2.0 * pLocal * lambda;
-
-        return toConservedVariables( PrimitiveVariables( rhoLocal, ULocal, VLocal, WLocal, lambda ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    writeVtkXML( dataBase, parameters, 0, path + "TGV_3D_nx_" + std::to_string(nx) + "_dtPerL_" + std::to_string(dtPerL) + "_"          + std::to_string( 0 ) );
-    writeVelocityFile( dataBase,          path + "TGV_3D_nx_" + std::to_string(nx) + "_dtPerL_" + std::to_string(dtPerL) + "_Velocity_" + std::to_string( 0 ) );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    KineticEnergyAnalyzer kineticEnergyAnalyzer( dataBase,             10, 10000 );
-    EnstrophyAnalyzer     enstrophyAnalyzer    ( dataBase, parameters, 10, 10000 );
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 60.0, false, 100 );
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 40 * lround(L/(U*dt)); iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        kineticEnergyAnalyzer.run( iter );
-        enstrophyAnalyzer.run( iter );
-
-        if( iter % ( 5 * lround(L/(U*dt)) ) == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + "TGV_3D_nx_" + std::to_string(nx) + "_dtPerL_" + std::to_string(dtPerL) + "_"          + std::to_string( iter / lround(L/(U*dt)) ) );
-            writeVelocityFile( dataBase,          path + "TGV_3D_nx_" + std::to_string(nx) + "_dtPerL_" + std::to_string(dtPerL) + "_Velocity_" + std::to_string( iter / lround(L/(U*dt)) ) );
-            kineticEnergyAnalyzer.writeToFile(    path + "TGV_3D_nx_" + std::to_string(nx) + "_dtPerL_" + std::to_string(dtPerL) + "_EKin_"     + std::to_string( iter / lround(L/(U*dt)) ) );
-            enstrophyAnalyzer.writeToFile    (    path + "TGV_3D_nx_" + std::to_string(nx) + "_dtPerL_" + std::to_string(dtPerL) + "_Enstrophy_"+ std::to_string( iter / lround(L/(U*dt)) ) );
-        }
-
-        cupsAnalyzer.run( iter );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-}
-
-int main( int argc, char* argv[])
-{
-    if( argc > 1 ) gpuIndex = atoi( argv[1] );
-    if( argc > 2 ) Re       = atof( argv[2] );
-    if( argc > 3 ) nx       = atoi( argv[3] );
-    if( argc > 4 ) dtPerL   = atoi( argv[4] );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //std::string path( "F:/Work/Computations/TaylorGreenVortex_3D/results/GKS/" );
-    std::string path( "./results/GKS/" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        gksTest( path );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include "FlowStateData/AccessDeviceData.cuh"
-
-void writeVelocityFile( SPtr<DataBase> dataBase, std::string filename )
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "writeVelocityFile( " << filename << " )" << "\n";
-
-    std::ofstream file;
-
-    file.open(filename + ".dat" );
-
-    for( uint cellIndex = 0; cellIndex < dataBase->perLevelCount[0].numberOfBulkCells; cellIndex++ )
-    {
-        real rho = dataBase->dataHost[ RHO__(cellIndex, dataBase->numberOfCells) ];
-
-        file << dataBase->dataHost[ RHO_U(cellIndex, dataBase->numberOfCells) ] / rho << ", ";
-        file << dataBase->dataHost[ RHO_V(cellIndex, dataBase->numberOfCells) ] / rho << ", ";
-        file << dataBase->dataHost[ RHO_W(cellIndex, dataBase->numberOfCells) ] / rho << std::endl;
-    }
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
\ No newline at end of file
diff --git a/apps/gpu/GKS/ThermalCavity/3rdPartyLinking.cmake b/apps/gpu/GKS/ThermalCavity/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ThermalCavity/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/ThermalCavity/CMakeLists.txt b/apps/gpu/GKS/ThermalCavity/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ThermalCavity/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/ThermalCavity/CMakePackage.cmake b/apps/gpu/GKS/ThermalCavity/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ThermalCavity/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/ThermalCavity/ThermalCavity.cpp b/apps/gpu/GKS/ThermalCavity/ThermalCavity.cpp
deleted file mode 100644
index ae729002cd11ba3793e6a3a98e612f6f4230ca6f..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ThermalCavity/ThermalCavity.cpp
+++ /dev/null
@@ -1,345 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void thermalCavity( std::string path, std::string simulationName )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 128;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-    //real H = 0.25;
-    real H = L / real(nx);
-
-    real dx = L / real(nx);
-
-
-    real Ra = 2.0e9;
-
-    real Ba  = 0.1;
-    real eps = 1.2;
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 1.0;
-    real rho = 1.0;
-
-    real lambda     = Ba / ( 2.0 * g * L );
-    real lambdaHot  = lambda / ( 1.0 + eps * 0.5 );
-    real lambdaCold = lambda / ( 1.0 - eps * 0.5 );
-    
-    real mu = sqrt( Pr * eps * g * L * L * L / Ra ) * rho ;
-
-    real cs  = sqrt( ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( 2.0 * lambda ) );
-    real U   = sqrt( Ra ) * mu / ( rho * L );
-
-    real CFL = 0.5;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = -g;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::sutherlandsLaw;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, -0.5*H,  
-    //                            0.5*L,  0.5*L,  0.5*H, dx);
-
-    gridBuilder->addCoarseGrid(-0.5*L, -0.5*L, -0.5*H,  
-                                0.5*L,  0.5*L,  0.5*H, dx);
-
-    real L_1 = 0.35;
-    real L_2 = 0.45;
-    real L_3 = 0.475;
-    real L_4 = 0.485;
-
-    Cuboid* cubeMX_1 = new Cuboid (-1.0, -1.0, -1.0, 
-                                   -L_1,  1.0,  1.0 );
-    Cuboid* cubePX_1 = new Cuboid ( L_1, -1.0, -1.0, 
-                                    1.0,  1.0,  1.0 );
-
-    Cuboid* cubeMX_2 = new Cuboid (-1.0, -1.0, -1.0, 
-                                   -L_2,  1.0,  1.0 );
-    Cuboid* cubePX_2 = new Cuboid ( L_2, -1.0, -1.0, 
-                                    1.0,  1.0,  1.0 );
-
-    Cuboid* cubeMX_3 = new Cuboid (-1.0, -1.0, -1.0, 
-                                   -L_3,  1.0,  1.0 );
-    Cuboid* cubePX_3 = new Cuboid ( L_3, -1.0, -1.0, 
-                                    1.0,  1.0,  1.0 );
-
-    Cuboid* cubeMX_4 = new Cuboid (-1.0, -1.0, -1.0, 
-                                   -L_4,  1.0,  1.0 );
-    Cuboid* cubePX_4 = new Cuboid ( L_4, -1.0, -1.0, 
-                                    1.0,  1.0,  1.0 );
-
-    Conglomerate refRegion_1;
-    refRegion_1.add(cubeMX_1);
-    refRegion_1.add(cubePX_1);
-
-    Conglomerate refRegion_2;
-    refRegion_2.add(cubeMX_2);
-    refRegion_2.add(cubePX_2);
-
-    Conglomerate refRegion_3;
-    refRegion_3.add(cubeMX_3);
-    refRegion_3.add(cubePX_3);
-
-    Conglomerate refRegion_4;
-    refRegion_4.add(cubeMX_4);
-    refRegion_4.add(cubePX_4);
-
-    gridBuilder->setNumberOfLayers(6,6);
-
-    gridBuilder->addGrid( &refRegion_1, 1);
-    //gridBuilder->addGrid( &refRegion_2, 2);
-    //gridBuilder->addGrid( &refRegion_3, 3);
-    //gridBuilder->addGrid( &refRegion_4, 4);
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, true);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    //gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    //meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    //meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot,  false );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold, false );
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-    //SPtr<BoundaryCondition> bcPX = std::make_shared<Periodic>( dataBase );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0) );
-    SPtr<BoundaryCondition> bcPY = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0) );
-    //SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>( dataBase );
-    //SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>( dataBase );
-
-    bcMY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y < -0.5*L; } );
-    bcPY->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.y >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    //SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0) );
-    //SPtr<BoundaryCondition> bcPZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0.0, 0.0, 0.0) );
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<Periodic>( dataBase );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<Periodic>( dataBase );
-    
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*H; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-    
-    dataBase->boundaryConditions.push_back( bcMY );
-    dataBase->boundaryConditions.push_back( bcPY );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    dataBase->setMesh( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables{
-
-        real Th = 1.0 / lambdaHot;
-        real Tc = 1.0 / lambdaCold;
-        real T = Th - (Th - Tc)*( (cellCenter.x + 0.5 * L) / L);
-        real lambdaLocal = 1.0 / T;
-
-        return toConservedVariables( PrimitiveVariables( rho, 0.0, 0.0, 0.0, lambda ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 50000 );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter <= 100000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, nullptr, 0);
-
-        if( 
-            //( iter < 10     && iter % 1     == 0 ) ||
-            //( iter < 100    && iter % 10    == 0 ) ||
-            //( iter < 1000   && iter % 100   == 0 ) ||
-            //( iter < 10000  && iter % 1000  == 0 ) ||
-            ( iter < 10000000 && iter % 10000 == 0 )
-          )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter );
-
-        convergenceAnalyzer.run( iter );
-
-        turbulenceAnalyzer->run( iter, parameters );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-    turbulenceAnalyzer->download();
-
-    writeTurbulenceVtkXML(dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence");
-}
-
-int main( int argc, char* argv[])
-{
-    std::string path( "F:/Work/Computations/out/" );
-    //std::string path( "out/" );
-    std::string simulationName ( "ThermalCavity" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precison\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        thermalCavity( path, simulationName );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/GKS/ThermalCavityMultiGPU/3rdPartyLinking.cmake b/apps/gpu/GKS/ThermalCavityMultiGPU/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ThermalCavityMultiGPU/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/ThermalCavityMultiGPU/CMakeLists.txt b/apps/gpu/GKS/ThermalCavityMultiGPU/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ThermalCavityMultiGPU/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/ThermalCavityMultiGPU/CMakePackage.cmake b/apps/gpu/GKS/ThermalCavityMultiGPU/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ThermalCavityMultiGPU/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/ThermalCavityMultiGPU/ThermalCavityMultiGPU.cpp b/apps/gpu/GKS/ThermalCavityMultiGPU/ThermalCavityMultiGPU.cpp
deleted file mode 100644
index 1af2397d4bfa06f54ee1adb39ab6f618a7829522..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/ThermalCavityMultiGPU/ThermalCavityMultiGPU.cpp
+++ /dev/null
@@ -1,608 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-#include <thread>
-
-#include <mpi.h>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/geometries/BoundingBox/BoundingBox.h"
-#include "GridGenerator/utilities/communication.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-#include "GksGpu/BoundaryConditions/Pressure.h"
-#include "GksGpu/BoundaryConditions/AdiabaticWall.h"
-
-#include "GksGpu/Communication/Communicator.h"
-#include "GksGpu/Communication/MpiUtility.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-#include "GksGpu/Analyzer/PointTimeSeriesCollector.h"
-
-#include "GksGpu/Restart/Restart.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-//uint deviceMap [2] = {2,3};
-uint deviceMap [2] = {0,1};
-
-void simulation( std::string path, std::string simulationName, bool fine, bool highAspect, uint restartIter )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint nx = 64;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-    real W = 0.25;
-
-    real H = L;
-
-    if(highAspect) H = 2.0 * L;
-
-    real dx = L / real(nx);
-
-    real Ra = 5.0e9;
-
-    real Ba  = 0.1;
-    real eps = 1.2;
-    real Pr  = 0.71;
-    real K   = 2.0;
-    
-    real g   = 1.0;
-    real rho = 1.0;
-
-    real lambda     = Ba / ( 2.0 * g * L );
-    real lambdaHot  = lambda / ( 1.0 + eps * 0.5 );
-    real lambdaCold = lambda / ( 1.0 - eps * 0.5 );
-    
-    real mu = sqrt( Pr * eps * g * L * L * L / Ra ) * rho ;
-
-    real cs  = sqrt( ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( 2.0 * lambda ) );
-    real U   = sqrt( Ra ) * mu / ( rho * L );
-
-    real CFL = 0.5;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( c1o1 + ( c2o1 * mu ) / ( U * dx * rho ) ) ) );
-
-    *logging::out << logging::Logger::INFO_HIGH << "dt = " << dt << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "U  = " << U  << " s\n";
-    *logging::out << logging::Logger::INFO_HIGH << "mu = " << mu << " s\n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = -g;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    parameters.forcingSchemeIdx = 0;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                M e s h    G e n e r a t i o n
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real startX, endX;
-    real startY, endY;
-    real startZ, endZ;
-
-    if( rank % 2 == 0 ) startX = -0.5 * L;
-    else                startX = -3.0 * dx;
-    if( rank % 2 == 0 ) endX   =  3.0 * dx;
-    else                endX   =  0.5 * L;
-
-    if( mpiWorldSize == 2 )
-    {
-        startY = 0.0;
-        endY   = W;
-    }
-    else
-    {
-        startY =  rank / 2        * W - 3.0 * dx;
-        endY   = (rank / 2 + 1.0) * W + 3.0 * dx;
-    }
-
-    startZ = - 0.5 * H;
-    endZ   =   0.5 * H;
-
-    gridBuilder->addCoarseGrid(startX, startY, startZ,  
-                               endX  , endY  , endZ  , dx);
-
-    std::cout << __LINE__ << std::endl;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real refL[4] = { 0.2, 0.05, 0.025, 0.005 };
-
-    if( fine )
-    {
-        refL[1] = 0.1;
-        refL[2] = 0.05;
-    }
-
-    gridBuilder->setNumberOfLayers(6,6);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Conglomerate coarseRefLevel;
-
-    if( rank % 2 == 0 ) coarseRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0, 
-                                                        -0.5*L + refL[0],  100.0,  100.0 ) );
-    else                coarseRefLevel.add( new Cuboid ( 0.5*L - refL[0], -100.0, -100.0, 
-                                                         100.0,            100.0,  100.0 ) );
-
-    coarseRefLevel.add( new Cuboid (-100.0, -100.0, -100.0,   
-                                     100.0,  100.0, -0.5*H + refL[0] ) );
-    coarseRefLevel.add( new Cuboid (-100.0, -100.0,  0.5*H - refL[0], 
-                                     100.0,  100.0,  100.0   ) );
-
-    gridBuilder->addGrid( &coarseRefLevel, 1);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Conglomerate firstRefLevel;
-
-    if( rank % 2 == 0 ) firstRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0, 
-                                                       -0.5*L + refL[1],  100.0,  100.0 ) );
-    else                firstRefLevel.add( new Cuboid ( 0.5*L - refL[1], -100.0, -100.0, 
-                                                        100.0,            100.0,  100.0 ) );
-
-    firstRefLevel.add( new Cuboid (-100.0, -100.0, -100.0,   
-                                    100.0,  100.0, -0.5*H + refL[1] ) );
-    firstRefLevel.add( new Cuboid (-100.0, -100.0,  0.5*H - refL[1], 
-                                    100.0,  100.0,  100.0   ) );
-
-    gridBuilder->addGrid( &firstRefLevel, 2);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Conglomerate secondRefLevel;
-
-    if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0, 
-                                                        -0.5*L + refL[2],  100.0,  100.0 ) );
-    else                secondRefLevel.add( new Cuboid ( 0.5*L - refL[2], -100.0, -100.0, 
-                                                         100.0,            100.0,  100.0 ) );
-
-    if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0,   
-                                                        -0.5*L + refL[0],  100.0, -0.5*H + refL[2] ) );
-    else                secondRefLevel.add( new Cuboid ( 0.5*L - refL[0], -100.0, -100.0,   
-                                                         100.0,            100.0, -0.5*H + refL[2] ) );
-
-    if( rank % 2 == 0 ) secondRefLevel.add( new Cuboid (-100.0,           -100.0,  0.5*H - refL[2], 
-                                                        -0.5*L + refL[0],  100.0,  100.0   ) );
-    else                secondRefLevel.add( new Cuboid ( 0.5*L - refL[0], -100.0,  0.5*H - refL[2], 
-                                                         100.0,            100.0,  100.0   ) );
-
-    gridBuilder->addGrid( &secondRefLevel, 3);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Conglomerate thirdRefLevel;
-
-    if( rank % 2 == 0 ) thirdRefLevel.add( new Cuboid (-100.0,           -100.0, -100.0, 
-                                                       -0.5*L + refL[3],  100.0,  100.0 ) );
-    else                thirdRefLevel.add( new Cuboid ( 0.5*L - refL[3], -100.0, -100.0, 
-                                                        100.0,            100.0,  100.0 ) );
-
-    if( fine ) gridBuilder->addGrid( &thirdRefLevel, 4);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( rank % 2 == 0 ) startX = -100.0;
-    else                startX =    0.0;
-    if( rank % 2 == 0 ) endX   =    0.0;
-    else                endX   =  100.0;
-
-    if( mpiWorldSize == 2 )
-    {
-        startY = -100.0;
-        endY   =  100.0;
-    }
-    else
-    {
-        startY =   real(rank/2)         * W;
-        endY   = ( real(rank/2) + 1.0 ) * W;
-    }
-
-    startZ = -100.0;
-    endZ   =  100.0;
-
-    auto subDomainBox = std::make_shared<BoundingBox>( startX, endX, 
-                                                       startY, endY, 
-                                                       startZ, endZ );
-
-    gridBuilder->setSubDomainBox( subDomainBox );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( mpiWorldSize == 2 ) gridBuilder->setPeriodicBoundaryCondition(false, true,  false);
-    else                    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-            
-    //gridBuilder->writeGridsToVtk( path + simulationName + "_0" + "_rank_" + std::to_string(rank) + "_lev_" );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( rank%2 == 0 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS );
-    else              gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS );
-
-    if( rank%2 == 0 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::PX, rank + 1 );
-    else              gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rank - 1 );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    if( mpiWorldSize > 2 )
-    {
-        gridBuilder->findCommunicationIndices(CommunicationDirections::PY, GKS);
-        gridBuilder->findCommunicationIndices(CommunicationDirections::MY, GKS);
-
-        gridBuilder->setCommunicationProcess(CommunicationDirections::PY, (rank + 2 + mpiWorldSize) % mpiWorldSize);
-        gridBuilder->setCommunicationProcess(CommunicationDirections::MY, (rank - 2 + mpiWorldSize) % mpiWorldSize);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    if( mpiWorldSize == 2 ) meshAdapter.findPeriodicBoundaryNeighbors();    
-
-    //meshAdapter.writeMeshFaceVTK( path + simulationName + "_0" + "_rank_" + std::to_string(rank) + ".vtk" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                 B o u n d a r y    C o n d i t i o n s
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    SPtr<BoundaryCondition> bcMX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaHot , false );
-    SPtr<BoundaryCondition> bcPX = std::make_shared<IsothermalWall>( dataBase, Vec3(0.0, 0.0, 0.0), lambdaCold, false );
-
-    bcMX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5*L; } );
-    bcPX->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x >  0.5*L; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<BoundaryCondition> bcMZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0,0,0), true );
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<AdiabaticWall>( dataBase, Vec3(0,0,0), true );
-
-    bcMZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z < -0.5*H; } );
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.z >  0.5*H; } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( mpiWorldSize == 2 )
-    {
-        SPtr<BoundaryCondition> bcMY = std::make_shared<Periodic>(dataBase);
-        SPtr<BoundaryCondition> bcPY = std::make_shared<Periodic>(dataBase);
-
-        bcMY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y < 0; });
-        bcPY->findBoundaryCells(meshAdapter, false, [&](Vec3 center) { return center.y > W; });
-
-        dataBase->boundaryConditions.push_back(bcMY);
-        dataBase->boundaryConditions.push_back(bcPY);
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->boundaryConditions.push_back( bcMZ );
-    dataBase->boundaryConditions.push_back( bcPZ );
-
-    dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPX );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                 I n i t i a l    C o n d i t i o n s
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    uint startIter = 0;
-
-    dataBase->setMesh( meshAdapter );
-
-    dataBase->setCommunicators( meshAdapter );
-
-    CudaUtility::printCudaMemoryUsage();
-
-    if( restartIter == INVALID_INDEX )
-    {
-        Initializer::interpret(dataBase, [&](Vec3 cellCenter) -> ConservedVariables {
-
-            //real Th = 1.0 / lambdaHot;
-            //real Tc = 1.0 / lambdaCold;
-            //real T = Th - (Th - Tc)*((cellCenter.x + 0.5 * L) / L);
-            //real lambdaLocal = 1.0 / T;
-
-            return toConservedVariables(PrimitiveVariables(rho, 0.0, 0.0, 0.0, lambda), parameters.K);
-        });
-
-        if (rank == 0) writeVtkXMLParallelSummaryFile(dataBase, parameters, path + simulationName + "_0", mpiWorldSize);
-
-        writeVtkXML(dataBase, parameters, 0, path + simulationName + "_0" + "_rank_" + std::to_string(rank));
-    }
-    else
-    {
-        Restart::readRestart( dataBase, path + simulationName + "_" + std::to_string( restartIter ) + "_rank_" + std::to_string(rank), startIter );
-
-        if (rank == 0) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( restartIter ) + "_restart", mpiWorldSize );
-
-        writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( restartIter ) + "_restart" + "_rank_" + std::to_string(rank) );
-
-
-    }
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //                  R u n
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 300.0 );
-
-    ConvergenceAnalyzer convergenceAnalyzer( dataBase );
-
-    //auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 0 );
-    auto turbulenceAnalyzer = std::make_shared<TurbulenceAnalyzer>( dataBase, 500000 );
-
-    turbulenceAnalyzer->collect_UU = true;
-    turbulenceAnalyzer->collect_VV = true;
-    turbulenceAnalyzer->collect_WW = true;
-    turbulenceAnalyzer->collect_UV = true;
-    turbulenceAnalyzer->collect_UW = true;
-    turbulenceAnalyzer->collect_VW = true;
-
-    turbulenceAnalyzer->allocate();
-
-    if( restartIter != INVALID_INDEX )
-        turbulenceAnalyzer->readRestartFile( path + simulationName + "_Turbulence_" + std::to_string( restartIter ) + "_rank_" + std::to_string(rank) );
-
-    auto pointTimeSeriesCollector = std::make_shared<PointTimeSeriesCollector>();
-
-    for( real y = 0.5 * W; y < real( mpiWorldSize / 2 ) * W; y += W )
-    {
-        if( subDomainBox->isInside( -0.485, y, -0.3*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( -0.485, y, -0.3*H ), 'W', 10000 );
-        if( subDomainBox->isInside( -0.485, y, -0.1*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( -0.485, y, -0.1*H ), 'W', 10000 );
-        if( subDomainBox->isInside( -0.485, y,  0.1*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( -0.485, y,  0.1*H ), 'W', 10000 );
-        if( subDomainBox->isInside( -0.485, y,  0.3*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3( -0.485, y,  0.3*H ), 'W', 10000 );
-        
-        if( subDomainBox->isInside(  0.485, y, -0.3*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3(  0.485, y, -0.3*H ), 'W', 10000 );
-        if( subDomainBox->isInside(  0.485, y, -0.1*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3(  0.485, y, -0.1*H ), 'W', 10000 );
-        if( subDomainBox->isInside(  0.485, y,  0.1*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3(  0.485, y,  0.1*H ), 'W', 10000 );
-        if( subDomainBox->isInside(  0.485, y,  0.3*H ) ) pointTimeSeriesCollector->addAnalyzer( dataBase, meshAdapter, Vec3(  0.485, y,  0.3*H ), 'W', 10000 );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    cupsAnalyzer.start();
-
-    for( uint iter = startIter + 1; iter <= 100000000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-        if( iter % 200000 == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            if( rank == 0 ) writeVtkXMLParallelSummaryFile( dataBase, parameters, path + simulationName + "_" + std::to_string( iter ), mpiWorldSize );
-
-            writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        cupsAnalyzer.run( iter, parameters.dt );
-
-        convergenceAnalyzer.run( iter );
-
-        turbulenceAnalyzer->run( iter, parameters );
-
-        pointTimeSeriesCollector->run(iter, parameters);
-
-        if( iter > 500000 && iter % 200000 == 0 )
-        //if(iter % 1000 == 0)
-        {
-            turbulenceAnalyzer->download();
-
-            if( rank == 0 ) writeTurbulenceVtkXMLParallelSummaryFile( dataBase, turbulenceAnalyzer, parameters, path + simulationName + "_Turbulence_" + std::to_string( iter ), mpiWorldSize );
-
-            writeTurbulenceVtkXML( dataBase, turbulenceAnalyzer, 0, path + simulationName + "_Turbulence_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        if( iter > 500000 && iter % 200000 == 0 )
-        {
-            Restart::writeRestart( dataBase, path + simulationName + "_" + std::to_string( iter ) + "_rank_" + std::to_string(rank), iter );
-
-            turbulenceAnalyzer->writeRestartFile( path + simulationName + "_Turbulence_" + std::to_string( iter ) + "_rank_" + std::to_string(rank) );
-        }
-
-        if( iter % 1000000 == 0 )
-        {
-            pointTimeSeriesCollector->writeToFile(path + simulationName + "_TimeSeries_" + std::to_string( iter ) + "_rank_" + std::to_string(rank));
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-}
-
-
-
-int main( int argc, char* argv[])
-{
-    //////////////////////////////////////////////////////////////////////////
-
-    bool fine = false;
-
-    bool highAspect = true;
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    MPI_Init(&argc, &argv);
-    int rank = 0;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    int mpiWorldSize = 1;
-    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-#else
-    int rank         = MpiUtility::getMpiRankBeforeInit();
-    int mpiWorldSize = MpiUtility::getMpiWorldSizeBeforeInit();
-#endif
-
-    if( mpiWorldSize < 2 || mpiWorldSize%2 != 0 )
-    {
-        std::cerr << "Error: MpiWolrdSize must be multiple of 2!\n";
-        return 1;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef _WIN32
-    std::string path( "F:/Work/Computations/out/ThermalCavity3D/" );
-#else
-    std::string path( "out/" );
-#endif
-
-    std::string simulationName ( "ThermalCavity3D" );
-
-    if(fine) simulationName += "_fine";
-    else     simulationName += "_coarse";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    logging::Logger::addStream(&std::cout);
-    
-    std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(rank) + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    // Important: for Cuda-Aware MPI the device must be set before MPI_Init()
-    int deviceCount = CudaUtility::getCudaDeviceCount();
-
-    if(deviceCount == 0)
-    {
-        std::stringstream msg;
-        msg << "No devices devices found!" << std::endl;
-        *logging::out << logging::Logger::WARNING << msg.str(); msg.str("");
-    }
-
-    CudaUtility::setCudaDevice( rank % deviceCount );
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifndef _WIN32
-    MPI_Init(&argc, &argv);
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( sizeof(real) == 4 )
-        *logging::out << logging::Logger::INFO_HIGH << "Using Single Precision\n";
-    else
-        *logging::out << logging::Logger::INFO_HIGH << "Using Double Precision\n";
-
-    try
-    {
-        uint restartIter = INVALID_INDEX;
-
-        if( argc > 1 ) restartIter = atoi( argv[1] );
-
-        simulation(path, simulationName, fine, highAspect, restartIter);
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-    logFile.close();
-
-    MPI_Finalize();
-
-    return 0;
-}
diff --git a/apps/gpu/GKS/gksTest/3rdPartyLinking.cmake b/apps/gpu/GKS/gksTest/3rdPartyLinking.cmake
deleted file mode 100644
index 72c7afc6076b832263506ab9ce777925cfcc6a66..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/gksTest/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOUR#CE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
diff --git a/apps/gpu/GKS/gksTest/CMakeLists.txt b/apps/gpu/GKS/gksTest/CMakeLists.txt
deleted file mode 100644
index d404310177a2f53760d1c84bce79d7d070fed409..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/gksTest/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR})
-
-set(linkDirectories "")
-set(libsToLink Core GridGenerator GksMeshAdapter GksVtkAdapter GksGpu)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src"
-                       "${CMAKE_SOURCE_DIR}/src/Core"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/GksMeshAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksVtkAdapter"
-                       "${CMAKE_SOURCE_DIR}/src/GksGpu")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${gksAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
diff --git a/apps/gpu/GKS/gksTest/CMakePackage.cmake b/apps/gpu/GKS/gksTest/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/gksTest/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/GKS/gksTest/main.cpp b/apps/gpu/GKS/gksTest/main.cpp
deleted file mode 100644
index b66017eeaf727def602b9ab9f41556a8dd5a984e..0000000000000000000000000000000000000000
--- a/apps/gpu/GKS/gksTest/main.cpp
+++ /dev/null
@@ -1,228 +0,0 @@
-//#define MPI_LOGGING
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <iostream>
-#include <exception>
-#include <fstream>
-#include <memory>
-
-#include "Core/Timer/Timer.h"
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/geometries/Cuboid/Cuboid.h"
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksVtkAdapter/VTKInterface.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-#include "GksGpu/BoundaryConditions/Periodic.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-void gksTest( std::string path )
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real L = 1.0;
-
-    real dx = L / 8.0;
-
-    real Re  = 2.0e3;
-    real U  = 0.1;
-    real Ma = 0.1;
-    
-    real Pr  = 1.0;
-    real K   = 0.0;
-
-    real rho = 1.0;
-
-    real mu = U * rho * L / Re;
-
-    real cs = U / Ma;
-    real lambda = c1o2 * ( ( K + 4.0 ) / ( K + 2.0 ) ) / ( cs * cs );
-
-    real CFL = 0.5;
-
-    real dt  = CFL * ( dx / ( ( U + cs ) * ( one + ( two * mu ) / ( U * dx * rho ) ) ) );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Parameters parameters;
-
-    parameters.K  = K;
-    parameters.Pr = Pr;
-    parameters.mu = mu;
-
-    parameters.force.x = 0;
-    parameters.force.y = 0;
-    parameters.force.z = 0;
-
-    parameters.dt = dt;
-    parameters.dx = dx;
-
-    parameters.lambdaRef = lambda;
-
-    parameters.viscosityModel = ViscosityModel::constant;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->addCoarseGrid(-0.5, -0.5, -0.5,  
-                                0.5,  0.5,  0.5, dx);
-
-    Cuboid refBox(-1.0, -1.0, 0, 1.0, 1.0, 0.55);
-
-    gridBuilder->setNumberOfLayers(1,1);
-    gridBuilder->addGrid( &refBox, 1);
-
-    gridBuilder->setPeriodicBoundaryCondition(true, false, false);
-
-    gridBuilder->buildGrids(GKS, false);
-
-    gridBuilder->writeGridsToVtk(path + "grid/Grid_lev_");
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    GksMeshAdapter meshAdapter( gridBuilder );
-
-    meshAdapter.inputGrid();
-
-    meshAdapter.writeMeshVTK( path + "grid/Mesh.vtk" );
-
-    meshAdapter.writeMeshFaceVTK( path + "grid/MeshFaces.vtk" );
-
-    meshAdapter.findPeriodicBoundaryNeighbors();
-
-    return;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::setCudaDevice(0);
-
-    auto dataBase = std::make_shared<DataBase>( "GPU" );
-    dataBase->setMesh( meshAdapter );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //SPtr<BoundaryCondition> bcMX = std::make_shared<Periodic>( dataBase );
-
-    //bcMX->findBoundaryCells( meshAdapter, [&](Vec3 center){ 
-    //    return center.x < -0.5 || center.x > 0.5;
-    //} );
-
-    SPtr<BoundaryCondition> bcPZ = std::make_shared<IsothermalWall>( dataBase, Vec3( U, U, 0.0 ), lambda, 0.0, true );
-
-    bcPZ->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ 
-        return center.z > 0.5;
-    } );
-
-    SPtr<BoundaryCondition> bcWall = std::make_shared<IsothermalWall>( dataBase, Vec3( 0.0, 0.0, 0.0 ), lambda, 0.0, true );
-
-    bcWall->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ 
-        return center.z < 0.5;
-    } );
-    
-    //dataBase->boundaryConditions.push_back( bcMX );
-    dataBase->boundaryConditions.push_back( bcPZ );
-    dataBase->boundaryConditions.push_back( bcWall );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::printCudaMemoryUsage();
-
-    Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables {
-        
-        real radius = cellCenter.length();
-
-        return toConservedVariables( PrimitiveVariables( 1.0, 0.0, 0.0, 0.0, lambda, 0.0 ), parameters.K );
-    });
-
-    dataBase->copyDataHostToDevice();
-
-    Initializer::initializeDataUpdate(dataBase);
-
-    writeVtkXML( dataBase, parameters, 0, path + "grid/Test_0" );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    CupsAnalyzer cupsAnalyzer( dataBase, true, 30.0 );
-
-    cupsAnalyzer.start();
-
-    for( uint iter = 1; iter < 100000; iter++ )
-    {
-        TimeStepping::nestedTimeStep(dataBase, parameters, nullptr, 0);
-
-        if( iter % 10000 == 0 )
-        {
-            dataBase->copyDataDeviceToHost();
-
-            writeVtkXML( dataBase, parameters, 0, path + "grid/Test_" + std::to_string( iter ) );
-        }
-
-        cupsAnalyzer.run( iter );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    dataBase->copyDataDeviceToHost();
-
-    //writeVtkXML( dataBase, parameters, 0, path + "grid/Test_1" );
-
-
-}
-
-int main( int argc, char* argv[])
-{
-    std::string path( "F:/Work/Computations/" );
-    //std::string path( "out/" );
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    
-    try
-    {
-        gksTest( path );
-    }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::ERROR << e.what() << "\n";
-    }
-    catch (const std::bad_alloc& e)
-    {  
-        *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/HULC/main.cpp b/apps/gpu/HULC/main.cpp
deleted file mode 100644
index 80f8ba4c62b3b0af08425f839d0f802a568db034..0000000000000000000000000000000000000000
--- a/apps/gpu/HULC/main.cpp
+++ /dev/null
@@ -1,411 +0,0 @@
-//#define MPI_LOGGING
-
-
-#include <mpi.h>
-#if defined( MPI_LOGGING )
-	#include <mpe.h>
-#endif
-
-#include <string>
-#include <iostream>
-
-#include "LBM/Simulation.h"
-
-#include "Parameter/Parameter.h"
-#include "DataStructureInitializer/GridProvider.h"
-#include "VirtualFluidsBasics/utilities/input/Input.h"
-#include "VirtualFluidsBasics/utilities/StringUtil/StringUtil.h"
-#include "grid/GridBuilder/LevelGridBuilder.h"
-#include "utilities/transformator/TransformatorImp.h"
-#include "io/GridVTKWriter/GridVTKWriter.h"
-
-#include "io/SimulationFileWriter/SimulationFileWriter.h"
-#include "grid/GridBuilder/LevelGridBuilder.h"
-#include "grid/GridBuilder/ParallelGridBuilder.h"
-#include "geometries/TriangularMesh/TriangularMesh.h"
-
-#include "grid/GridFactory.h"
-#include "grid/GridBuilder/MultipleGridBuilder.h"
-#include <grid/GridMocks.h>
-#include "grid/GridStrategy/GridStrategyMocks.h"
-#include "VirtualFluidsBasics/utilities/logger/Logger.h"
-#include "geometries/Conglomerate/Conglomerate.h"
-#include "io/STLReaderWriter/STLReader.h"
-#include "io/STLReaderWriter/STLWriter.h"
-#include "geometries/TriangularMesh/TriangularMeshStrategy.h"
-#include "Output/FileWriter.h"
-
-
-#include "Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
-#include "PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
-
-
-std::string getGridPath(std::shared_ptr<Parameter> para, std::string Gridpath)
-{
-    if (para->getNumprocs() == 1)
-        return Gridpath + "/";
-    
-    return Gridpath + "/" + StringUtil::toString(para->getMyID()) + "/";
-}
-
-void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input> &input)
-{
-    std::string _path = input->getValue("Path");
-    std::string _prefix = input->getValue("Prefix");
-    std::string _gridpath = input->getValue("GridPath");
-    para->setNumprocs(1);
-    std::string gridPath = getGridPath(para, _gridpath);
-    para->setMaxDev(StringUtil::toInt(input->getValue("NumberOfDevices")));
-    para->setDevices(StringUtil::toIntVector(input->getValue("Devices")));
-    para->setOutputPath(_path);
-    para->setOutputPrefix(_prefix);
-    para->setPathAndFilename(_path + "/" + _prefix);
-    para->setPrintFiles(false);
-    para->setPrintFiles(StringUtil::toBool(input->getValue("WriteGrid")));
-    para->setGeometryValues(StringUtil::toBool(input->getValue("GeometryValues")));
-    para->setCalc2ndOrderMoments(StringUtil::toBool(input->getValue("calc2ndOrderMoments")));
-    para->setCalc3rdOrderMoments(StringUtil::toBool(input->getValue("calc3rdOrderMoments")));
-    para->setCalcHighOrderMoments(StringUtil::toBool(input->getValue("calcHigherOrderMoments")));
-    para->setReadGeo(StringUtil::toBool(input->getValue("ReadGeometry")));
-    para->setCalcMedian(StringUtil::toBool(input->getValue("calcMedian")));
-    para->setConcFile(StringUtil::toBool(input->getValue("UseConcFile")));
-    para->setUseMeasurePoints(StringUtil::toBool(input->getValue("UseMeasurePoints")));
-    para->setUseWale(StringUtil::toBool(input->getValue("UseWale")));
-    para->setSimulatePorousMedia(StringUtil::toBool(input->getValue("SimulatePorousMedia")));
-    para->setD3Qxx(StringUtil::toInt(input->getValue("D3Qxx")));
-    para->setTimestepEnd(StringUtil::toInt(input->getValue("TimeEnd")));
-    para->setTimestepOut(StringUtil::toInt(input->getValue("TimeOut")));
-    para->setTimestepStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
-    para->setTimeCalcMedStart(StringUtil::toInt(input->getValue("TimeStartCalcMedian")));
-    para->setTimeCalcMedEnd(StringUtil::toInt(input->getValue("TimeEndCalcMedian")));
-    para->setPressInID(StringUtil::toInt(input->getValue("PressInID")));
-    para->setPressOutID(StringUtil::toInt(input->getValue("PressOutID")));
-    para->setPressInZ(StringUtil::toInt(input->getValue("PressInZ")));
-    para->setPressOutZ(StringUtil::toInt(input->getValue("PressOutZ")));
-    //////////////////////////////////////////////////////////////////////////
-	para->setCompOn(StringUtil::toBool(input->getValue("CompOn")));
-	para->setDiffOn(StringUtil::toBool(input->getValue("DiffOn")));
-    para->setDiffMod(StringUtil::toInt(input->getValue("DiffMod")));
-    para->setDiffusivity(StringUtil::toFloat(input->getValue("Diffusivity")));
-    para->setTemperatureInit(StringUtil::toFloat(input->getValue("Temp")));
-    para->setTemperatureBC(StringUtil::toFloat(input->getValue("TempBC")));
-    //////////////////////////////////////////////////////////////////////////
-    para->setViscosityLB(StringUtil::toFloat(input->getValue("Viscosity_LB")));
-    para->setVelocityLB(StringUtil::toFloat(input->getValue("Velocity_LB")));
-    para->setViscosityRatio(StringUtil::toFloat(input->getValue("Viscosity_Ratio_World_to_LB")));
-    para->setVelocityRatio(StringUtil::toFloat(input->getValue("Velocity_Ratio_World_to_LB")));
-    para->setDensityRatio(StringUtil::toFloat(input->getValue("Density_Ratio_World_to_LB")));
-    para->setPressRatio(StringUtil::toFloat(input->getValue("Delta_Press")));
-    para->setRealX(StringUtil::toFloat(input->getValue("SliceRealX")));
-    para->setRealY(StringUtil::toFloat(input->getValue("SliceRealY")));
-    para->setFactorPressBC(StringUtil::toFloat(input->getValue("dfpbc")));
-    para->setGeometryFileC(input->getValue("GeometryC"));
-    para->setGeometryFileM(input->getValue("GeometryM"));
-    para->setGeometryFileF(input->getValue("GeometryF"));
-    //////////////////////////////////////////////////////////////////////////
-    para->setgeoVec(gridPath + input->getValue("geoVec"));
-    para->setcoordX(gridPath + input->getValue("coordX"));
-    para->setcoordY(gridPath + input->getValue("coordY"));
-    para->setcoordZ(gridPath + input->getValue("coordZ"));
-    para->setneighborX(gridPath + input->getValue("neighborX"));
-    para->setneighborY(gridPath + input->getValue("neighborY"));
-    para->setneighborZ(gridPath + input->getValue("neighborZ"));
-    para->setscaleCFC(gridPath + input->getValue("scaleCFC"));
-    para->setscaleCFF(gridPath + input->getValue("scaleCFF"));
-    para->setscaleFCC(gridPath + input->getValue("scaleFCC"));
-    para->setscaleFCF(gridPath + input->getValue("scaleFCF"));
-    para->setscaleOffsetCF(gridPath + input->getValue("scaleOffsetCF"));
-    para->setscaleOffsetFC(gridPath + input->getValue("scaleOffsetFC"));
-    para->setgeomBoundaryBcQs(gridPath + input->getValue("geomBoundaryBcQs"));
-    para->setgeomBoundaryBcValues(gridPath + input->getValue("geomBoundaryBcValues"));
-    para->setinletBcQs(gridPath + input->getValue("inletBcQs"));
-    para->setinletBcValues(gridPath + input->getValue("inletBcValues"));
-    para->setoutletBcQs(gridPath + input->getValue("outletBcQs"));
-    para->setoutletBcValues(gridPath + input->getValue("outletBcValues"));
-    para->settopBcQs(gridPath + input->getValue("topBcQs"));
-    para->settopBcValues(gridPath + input->getValue("topBcValues"));
-    para->setbottomBcQs(gridPath + input->getValue("bottomBcQs"));
-    para->setbottomBcValues(gridPath + input->getValue("bottomBcValues"));
-    para->setfrontBcQs(gridPath + input->getValue("frontBcQs"));
-    para->setfrontBcValues(gridPath + input->getValue("frontBcValues"));
-    para->setbackBcQs(gridPath + input->getValue("backBcQs"));
-    para->setbackBcValues(gridPath + input->getValue("backBcValues"));
-    para->setnumberNodes(gridPath + input->getValue("numberNodes"));
-    para->setLBMvsSI(gridPath + input->getValue("LBMvsSI"));
-    //////////////////////////////gridPath + ////////////////////////////////////////////
-    para->setmeasurePoints(gridPath + input->getValue("measurePoints"));
-    para->setpropellerValues(gridPath + input->getValue("propellerValues"));
-    para->setclockCycleForMP(StringUtil::toFloat(input->getValue("measureClockCycle")));
-    para->settimestepForMP(StringUtil::toInt(input->getValue("measureTimestep")));
-    para->setcpTop(gridPath + input->getValue("cpTop"));
-    para->setcpBottom(gridPath + input->getValue("cpBottom"));
-    para->setcpBottom2(gridPath + input->getValue("cpBottom2"));
-    para->setConcentration(gridPath + input->getValue("Concentration"));
-    //////////////////////////////////////////////////////////////////////////
-    //Normals - Geometry
-    para->setgeomBoundaryNormalX(gridPath + input->getValue("geomBoundaryNormalX"));
-    para->setgeomBoundaryNormalY(gridPath + input->getValue("geomBoundaryNormalY"));
-    para->setgeomBoundaryNormalZ(gridPath + input->getValue("geomBoundaryNormalZ"));
-    //Normals - Inlet
-    para->setInflowBoundaryNormalX(gridPath + input->getValue("inletBoundaryNormalX"));
-    para->setInflowBoundaryNormalY(gridPath + input->getValue("inletBoundaryNormalY"));
-    para->setInflowBoundaryNormalZ(gridPath + input->getValue("inletBoundaryNormalZ"));
-    //Normals - Outlet
-    para->setOutflowBoundaryNormalX(gridPath + input->getValue("outletBoundaryNormalX"));
-    para->setOutflowBoundaryNormalY(gridPath + input->getValue("outletBoundaryNormalY"));
-    para->setOutflowBoundaryNormalZ(gridPath + input->getValue("outletBoundaryNormalZ"));
-    //////////////////////////////////////////////////////////////////////////
-    //Forcing
-    para->setForcing(StringUtil::toFloat(input->getValue("ForcingX")), StringUtil::toFloat(input->getValue("ForcingY")), StringUtil::toFloat(input->getValue("ForcingZ")));
-    //////////////////////////////////////////////////////////////////////////
-    //Particles
-    para->setCalcParticles(StringUtil::toBool(input->getValue("calcParticles")));
-    para->setParticleBasicLevel(StringUtil::toInt(input->getValue("baseLevel")));
-    para->setParticleInitLevel(StringUtil::toInt(input->getValue("initLevel")));
-    para->setNumberOfParticles(StringUtil::toInt(input->getValue("numberOfParticles")));
-    para->setneighborWSB(gridPath + input->getValue("neighborWSB"));
-    para->setStartXHotWall(StringUtil::toDouble(input->getValue("startXHotWall")));
-    para->setEndXHotWall(StringUtil::toDouble(input->getValue("endXHotWall")));
-    //////////////////////////////////////////////////////////////////////////
-    //for Multi GPU
-    if (para->getNumprocs() > 1)
-    {
-        ////////////////////////////////////////////////////////////////////////////
-        ////1D domain decomposition
-        //std::vector<std::string> sendProcNeighbors;
-        //std::vector<std::string> recvProcNeighbors;
-        //for (int i = 0; i<para->getNumprocs();i++)
-        //{
-        // sendProcNeighbors.push_back(gridPath + StringUtil::toString(i) + "s.dat");
-        // recvProcNeighbors.push_back(gridPath + StringUtil::toString(i) + "r.dat");
-        //}
-        //para->setPossNeighborFiles(sendProcNeighbors, "send");
-        //para->setPossNeighborFiles(recvProcNeighbors, "recv");
-        //////////////////////////////////////////////////////////////////////////
-        //3D domain decomposition
-        std::vector<std::string> sendProcNeighborsX, sendProcNeighborsY, sendProcNeighborsZ;
-        std::vector<std::string> recvProcNeighborsX, recvProcNeighborsY, recvProcNeighborsZ;
-        for (int i = 0; i < para->getNumprocs(); i++)
-        {
-            sendProcNeighborsX.push_back(gridPath + StringUtil::toString(i) + "Xs.dat");
-            sendProcNeighborsY.push_back(gridPath + StringUtil::toString(i) + "Ys.dat");
-            sendProcNeighborsZ.push_back(gridPath + StringUtil::toString(i) + "Zs.dat");
-            recvProcNeighborsX.push_back(gridPath + StringUtil::toString(i) + "Xr.dat");
-            recvProcNeighborsY.push_back(gridPath + StringUtil::toString(i) + "Yr.dat");
-            recvProcNeighborsZ.push_back(gridPath + StringUtil::toString(i) + "Zr.dat");
-        }
-        para->setPossNeighborFilesX(sendProcNeighborsX, "send");
-        para->setPossNeighborFilesY(sendProcNeighborsY, "send");
-        para->setPossNeighborFilesZ(sendProcNeighborsZ, "send");
-        para->setPossNeighborFilesX(recvProcNeighborsX, "recv");
-        para->setPossNeighborFilesY(recvProcNeighborsY, "recv");
-        para->setPossNeighborFilesZ(recvProcNeighborsZ, "recv");
-    }
-    //////////////////////////////////////////////////////////////////////////
-    //para->setkFull(             input->getValue( "kFull" ));
-    //para->setgeoFull(           input->getValue( "geoFull" ));
-    //para->setnoSlipBcPos(       input->getValue( "noSlipBcPos" ));
-    //para->setnoSlipBcQs(          input->getValue( "noSlipBcQs" ));
-    //para->setnoSlipBcValues(      input->getValue( "noSlipBcValues" ));
-    //para->setnoSlipBcValue(     input->getValue( "noSlipBcValue" ));
-    //para->setslipBcPos(         input->getValue( "slipBcPos" ));
-    //para->setslipBcQs(          input->getValue( "slipBcQs" ));
-    //para->setslipBcValue(       input->getValue( "slipBcValue" ));
-    //para->setpressBcPos(        input->getValue( "pressBcPos" ));
-    //para->setpressBcQs(           input->getValue( "pressBcQs" ));
-    //para->setpressBcValues(       input->getValue( "pressBcValues" ));
-    //para->setpressBcValue(      input->getValue( "pressBcValue" ));
-    //para->setvelBcQs(             input->getValue( "velBcQs" ));
-    //para->setvelBcValues(         input->getValue( "velBcValues" ));
-    //para->setpropellerCylinder( input->getValue( "propellerCylinder" ));
-    //para->setpropellerQs(		 input->getValue( "propellerQs"      ));
-    //para->setwallBcQs(            input->getValue( "wallBcQs"         ));
-    //para->setwallBcValues(        input->getValue( "wallBcValues"     ));
-    //para->setperiodicBcQs(        input->getValue( "periodicBcQs"     ));
-    //para->setperiodicBcValues(    input->getValue( "periodicBcValues" ));
-    //cout << "Try this: " << para->getgeomBoundaryBcValues() << endl;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //Restart
-    para->setTimeDoCheckPoint(StringUtil::toInt(input->getValue("TimeDoCheckPoint")));
-    para->setTimeDoRestart(StringUtil::toInt(input->getValue("TimeDoRestart")));
-    para->setDoCheckPoint(StringUtil::toBool(input->getValue("DoCheckPoint")));
-    para->setDoRestart(StringUtil::toBool(input->getValue("DoRestart")));
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-/*    para->setMaxLevel(StringUtil::toInt(input->getValue("NOGL")));
-    para->setGridX(StringUtil::toVector(input->getValue("GridX")));                           
-    para->setGridY(StringUtil::toVector(input->getValue("GridY")));                           
-    para->setGridZ(StringUtil::toVector(input->getValue("GridZ")));                  
-    para->setDistX(StringUtil::toVector(input->getValue("DistX")));                  
-    para->setDistY(StringUtil::toVector(input->getValue("DistY")));                  
-    para->setDistZ(StringUtil::toVector(input->getValue("DistZ")));      */            
-
-    para->setNeedInterface(std::vector<bool>{true, true, true, true, true, true});
-
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	// Kernel
-	para->setMainKernel(input->getValue("MainKernelName"));
-	para->setMultiKernelOn(StringUtil::toBool(input->getValue("multiKernelOn")));
-	para->setMultiKernelLevel(StringUtil::toIntVector(input->getValue("multiKernelLevel")));
-	para->setMultiKernelName(StringUtil::toStringVector(input->getValue("multiKernelName")));
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-}
-
-
-
-void multipleLevel(const std::string& configPath)
-{
-    logging::Logger::setStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-
-    auto gridFactory = SPtr<GridFactory>(new GridFactory());
-    gridFactory->setGridStrategy(SPtr<GridStrategy>(new GridCpuStrategy()));
-    gridFactory->setGrid("grid");
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
-
-    //auto gridBuilderlevel = LevelGridBuilder::makeShared(Device::CPU, "D3Q27");
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    //Conglomerate* conglomerate = new Conglomerate();
-    //conglomerate->add(new Cuboid(10, 10, 10, 30, 30, 30));
-    //conglomerate->subtract(new Sphere(30, 20, 20, 4));
-    //gridBuilder->addGrid(conglomerate, 2);
-
-
-//    gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, 14, 10, 20, 0.25);
-    //TriangularMesh* triangularMesh = TriangularMesh::make("D:/GRIDGENERATION/STL/quadarBinaer.stl", DiscretizationMethod::POINT_IN_OBJECT);
-
-
-    gridBuilder->addCoarseGrid(-10, -8, -3, 50, 20, 20, 0.25);
-    TriangularMesh* triangularMesh = TriangularMesh::make("D:/GRIDGENERATION/STL/input/local_input/bruecke.stl", DiscretizationMethod::RAYCASTING);
-
-
-    //TriangleOffsetSurfaceGeneration::createOffsetTriangularMesh(triangularMesh, 5);
-
-    //TriangularMesh* sphere = TriangularMesh::make("D:/GRIDGENERATION/STL/GTI.stl", DiscretizationMethod::RAYCASTING);
-    //TransformatorImp trans(1.0, Vertex(5.5, 1, 12));
-    //trans.transformWorldToGrid(*sphere);
-    //STLWriter::writeSTL(sphere->triangleVec, "D:/GRIDGENERATION/STL/GTI2.stl", false);
-
-    //gridBuilder->addGrid(new Sphere(20, 20, 20, 8));
-    gridBuilder->addGrid(triangularMesh, 2);
-
-    //gridBuilder->addFineGrid(new Cuboid(15, 15, 15, 25, 25, 25), 1);
-    //gridBuilder->addFineGrid(new Cuboid(17, 17, 17, 23, 23, 23), 2);
-
-
-    //gridBuilder->addFineGrid(17.0, 17.0, 17.0, 20.0, 20.0, 20.0, 3);
-    //gridBuilder->addFineGrid(10.0, 10.0, 10.0, 20.0, 20.0, 20.0, 3);
-
-
-    //gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTest_level_2", 2);
-
-    gridBuilder->buildGrids();
-
-    gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTestSphere_level_0", 0);
-    gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTestSphere_level_1", 1);
-    gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTestSphere_level_2", 2);
-
-    //gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTestCuboid_level_0", 0);
-    //gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTestCuboid_level_1", 1);
-
-    //SimulationFileWriter::write("D:/GRIDGENERATION/couplingVF/test/simu/", gridBuilder, FILEFORMAT::ASCII);
-
-    //const uint level = 2;
-    //gridBuilder->addFineGrid(0.0, 0.0, 0.0, 10.0, 10.0, 10.0, level);
-    //gridBuilderlevel->setGrids(gridBuilder->getGrids());
-
-
-    //gridBuilder->addGrid(14.4921875, 14.4921875, 14.4921875, 16.5078125, 16.5078125, 16.5078125, 0.015625, "cpu", "D3Q27", false, false, false);
-    //gridBuilder->addGrid(13.984375, 13.984375, 13.984375, 17.015625, 17.015625, 17.015625, 0.03125, "cpu", "D3Q27", false, false, false);
-    //gridBuilder->addGrid(13.46875, 13.46875, 13.46875, 17.53125, 17.53125, 17.53125, 0.0625, "cpu", "D3Q27", false, false, false);
-    //gridBuilder->addGrid(12.4375, 12.4375, 12.4375, 18.5625, 18.5625, 18.5625, 0.125, "gpu", "D3Q27", false, false, false);
-    //gridBuilder->addGrid(10.375, 10.375, 10.375, 20.625, 20.625, 20.625, 0.25, "gpu", "D3Q27", false, false, false);
-    //gridBuilder->addGrid(5.25, 5.25, 5.25, 24.75, 24.75, 24.75, 0.5, "gpu", "D3Q27", false, false, false);
-    //gridBuilder->addGrid(0.0, 0.0, 0.0, 30.0, 30.0, 30.0, 1.0, "gpu", "D3Q27", true, true, true);
-
-
-    //gridBuilder->copyDataFromGpu();
-
-    //gridBuilder->meshGeometry("D:/GRIDGENERATION/STL/circleBinaer.stl", 1);
-    //gridBuilder->meshGeometry("D:/GRIDGENERATION/STL/circleBinaer.stl", 0);
-    //gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTest_level_1", 1);
-    //gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTest_level_0", 0);
-    //gridBuilder->writeGridToVTK("D:/GRIDGENERATION/gridTest_level_2", 2);
-
-    SPtr<Parameter> para = Parameter::make();
-    SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, communicator);
-    //SPtr<GridProvider> gridGenerator = GridProvider::makeGridReader(false, para);
-
-    std::ifstream stream;
-    stream.open(configPath.c_str(), std::ios::in);
-    if (stream.fail())
-        throw "can not open config file!\n";
-
-    UPtr<input::Input> input = input::Input::makeInput(stream, "config");
-
-    setParameters(para, input);
-
-    Simulation sim;
-    
-	SPtr<KernelFactory> kernelFactory = KernelFactoryImp::getInstance();
-	SPtr<PreProcessorFactory> preProcessorFactory = PreProcessorFactoryImp::getInstance();
-	sim.setFactories(kernelFactory, preProcessorFactory);
-
-	SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
-    sim.init(para, gridGenerator, fileWriter);
-    sim.run();
-	sim.free();
-}
-
-
-int main( int argc, char* argv[])
-{
-   MPI_Init(&argc, &argv);
-   std::string str, str2; 
-   if ( argv != NULL )
-   {
-      str = static_cast<std::string>(argv[0]);
-      if (argc > 1)
-      {
-         str2 = static_cast<std::string>(argv[1]);
-         try
-         {
-             multipleLevel(str2);
-         }
-         catch (std::exception e)
-         {
-             std::cout << e.what() << std::flush;
-             //MPI_Abort(MPI_COMM_WORLD, -1);
-         }
-      }
-      else
-      {
-          std::cout << "Configuration file must be set!: lbmgm <config file>" << std::endl << std::flush;
-         //MPI_Abort(MPI_COMM_WORLD, -1);
-      }
-   }
-   /*
-   MPE_Init_log() & MPE_Finish_log() are NOT needed when
-   liblmpe.a is linked with this program.  In that case,
-   MPI_Init() would have called MPE_Init_log() already.
-   */
-#if defined( MPI_LOGGING )
-   MPE_Init_log();
-#endif
-
-   
-
-#if defined( MPI_LOGGING )
-   if ( argv != NULL )
-      MPE_Finish_log( argv[0] );
-   if ( str != "" )
-      MPE_Finish_log( str.c_str() );
-   else
-      MPE_Finish_log( "TestLog" );
-#endif
-
-   MPI_Finalize();
-   return 0;
-}
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
index 9d982ebac0059b4512041194100f6e1fdfa61924..94755358f680503e19e0e204946ae51016d39802 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -42,12 +42,12 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "PointerDefinitions.h"
 
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
+
 
-#include "Core/VectorTypes.h"
 
 #include <basics/config/ConfigurationFile.h>
 
@@ -83,6 +83,7 @@
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -96,8 +97,6 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-LbmOrGks lbmOrGks = LBM;
-
 std::string path(".");
 
 std::string simulationName("ActuatorLine");
@@ -108,12 +107,6 @@ std::string simulationName("ActuatorLine");
 
 void multipleLevel(const std::string& configPath)
 {
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     auto gridFactory = GridFactory::make();
@@ -146,7 +139,7 @@ void multipleLevel(const std::string& configPath)
     const float tStartOutProbe      =  config.getValue<real>("tStartOutProbe");
     const float tOutProbe           =  config.getValue<real>("tOutProbe");
         
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory  = GridScalingFactory();
 
@@ -160,14 +153,14 @@ void multipleLevel(const std::string& configPath)
 							   L_x,  L_y,  L_z, dx);
 
     gridBuilder->setNumberOfLayers(4,0);
-    gridBuilder->addGrid( new Cuboid(   turbPos[0]-1.5*reference_diameter,  turbPos[1]-1.5*reference_diameter,  turbPos[2]-1.5*reference_diameter, 
-                                        turbPos[0]+10.0*reference_diameter, turbPos[1]+1.5*reference_diameter,  turbPos[2]+1.5*reference_diameter) , 1 );
+    gridBuilder->addGrid( std::make_shared<Cuboid>( turbPos[0]-1.5*reference_diameter,  turbPos[1]-1.5*reference_diameter,  turbPos[2]-1.5*reference_diameter, 
+                                                    turbPos[0]+10.0*reference_diameter, turbPos[1]+1.5*reference_diameter,  turbPos[2]+1.5*reference_diameter) , 1 );
     para->setMaxLevel(2);
     scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
 
 	gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-	gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!!
+	gridBuilder->buildGrids(false); // buildGrids() has to be called before setting the BCs!!!!
 
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -192,7 +185,7 @@ void multipleLevel(const std::string& configPath)
     para->setViscosityLB(viscosityLB);
     para->setVelocityRatio( dx / dt );
     para->setViscosityRatio( dx*dx/dt );
-    para->setMainKernel("CumulantK17");
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
 
     para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
         rho = (real)0.0;
@@ -274,7 +267,7 @@ int main( int argc, char* argv[])
     {
         try
         {
-            vf::logging::Logger::initalizeLogger();
+            vf::logging::Logger::initializeLogger();
 
             if( argc > 1){ path = argv[1]; }
 
diff --git a/apps/gpu/LBM/Basel/main.cpp b/apps/gpu/LBM/Basel/main.cpp
index 8dc1e456ab89f88ddb36bf050f81ede37610d0c8..4999d3418b269ae4340ca550a5f2c50fc6e45231 100644
--- a/apps/gpu/LBM/Basel/main.cpp
+++ b/apps/gpu/LBM/Basel/main.cpp
@@ -14,12 +14,9 @@
 #define _USE_MATH_DEFINES
 #include <math.h>
 
-//#include "metis.h"
-
-#include "Core/LbmOrGks.h"
-#include "Core/Input/Input.h"
-#include "Core/StringUtilities/StringUtil.h"
-#include "Core/Input/ConfigFileReader/ConfigFileReader.h"
+#include "Input/Input.h"
+#include "StringUtilities/StringUtil.h"
+#include "Input/ConfigFileReader/ConfigFileReader.h"
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
@@ -63,17 +60,6 @@
 
 void multipleLevel(const std::string& configPath)
 {
-	//std::ofstream logFile( "F:/Work/Computations/gridGenerator/grid/gridGeneratorLog.txt" );
-	//std::ofstream logFile("F:/Basel2019/log/gridGeneratorLog.txt");
-	//logging::Logger::addStream(&logFile);
-
-	logging::Logger::addStream(&std::cout);
-	logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-	logging::Logger::timeStamp(logging::Logger::ENABLE);
-	logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-	//UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG5");
-
 	auto gridFactory = GridFactory::make();
 	gridFactory->setGridStrategy(Device::CPU);
 	//gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
@@ -115,10 +101,10 @@ void multipleLevel(const std::string& configPath)
 
 #ifdef _WIN32
 		//Baumbart
-		TriangularMesh* BaselSTL = TriangularMesh::make("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl");
+		auto BaselSTL = std::make_shared<TriangularMesh>("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl");
 #else
 		//Phoenix
-		TriangularMesh* BaselSTL = TriangularMesh::make(gridpath + "/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl");
+		auto BaselSTL = std::make_shared<TriangularMesh>(gridpath + "/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl");
 #endif
 
 
@@ -130,7 +116,7 @@ void multipleLevel(const std::string& configPath)
 		//Merged for Wind in X Direction
 		gridBuilder->setPeriodicBoundaryCondition(true, true, false);
 
-		gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+		gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
 		//////////////////////////////////////////////////////////////////////////
 
@@ -232,7 +218,6 @@ int main(int argc, char* argv[])
 			}
 			catch (const std::exception& e)
 			{
-				*logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
 				//MPI_Abort(MPI_COMM_WORLD, -1);
 			}
 			catch (...)
@@ -252,22 +237,17 @@ int main(int argc, char* argv[])
 			}
 			catch (const std::exception& e)
 			{
-
-				*logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-				//std::cout << e.what() << std::flush;
+				std::cout << e.what() << std::flush;
 				//MPI_Abort(MPI_COMM_WORLD, -1);
 			}
 			catch (const std::bad_alloc e)
 			{
-
-				*logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-				//std::cout << e.what() << std::flush;
+				std::cout << e.what() << std::flush;
 				//MPI_Abort(MPI_COMM_WORLD, -1);
 			}
 			catch (...)
 			{
-				*logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-				//std::cout << "unknown exeption" << std::endl;
+				std::cout << "unknown exeption" << std::endl;
 			}
 
 			std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
diff --git a/apps/gpu/LBM/BaselMultiGPU/main.cpp b/apps/gpu/LBM/BaselMultiGPU/main.cpp
index 454f0d261e5a01d049bf5593bcf2a7036856b7ab..bfd64d42b428907e08c3a3b7fdb99319e0f05382 100644
--- a/apps/gpu/LBM/BaselMultiGPU/main.cpp
+++ b/apps/gpu/LBM/BaselMultiGPU/main.cpp
@@ -14,12 +14,9 @@
 #define _USE_MATH_DEFINES
 #include <math.h>
 
-//#include "metis.h"
-
-#include "Core/LbmOrGks.h"
-#include "Core/Input/Input.h"
-#include "Core/StringUtilities/StringUtil.h"
-#include "Core/Input/ConfigFileReader/ConfigFileReader.h"
+#include "Input/Input.h"
+#include "StringUtilities/StringUtil.h"
+#include "Input/ConfigFileReader/ConfigFileReader.h"
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
@@ -60,14 +57,6 @@
 
 void multipleLevel(const std::string& configPath)
 {
-    //std::ofstream logFile( "F:/Work/Computations/gridGenerator/grid/gridGeneratorLog.txt" );
-    //std::ofstream logFile( "grid/gridGeneratorLog.txt" );
-    //logging::Logger::addStream(&logFile);
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
 
     //UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG5");
 
@@ -98,28 +87,26 @@ void multipleLevel(const std::string& configPath)
 	logFile2.open(gridpath + std::to_string(generatePart) + "/gridGeneratorLog.txt");//Phoenix
 	//logFile2.open(std::string("M:/Basel2019/grids4/") + std::to_string(generatePart) + "/gridGeneratorLog.txt");//Baumbart
 
-	logging::Logger::addStream(&logFile2);
-
     bool useGridGenerator = false;
 
     if(useGridGenerator){
         real dx = 1.0;
         real vx = 0.05;
 
-        TriangularMesh* BaselSTL;
+        SPtr<TriangularMesh> BaselSTL;
 
 		if (generatePart == 0)
-			BaselSTL = TriangularMesh::make("/work/marschoe/Basel4GPU/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl"); //Phoenix
-			//BaselSTL = TriangularMesh::make("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl"); //Baumbart
+			BaselSTL = std::make_shared<TriangularMesh>("/work/marschoe/Basel4GPU/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl"); //Phoenix
+			//BaselSTL = std::make_shared<TriangularMesh>("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl"); //Baumbart
 		if (generatePart == 1)
-			BaselSTL = TriangularMesh::make("/work/marschoe/Basel4GPU/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_X.stl"); //Phoenix
-			//BaselSTL = TriangularMesh::make("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_X.stl"); //Baumbart
+			BaselSTL = std::make_shared<TriangularMesh>("/work/marschoe/Basel4GPU/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_X.stl"); //Phoenix
+			//BaselSTL = std::make_shared<TriangularMesh>("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_X.stl"); //Baumbart
 		if (generatePart == 2)
-			BaselSTL = TriangularMesh::make("/work/marschoe/Basel4GPU/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_X_Y.stl"); //Phoenix
-			//BaselSTL = TriangularMesh::make("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_X_Y.stl"); //Baumbart
+			BaselSTL = std::make_shared<TriangularMesh>("/work/marschoe/Basel4GPU/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_X_Y.stl"); //Phoenix
+			//BaselSTL = std::make_shared<TriangularMesh>("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_X_Y.stl"); //Baumbart
 		if (generatePart == 3)
-			BaselSTL = TriangularMesh::make("/work/marschoe/Basel4GPU/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_Y.stl"); //Phoenix
-			//BaselSTL = TriangularMesh::make("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_Y.stl"); //Baumbart
+			BaselSTL = std::make_shared<TriangularMesh>("/work/marschoe/Basel4GPU/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_Y.stl"); //Phoenix
+			//BaselSTL = std::make_shared<TriangularMesh>("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND_MIRROR_Y.stl"); //Baumbart
 
 		real lengthInXDirection = 512.0;
 		real lengthInYDirection = 512.0;
@@ -144,7 +131,7 @@ void multipleLevel(const std::string& configPath)
 
 		gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-        gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+        gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
 		//////////////////////////////////////////////////////////////////////////
 
@@ -252,7 +239,6 @@ int main( int argc, char* argv[])
             }
             catch (const std::exception& e)
             {
-                *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (...)
@@ -271,22 +257,17 @@ int main( int argc, char* argv[])
             }
             catch (const std::exception& e)
             {
-                
-                *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-                //std::cout << e.what() << std::flush;
+                std::cout << e.what() << std::flush;
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (const std::bad_alloc e)
             {
-                
-                *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-                //std::cout << e.what() << std::flush;
+                std::cout << e.what() << std::flush;
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (...)
             {
-                *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-                //std::cout << "unknown exeption" << std::endl;
+                std::cout << "unknown exeption" << std::endl;
             }
 
             std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
diff --git a/apps/gpu/LBM/BaselNU/main.cpp b/apps/gpu/LBM/BaselNU/main.cpp
index f3a3122ed92b410df56dff719878096692625d89..9600785063018ce140606ef66b1ea466dc4bdb40 100644
--- a/apps/gpu/LBM/BaselNU/main.cpp
+++ b/apps/gpu/LBM/BaselNU/main.cpp
@@ -14,12 +14,9 @@
 #define _USE_MATH_DEFINES
 #include <math.h>
 
-//#include "metis.h"
-
-#include "Core/LbmOrGks.h"
-#include "Core/Input/Input.h"
-#include "Core/StringUtilities/StringUtil.h"
-#include "Core/Input/ConfigFileReader/ConfigFileReader.h"
+#include "Input/Input.h"
+#include "StringUtilities/StringUtil.h"
+#include "Input/ConfigFileReader/ConfigFileReader.h"
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
@@ -60,16 +57,6 @@
 
 void multipleLevel(const std::string& configPath)
 {
-	std::ofstream logFile("F:/Basel2019NU/grid/gridGeneratorLog.txt");
-	logging::Logger::addStream(&logFile);
-
-	logging::Logger::addStream(&std::cout);
-	logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-	logging::Logger::timeStamp(logging::Logger::ENABLE);
-	logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-	//UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG5");
-
 	auto gridFactory = GridFactory::make();
 	gridFactory->setGridStrategy(Device::CPU);
 	//gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
@@ -98,7 +85,7 @@ void multipleLevel(const std::string& configPath)
 		real dx = 4.0;
 		real vx = 0.05;
 
-		TriangularMesh* BaselSTL = TriangularMesh::make("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl");
+		auto BaselSTL = std::make_shared<TriangularMesh>("M:/Basel2019/stl/BaselUrbanProfile_066_deg_bridge_3_All_CLOSED_WIDE_GROUND.stl");
 
 		gridBuilder->addCoarseGrid(-256.0, -256.0, -8.0,
 			                        256.0, 256.0, 160.0, dx);
@@ -107,14 +94,14 @@ void multipleLevel(const std::string& configPath)
 
 		//////////////////////////////////////////////////////////////////////////
 
-		Cuboid* refBoxMX = new Cuboid( -300, -300, - 20, 
-			                           -254,  300,  200 );
-		Cuboid* refBoxPX = new Cuboid(  254, -300, - 20, 
-			                            300,  300,  200 );
-		Cuboid* refBoxMY = new Cuboid( -300, -300, - 20, 
-			                            300, -254,  200 );
-		Cuboid* refBoxPY = new Cuboid( -300,  254, - 20, 
-			                            300,  300,  200 );
+		auto refBoxMX = std::make_shared<Cuboid>( -300, -300, - 20, // auto: make_shared yields std::shared_ptr<Cuboid>, which does not convert to Cuboid*
+			                                      -254,  300,  200 );
+		auto refBoxPX = std::make_shared<Cuboid>( 254, -300, - 20,
+			                                      300,  300,  200 );
+		auto refBoxMY = std::make_shared<Cuboid>( -300, -300, - 20,
+			                                       300, -254,  200 );
+		auto refBoxPY = std::make_shared<Cuboid>( -300,  254, - 20,
+			                                       300,  300,  200 );
 
 		Conglomerate* refRegion = new Conglomerate();
 		
@@ -133,7 +120,7 @@ void multipleLevel(const std::string& configPath)
 
 		gridBuilder->setPeriodicBoundaryCondition(true, true, false);
 
-		gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+		gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
 		//////////////////////////////////////////////////////////////////////////
 
@@ -225,7 +212,6 @@ int main(int argc, char* argv[])
 			}
 			catch (const std::exception& e)
 			{
-				*logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
 				//MPI_Abort(MPI_COMM_WORLD, -1);
 			}
 			catch (...)
@@ -245,22 +231,17 @@ int main(int argc, char* argv[])
 			}
 			catch (const std::exception& e)
 			{
-
-				*logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-				//std::cout << e.what() << std::flush;
+				std::cout << e.what() << std::flush;
 				//MPI_Abort(MPI_COMM_WORLD, -1);
 			}
 			catch (const std::bad_alloc e)
 			{
-
-				*logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-				//std::cout << e.what() << std::flush;
+				std::cout << e.what() << std::flush;
 				//MPI_Abort(MPI_COMM_WORLD, -1);
 			}
 			catch (...)
 			{
-				*logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-				//std::cout << "unknown exeption" << std::endl;
+				std::cout << "unknown exeption" << std::endl;
 			}
 
 			std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
index 5fc31904433bfe2df0722ab1c63f574d3fcb9a35..298c5d9c344a1873a2612a518f72f33f7d6b6f64 100644
--- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
+++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
@@ -43,15 +43,13 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "PointerDefinitions.h"
 
-#include "Core/StringUtilities/StringUtil.h"
-
-#include "Core/VectorTypes.h"
+#include "StringUtilities/StringUtil.h"
 
 #include <basics/config/ConfigurationFile.h>
-#include "lbm/constants/NumericConstants.h"
+#include "basics/constants/NumericConstants.h"
 
 #include <logger/Logger.h>
 
@@ -89,33 +87,22 @@
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
 #include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
 #include "utilities/communication.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
 std::string path(".");
 
 std::string simulationName("BoundaryLayer");
 
-using namespace vf::lbm::constant;
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+using namespace vf::basics::constant;
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-
 void multipleLevel(const std::string& configPath)
 {
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
     auto gridFactory = GridFactory::make();
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
@@ -126,12 +113,12 @@ void multipleLevel(const std::string& configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath);
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory  = GridScalingFactory();
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     
-    const int  nProcs = communicator.getNummberOfProcess();
+    const int  nProcs = communicator.getNumberOfProcess();
     const uint procID = vf::gpu::Communicator::getInstance().getPID();
     std::vector<uint> devices(10);
     std::iota(devices.begin(), devices.end(), 0);
@@ -147,8 +134,6 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    LbmOrGks lbmOrGks = LBM;
-
     const real H = config.getValue("boundaryLayerHeight", 1000.0); // boundary layer height in m
 
     const real L_x = 6*H;
@@ -236,7 +221,7 @@ void multipleLevel(const std::string& configPath)
     bool useStreams = (nProcs > 1 ? true: false);
     // useStreams=false;
     para->setUseStreams(useStreams);
-    para->setMainKernel("CumulantK17");
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
     para->setIsBodyForce( config.getValue<bool>("bodyForce") );
 
     para->setTimestepStartOut(uint(tStartOut/dt) );
@@ -284,11 +269,11 @@ void multipleLevel(const std::string& configPath)
 
     gridBuilder->addCoarseGrid( xGridMin,  0.0,  0.0,
                                 xGridMax,  L_y,  L_z, dx);
-    if(true)// Add refinement
+    if(false)// Add refinement
     {
         gridBuilder->setNumberOfLayers(4,0);
         real xMaxRefinement = readPrecursor? xGridMax-H: xGridMax;   //Stop refinement some distance before outlet if domain ist not periodic
-        gridBuilder->addGrid( new Cuboid( xGridMin, 0.f, 0.f, xMaxRefinement, L_y,  0.5*L_z) , 1 );
+        gridBuilder->addGrid( std::make_shared<Cuboid>( xGridMin, 0.f, 0.f, xMaxRefinement, L_y,  0.5*L_z) , 1 );
         para->setMaxLevel(2);
         scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
     }
@@ -304,28 +289,28 @@ void multipleLevel(const std::string& configPath)
         gridBuilder->setPeriodicBoundaryCondition(!readPrecursor, true, false);
     }
 
-	gridBuilder->buildGrids(lbmOrGks, true); // buildGrids() has to be called before setting the BCs!!!!
+	gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
     std::cout << "nProcs: "<< nProcs << "Proc: " << procID << " isFirstSubDomain: " << isFirstSubDomain << " isLastSubDomain: " << isLastSubDomain << " isMidSubDomain: " << isMidSubDomain << std::endl;
     
     if(nProcs > 1){
         if (isFirstSubDomain || isMidSubDomain) {
-            gridBuilder->findCommunicationIndices(CommunicationDirections::PX, lbmOrGks);
+            gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
             gridBuilder->setCommunicationProcess(CommunicationDirections::PX, procID+1);
         }
 
         if (isLastSubDomain || isMidSubDomain) {
-            gridBuilder->findCommunicationIndices(CommunicationDirections::MX, lbmOrGks);
+            gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
             gridBuilder->setCommunicationProcess(CommunicationDirections::MX, procID-1);
         }
 
         if (isFirstSubDomain && !readPrecursor) {
-            gridBuilder->findCommunicationIndices(CommunicationDirections::MX, lbmOrGks);
+            gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
             gridBuilder->setCommunicationProcess(CommunicationDirections::MX, nProcs-1);
         }
 
         if (isLastSubDomain && !readPrecursor) {
-            gridBuilder->findCommunicationIndices(CommunicationDirections::PX, lbmOrGks);
+            gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
             gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 0);
         }
     }
@@ -350,13 +335,14 @@ void multipleLevel(const std::string& configPath)
 
     gridBuilder->setStressBoundaryCondition(SideType::MZ,
                                             0.0, 0.0, 1.0,              // wall normals
-                                            samplingOffset, z0, dx);     // wall model settinng
+                                            samplingOffset, z0, dx);    // wall model settings
+
     para->setHasWallModelMonitor(true);   
     gridBuilder->setSlipBoundaryCondition(SideType::PZ,  0.0f,  0.0f, -1.0f); 
 
     bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
     bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack);
-    bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipBounceBack); 
+    bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipCompressibleTurbulentViscosity); 
     bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
     bcFactory.setPrecursorBoundaryCondition(useDistributions ? BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor : BoundaryConditionFactory::PrecursorBC::VelocityPrecursor);
     para->setOutflowPressureCorrectionFactor(0.0); 
@@ -454,7 +440,7 @@ int main( int argc, char* argv[])
     {
         try
         {
-            vf::logging::Logger::initalizeLogger();
+            vf::logging::Logger::initializeLogger();
 
             if( argc > 1){ path = argv[1]; }
 
diff --git a/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp b/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp
index 98982f6d9da4c95ca12319f56ffcc4a1e83d33af..f553c255b882596ea4614e8e2fa33403e3d9a0f8 100644
--- a/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp
+++ b/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp
@@ -45,13 +45,11 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "Core/DataTypes.h"
-#include "Core/LbmOrGks.h"
-#include "Core/Logger/Logger.h"
-#include "Core/VectorTypes.h"
+#include "DataTypes.h"
+
 #include "PointerDefinitions.h"
 #include "config/ConfigurationFile.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -74,6 +72,7 @@
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -97,29 +96,21 @@ int main(int argc, char *argv[])
         //////////////////////////////////////////////////////////////////////////
 
         vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance();
-        const int numberOfProcesses = communicator.getNummberOfProcess();
+        const int numberOfProcesses = communicator.getNumberOfProcess();
         SPtr<Parameter> para = std::make_shared<Parameter>(numberOfProcesses, communicator.getPID());
         std::vector<uint> devices(10);
         std::iota(devices.begin(), devices.end(), 0);
         para->setDevices(devices);
-        para->setMaxDev(communicator.getNummberOfProcess());
+        para->setMaxDev(communicator.getNumberOfProcess());
         BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
         //////////////////////////////////////////////////////////////////////////
         // setup logger
         //////////////////////////////////////////////////////////////////////////
 
-        std::ofstream logFile("output/log_process" + std::to_string(vf::gpu::Communicator::getInstance().getPID()) +
-                              ".txt");
-        logging::Logger::addStream(&logFile);
-        logging::Logger::addStream(&std::cout);
-        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-        logging::Logger::timeStamp(logging::Logger::ENABLE);
-        logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
         vf::logging::Logger::changeLogPath("output/vflog_process" +
                                            std::to_string(vf::gpu::Communicator::getInstance().getPID()) + ".txt");
-        vf::logging::Logger::initalizeLogger();
+        vf::logging::Logger::initializeLogger();
 
         //////////////////////////////////////////////////////////////////////////
         // setup gridGenerator
@@ -166,7 +157,7 @@ int main(int argc, char *argv[])
         para->setTimestepEnd(timeStepEnd);
 
         para->setOutputPrefix("ChannelFlow");
-        para->setMainKernel("CumulantK17CompChimStream");
+        para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
 
         const uint generatePart = vf::gpu::Communicator::getInstance().getPID();
         real overlap = (real)8.0 * dx;
@@ -202,7 +193,7 @@ int main(int argc, char *argv[])
             // build grids
             //////////////////////////////////////////////////////////////////////////
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             //////////////////////////////////////////////////////////////////////////
             // configure communication neighbors
diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index 69ecb3d8cbd45a8a7419437e934a57bd20b0bc9f..a802de12c032766f9bf14d2a43b4d16078e230f6 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -41,10 +41,7 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "Core/DataTypes.h"
-#include "Core/LbmOrGks.h"
-#include "Core/Logger/Logger.h"
-#include "Core/VectorTypes.h"
+#include "DataTypes.h"
 #include "PointerDefinitions.h"
 
 #include <logger/Logger.h>
@@ -69,13 +66,14 @@
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 //////////////////////////////////////////////////////////////////////////
 
 int main()
 {
     try {
-         vf::logging::Logger::initalizeLogger();
+         vf::logging::Logger::initializeLogger();
         //////////////////////////////////////////////////////////////////////////
         // Simulation parameters
         //////////////////////////////////////////////////////////////////////////
@@ -85,21 +83,12 @@ int main()
         const real L = 1.0;
         const real Re = 1000.0;
         const real velocity = 1.0;
-        const real dt = (real)0.5e-3;
+        const real velocityLB = 0.05; // LB units
         const uint nx = 64;
 
         const uint timeStepOut = 1000;
         const uint timeStepEnd = 10000;
 
-        //////////////////////////////////////////////////////////////////////////
-        // setup logger
-        //////////////////////////////////////////////////////////////////////////
-
-        logging::Logger::addStream(&std::cout);
-        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-        logging::Logger::timeStamp(logging::Logger::ENABLE);
-        logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
         //////////////////////////////////////////////////////////////////////////
         // setup gridGenerator
         //////////////////////////////////////////////////////////////////////////
@@ -109,31 +98,30 @@ int main()
         auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
         //////////////////////////////////////////////////////////////////////////
-        // create grid
+        // compute parameters in lattice units
         //////////////////////////////////////////////////////////////////////////
 
-        real dx = L / real(nx);
-
-        gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx);
-
-        gridBuilder->addGrid(new Cuboid(-0.25, -0.25, -0.25, 0.25, 0.25, 0.25), 1); // add fine grid
-        GridScalingFactory scalingFactory = GridScalingFactory();
-        scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
+        const real dx = L / real(nx);
+        const real dt  = velocityLB / velocity * dx;
 
-        gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+        const real vxLB = velocityLB / sqrt(2.0); // LB units
+        const real vyLB = velocityLB / sqrt(2.0); // LB units
 
-        gridBuilder->buildGrids(LbmOrGks::LBM, false);
+        const real viscosityLB = nx * velocityLB / Re; // LB units
 
         //////////////////////////////////////////////////////////////////////////
-        // compute parameters in lattice units
+        // create grid
         //////////////////////////////////////////////////////////////////////////
 
-        const real velocityLB = velocity * dt / dx; // LB units
+        gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx);
 
-        const real vxLB = velocityLB / sqrt(2.0); // LB units
-        const real vyLB = velocityLB / sqrt(2.0); // LB units
+        gridBuilder->addGrid(std::make_shared<Cuboid>(-0.25, -0.25, -0.25, 0.25, 0.25, 0.25), 1); // add fine grid
+        GridScalingFactory scalingFactory = GridScalingFactory();
+        scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
 
-        const real viscosityLB = nx * velocityLB / Re; // LB units
+        gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+
+        gridBuilder->buildGrids(false);
 
         //////////////////////////////////////////////////////////////////////////
         // set parameters
@@ -154,7 +142,7 @@ int main()
         para->setTimestepOut(timeStepOut);
         para->setTimestepEnd(timeStepEnd);
 
-        para->setMainKernel("CumulantK17CompChimRedesigned");
+        para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
 
         //////////////////////////////////////////////////////////////////////////
         // set boundary conditions
@@ -164,8 +152,8 @@ int main()
         gridBuilder->setNoSlipBoundaryCondition(SideType::MX);
         gridBuilder->setNoSlipBoundaryCondition(SideType::PY);
         gridBuilder->setNoSlipBoundaryCondition(SideType::MY);
-        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
         gridBuilder->setNoSlipBoundaryCondition(SideType::MZ);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
 
         BoundaryConditionFactory bcFactory;
 
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
old mode 100644
new mode 100755
index 8ca6939924fcfba22c8b96f000b9d8d05a3f7f43..acab426b4868cc736710c883776c5626ec6b5753
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
@@ -12,15 +12,12 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "basics/Core/DataTypes.h"
-#include "basics/Core/VectorTypes.h"
+#include "basics/DataTypes.h"
 #include "basics/PointerDefinitions.h"
 
-#include "basics/Core/LbmOrGks.h"
-#include "basics/Core/Logger/Logger.h"
-#include "basics/Core/StringUtilities/StringUtil.h"
+#include "basics/StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -50,6 +47,8 @@
 #include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
 #include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -57,50 +56,26 @@
 
 #include "utilities/communication.h"
 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//          U s e r    s e t t i n g s
-//
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-const std::string outPath("output/DrivenCavity_Results/");
-const std::string gridPath = "output/DrivenCavity_Results/grid/";
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void multipleLevel(std::filesystem::path& configPath)
+void runVirtualFluids(const vf::basics::ConfigurationFile& config)
 {
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     auto gridFactory = GridFactory::make();
     gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
-    vf::basics::ConfigurationFile config;
-    std::cout << configPath << std::endl;
-    config.load(configPath.string());
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
-
+    GridScalingFactory scalingFactory = GridScalingFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     bool useGridGenerator = true;
     bool useLevels        = true;
-    // para->setUseStreams(useStreams);                  // set in config
-    // para->useReducedCommunicationAfterFtoC = true;    // set in config
-    para->setCalcTurbulenceIntensity(false);
 
     if (para->getNumprocs() == 1) {
         para->useReducedCommunicationAfterFtoC = false;
@@ -108,47 +83,40 @@ void multipleLevel(std::filesystem::path& configPath)
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    const real L        = 1.0;
-    const real Re       = 1000.0; // 1000
+    const std::string outPath("output/" + std::to_string(para->getNumprocs()) + "GPU/");
+    const std::string gridPath = "output/";
+    std::string simulationName("DrivenCavityMultiGPU");
+
+    const real L = 1.0;
+    const real Re = 1000.0;
     const real velocity = 1.0;
-    const real dt       = (real)1.0e-3; // 0.5e-3;
-    const uint nx       = 64;
-    std::string simulationName("DrivenCavityChimMultiGPU");
+    const real velocityLB = 0.05; // LB units
+    const uint nx = 64;
 
     // para->setTimestepOut(10000);   // set in config
     // para->setTimestepEnd(10000);   // set in config
 
     const real dxGrid      = L / real(nx);
-    const real velocityLB  = velocity * dt / dxGrid;       // LB units
+    const real dt  = velocityLB / velocity * dxGrid;
     const real vxLB        = velocityLB / (real)sqrt(2.0); // LB units
     const real vyLB        = velocityLB / (real)sqrt(2.0); // LB units
     const real viscosityLB = nx * velocityLB / Re;         // LB units
 
-    para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
-        rho = (real)1.0;
-        vx  = (real)(coordX * velocityLB);
-        vy  = (real)(coordY * velocityLB);
-        vz  = (real)(coordZ * velocityLB);
-    });
-
     para->setVelocityLB(velocityLB);
     para->setViscosityLB(viscosityLB);
     para->setVelocityRatio(velocity / velocityLB);
-    para->setDensityRatio((real)1.0); // correct value?
+    para->setDensityRatio((real)1.0);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    para->setCalcDragLift(false);
-    para->setUseWale(false);
-
     if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);}
     para->setOutputPrefix(simulationName);
 
     para->setPrintFiles(true);
     std::cout << "Write result files to " << para->getFName() << std::endl;
 
-    // para->setMainKernel("CumulantK17CompChim");
-    para->setMainKernel("CumulantK17CompChimStream");
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
+    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -157,7 +125,7 @@ void multipleLevel(std::filesystem::path& configPath)
     VF_LOG_INFO("velocity LB [dx/dt]              = {}", vxLB);
     VF_LOG_INFO("viscosity LB [dx/dt]             = {}", viscosityLB);
     VF_LOG_INFO("dxGrid [-]                       = {}\n", dxGrid);
-
+    VF_LOG_INFO("dt [s]                           = {}", dt);
     VF_LOG_INFO("simulation parameters:");
     VF_LOG_INFO("mainKernel                       = {}\n", para->getMainKernel());
 
@@ -171,9 +139,9 @@ void multipleLevel(std::filesystem::path& configPath)
         const real zGridMin = -0.5 * L;
         const real zGridMax = 0.5 * L;
 
-        Cuboid *level1 = nullptr;
+        SPtr<Cuboid> level1 = nullptr;
         if (useLevels)
-            level1 = new Cuboid(-0.25 * L, -0.25 * L, -0.25 * L, 0.25 * L, 0.25 * L, 0.25 * L);
+            level1 = std::make_shared<Cuboid>(-0.25 * L, -0.25 * L, -0.25 * L, 0.25 * L, 0.25 * L, 0.25 * L);
 
         if (para->getNumprocs() > 1) {
 
@@ -185,7 +153,7 @@ void multipleLevel(std::filesystem::path& configPath)
             const real ySplit = 0.0;
             const real zSplit = 0.0;
 
-            if (communicator.getNummberOfProcess() == 2) {
+            if (communicator.getNumberOfProcess() == 2) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zSplit + overlap,
@@ -209,15 +177,15 @@ void multipleLevel(std::filesystem::path& configPath)
                         std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
                 }
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 1);
                 }
 
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
 
@@ -226,13 +194,13 @@ void multipleLevel(std::filesystem::path& configPath)
                 if (generatePart == 0)
                     gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
                 if (generatePart == 1)
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcess() == 4) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, yGridMax,
@@ -268,30 +236,30 @@ void multipleLevel(std::filesystem::path& configPath)
                     gridBuilder->setSubDomainBox(
                         std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 2);
                 }
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 3);
                 }
                 if (generatePart == 2) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
                 if (generatePart == 3) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
                 }
 
@@ -303,20 +271,20 @@ void multipleLevel(std::filesystem::path& configPath)
                 }
                 if (generatePart == 2) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
                 if (generatePart == 3) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 if (generatePart == 1) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
                 }
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcess() == 8) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, ySplit + overlap,
@@ -380,71 +348,71 @@ void multipleLevel(std::filesystem::path& configPath)
                     gridBuilder->setSubDomainBox(
                         std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
                 gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 4);
                 }
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 5);
                 }
                 if (generatePart == 2) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 6);
                 }
                 if (generatePart == 3) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 7);
                 }
                 if (generatePart == 4) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 5);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 6);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
                 if (generatePart == 5) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 4);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 7);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
                 }
                 if (generatePart == 6) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 7);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 4);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 2);
                 }
                 if (generatePart == 7) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 6);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 5);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 3);
                 }
 
@@ -472,22 +440,22 @@ void multipleLevel(std::filesystem::path& configPath)
                 if (generatePart == 4) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 if (generatePart == 5) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 if (generatePart == 6) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 if (generatePart == 7) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
-                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
                 }
                 //////////////////////////////////////////////////////////////////////////
             }
@@ -505,7 +473,7 @@ void multipleLevel(std::filesystem::path& configPath)
                 gridBuilder->addGrid(level1, 1);
             }
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
             //////////////////////////////////////////////////////////////////////////
             gridBuilder->setVelocityBoundaryCondition(SideType::MX, 0.0, 0.0, 0.0);
@@ -513,7 +481,7 @@ void multipleLevel(std::filesystem::path& configPath)
             gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
 
             //////////////////////////////////////////////////////////////////////////
             gridBuilder->writeGridsToVtk(outPath + "/grid/");
@@ -534,7 +502,7 @@ void multipleLevel(std::filesystem::path& configPath)
         gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
     }
 
-    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
+    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory);
     sim.run();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -548,20 +516,9 @@ int main(int argc, char *argv[])
     if (argv != NULL) {
 
         try {
-            //////////////////////////////////////////////////////////////////////////
-            // assuming that a config files is stored parallel to this file.
-            std::filesystem::path configPath = __FILE__;
-
-            // the config file's default name can be replaced by passing a command line argument
-            std::string configName("configDrivenCavityMultiGPU.txt");
-            if (argc == 2) {
-                configName = argv[1];
-                std::cout << "Using configFile command line argument: " << configName << std::endl;
-            }
-
-            configPath.replace_filename(configName);
-
-            multipleLevel(configPath);
+            VF_LOG_TRACE("For the default config path to work, execute the app from the project root.");
+            vf::basics::ConfigurationFile config = vf::basics::ConfigurationFile::loadConfig(argc, argv, "./apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt");
+            runVirtualFluids(config);
 
             //////////////////////////////////////////////////////////////////////////
         } catch (const spdlog::spdlog_ex &ex) {
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c5789cdf96049b7c0a31ce693c29cd2db4952a58
--- /dev/null
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt
@@ -0,0 +1,17 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=10000
+TimeOut=10000
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c5789cdf96049b7c0a31ce693c29cd2db4952a58
--- /dev/null
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt
@@ -0,0 +1,17 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=10000
+TimeOut=10000
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt b/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..40b4f08d7500c56efae7378df6398d065e4ecbfb
--- /dev/null
+++ b/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt
@@ -0,0 +1,10 @@
+PROJECT(DrivenCavityUniform LANGUAGES CUDA CXX)
+
+#LIST(APPEND CS_COMPILER_FLAGS_CXX "-DOMPI_SKIP_MPICXX" )
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES DrivenCavity.cpp) # project macro; presumably creates the DrivenCavityUniform executable target - confirm
+
+set_source_files_properties(DrivenCavity.cpp PROPERTIES LANGUAGE CUDA) # treat the .cpp source as CUDA code
+
+set_target_properties(DrivenCavityUniform PROPERTIES CUDA_SEPARABLE_COMPILATION ON) # enable separable compilation of CUDA device code
+
diff --git a/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dfa1256c80e6aeb0e209638ba0c7425ff437e2d5
--- /dev/null
+++ b/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
@@ -0,0 +1,222 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DrivenCavity.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr, Stephan Lenz
+//=======================================================================================
+#define _USE_MATH_DEFINES
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "DataTypes.h"
+#include <logger/Logger.h>
+
+#include "PointerDefinitions.h"
+
+#include <logger/Logger.h>
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/GridFactory.h"
+#include "GridGenerator/geometries/Cuboid/Cuboid.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+//! \brief Sets up the uniform-grid lid-driven cavity (64^3 nodes, Re = 1000), runs it, and logs any exception instead of propagating it.
+int main()
+{
+    try {
+        vf::logging::Logger::initializeLogger();
+        //////////////////////////////////////////////////////////////////////////
+        // Simulation parameters
+        //////////////////////////////////////////////////////////////////////////
+        std::string path("./output/DrivenCavity_uniform");
+        std::string simulationName("LidDrivenCavity");
+
+        const real L = 1.0;
+        const real Re = 1000.0;
+        const real velocity = 1.0;
+        const real dt = (real)0.5e-3;
+        const uint nx = 64;
+
+        const uint timeStepOut = 1000;
+        const uint timeStepEnd = 10000;
+
+        //////////////////////////////////////////////////////////////////////////
+        // setup gridGenerator
+        //////////////////////////////////////////////////////////////////////////
+
+        auto gridFactory = GridFactory::make();
+        gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+        auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+
+        //////////////////////////////////////////////////////////////////////////
+        // create grid
+        //////////////////////////////////////////////////////////////////////////
+
+        real dx = L / real(nx); // node spacing: nx nodes along an edge of length L
+
+        gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx); // cube of edge length L centered at the origin
+
+        // gridBuilder->addGrid(std::make_shared<Cuboid>(-0.25, -0.25, -0.25, 0.25, 0.25, 0.25), 1); // add fine grid
+        GridScalingFactory scalingFactory = GridScalingFactory();
+        scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
+
+        gridBuilder->setPeriodicBoundaryCondition(false, false, false); // no periodic direction
+
+        gridBuilder->buildGrids(false);
+
+        //////////////////////////////////////////////////////////////////////////
+        // compute parameters in lattice units
+        //////////////////////////////////////////////////////////////////////////
+
+        const real velocityLB = velocity * dt / dx; // LB units
+
+        const real vxLB = velocityLB / sqrt(2.0); // LB units; equal x and y components, so the resulting speed is velocityLB
+        const real vyLB = velocityLB / sqrt(2.0); // LB units
+
+        const real viscosityLB = nx * velocityLB / Re; // LB units
+
+        //////////////////////////////////////////////////////////////////////////
+        // set parameters
+        //////////////////////////////////////////////////////////////////////////
+        SPtr<Parameter> para = std::make_shared<Parameter>();
+
+        para->setOutputPath(path);
+        para->setOutputPrefix(simulationName);
+
+        para->setPrintFiles(true);
+
+        para->setVelocityLB(velocityLB);
+        para->setViscosityLB(viscosityLB);
+
+        para->setVelocityRatio(velocity / velocityLB);
+        para->setDensityRatio(1.0);
+
+        para->setTimestepOut(timeStepOut);
+        para->setTimestepEnd(timeStepEnd);
+
+        para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
+
+        //////////////////////////////////////////////////////////////////////////
+        // set boundary conditions
+        //////////////////////////////////////////////////////////////////////////
+
+        gridBuilder->setNoSlipBoundaryCondition(SideType::PX);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::MX);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::PY);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::MY);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0); // the moving lid: only side with a prescribed velocity
+        gridBuilder->setNoSlipBoundaryCondition(SideType::MZ);
+
+        BoundaryConditionFactory bcFactory;
+
+        bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack);
+        bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocitySimpleBounceBackCompressible);
+
+        //////////////////////////////////////////////////////////////////////////
+        // create communicator, memory manager and grid provider
+        //////////////////////////////////////////////////////////////////////////
+
+        vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance();
+
+        auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
+        SPtr<GridProvider> gridGenerator =
+            GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+
+        //////////////////////////////////////////////////////////////////////////
+        // run simulation
+        //////////////////////////////////////////////////////////////////////////
+
+        VF_LOG_INFO("Start Running DrivenCavity Showcase...");
+        printf("\n");
+        VF_LOG_INFO("world parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("dt [s]                 = {}", dt);
+        VF_LOG_INFO("world_length   [m]     = {}", L);
+        VF_LOG_INFO("world_velocity [m/s]   = {}", velocity);
+        VF_LOG_INFO("dx [m]                 = {}", dx);
+        printf("\n");
+        VF_LOG_INFO("LB parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("Re                     = {}", Re);
+        VF_LOG_INFO("lb_velocity [dx/dt]    = {}", velocityLB);
+        VF_LOG_INFO("lb_viscosity [dx^2/dt] = {}", viscosityLB);
+        VF_LOG_INFO("lb_vx [dx/dt] (lb_velocity/sqrt(2)) = {}", vxLB);
+        VF_LOG_INFO("lb_vy [dx/dt] (lb_velocity/sqrt(2)) = {}", vyLB);
+        printf("\n");
+        VF_LOG_INFO("simulation parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("nx                     = {}", nx);
+        VF_LOG_INFO("ny                     = {}", nx);
+        VF_LOG_INFO("nz                     = {}", nx);
+        VF_LOG_INFO("number of nodes        = {}", nx * nx * nx);
+        VF_LOG_INFO("n timesteps            = {}", timeStepEnd); // value passed to setTimestepEnd()
+        VF_LOG_INFO("write_nth_timestep     = {}", timeStepOut); // value passed to setTimestepOut()
+        VF_LOG_INFO("output_path            = {}", path);
+
+        Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory);
+        sim.run();
+
+    } catch (const spdlog::spdlog_ex &ex) {
+        std::cout << "Log initialization failed: " << ex.what() << std::endl; // reported via std::cout rather than the logger
+    } catch (const std::bad_alloc &e) {
+        VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
+    } catch (const std::exception &e) {
+        VF_LOG_CRITICAL("exception: {}", e.what());
+    } catch (...) {
+        VF_LOG_CRITICAL("Unknown exception!");
+    }
+
+    return 0;
+}
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt b/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt
similarity index 59%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt
rename to apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt
index c710922b9fc82ac7680f5f7daade4faa235bc957..458346a67c7f001580494af1dc9262034613be68 100644
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt
+++ b/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt
@@ -1,20 +1,19 @@
 ##################################################
 #GPU Mapping
 ##################################################
-Devices="0 1 2 3"
-NumberOfDevices=4
+#Devices="0 1 2 3"
+#NumberOfDevices=4
 
 ##################################################
 #informations for Writing
 ##################################################
-Path=/work/y0078217/Results/DrivenCavityMultiGPUResults/4GPU/
-#Prefix="DrivenCavityMultiGPU" 
+#Path = "output/"
+#Prefix="DrivenCavity" 
 #WriteGrid=true
 ##################################################
 #informations for reading
 ##################################################
-GridPath=/work/y0078217/Grids/GridDrivenCavityMultiGPU/4GPU/
-#GridPath="C:"
+#GridPath="grid/"
 
 ##################################################
 #number of grid levels
@@ -25,17 +24,11 @@ GridPath=/work/y0078217/Grids/GridDrivenCavityMultiGPU/4GPU/
 #LBM Version
 ##################################################
 #D3Qxx=27
-#MainKernelName=CumulantK17CompChim
+#MainKernelName=CumulantAA2016CompSP27
 
 ##################################################
 #simulation parameter
 ##################################################
-TimeEnd=1
-TimeOut=1
-#TimeStartOut=0
-
-##################################################
-# CUDA Streams and optimized communication (only used for multiple GPUs)
-##################################################
-useStreams = true
-useReducedCommunicationInInterpolation = true
\ No newline at end of file
+#TimeEnd=100000
+#TimeOut=1000 
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
index efac863fc9efd446e5f266648ad4fa74c954634f..b583633b50542795fe4b27aca42c08cca1a5331c 100644
--- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
+++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
@@ -13,15 +13,12 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "basics/Core/DataTypes.h"
-#include "basics/Core/VectorTypes.h"
+#include "basics/DataTypes.h"
 #include "basics/PointerDefinitions.h"
 
-#include "basics/Core/LbmOrGks.h"
-#include "basics/Core/Logger/Logger.h"
-#include "basics/Core/StringUtilities/StringUtil.h"
+#include "basics/StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -40,7 +37,6 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
@@ -49,7 +45,8 @@
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -87,23 +84,16 @@ const std::string simulationName("MusselOyster");
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-void multipleLevel(std::filesystem::path &configPath)
+void runVirtualFluids(const vf::basics::ConfigurationFile& config)
 {
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
     vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance();
 
     auto gridFactory = GridFactory::make();
     gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
-    vf::basics::ConfigurationFile config;
-    config.load(configPath.string());
     SPtr<Parameter> para =
-        std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+        std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -161,8 +151,7 @@ void multipleLevel(std::filesystem::path &configPath)
     std::cout << "Write result files to " << para->getFName() << std::endl;
 
     para->setUseStreams(useStreams);
-    // para->setMainKernel("CumulantK17CompChim");
-    para->setMainKernel("CumulantK17CompChimStream");
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
     
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     
@@ -196,10 +185,10 @@ void multipleLevel(std::filesystem::path &configPath)
         // height MUSSEL = 35.0
         // height Oyster = 72.0
 
-        TriangularMesh *bivalveSTL = TriangularMesh::make(stlPath + bivalveType + ".stl");
-        TriangularMesh *bivalveRef_1_STL = nullptr;
+        SPtr<TriangularMesh> bivalveSTL = std::make_shared<TriangularMesh>(stlPath + bivalveType + ".stl");
+        SPtr<TriangularMesh> bivalveRef_1_STL = nullptr;
         if (useLevels)
-            bivalveRef_1_STL = TriangularMesh::make(stlPath + bivalveType + "_Level1.stl");
+            bivalveRef_1_STL = std::make_shared<TriangularMesh>(stlPath + bivalveType + "_Level1.stl");
 
         if (para->getNumprocs() > 1) {
             const uint generatePart = vf::gpu::Communicator::getInstance().getPID();
@@ -207,7 +196,7 @@ void multipleLevel(std::filesystem::path &configPath)
             real overlap = (real)8.0 * dxGrid;
             gridBuilder->setNumberOfLayers(10, 8);
 
-            if (communicator.getNummberOfProcess() == 2) {
+            if (communicator.getNumberOfProcess() == 2) {
                 const real zSplit = 0.0; // round(((double)bbzp + bbzm) * 0.5);
 
                 if (generatePart == 0) {
@@ -234,15 +223,15 @@ void multipleLevel(std::filesystem::path &configPath)
                         std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
                 }
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 1);
                 }
 
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
 
@@ -258,7 +247,7 @@ void multipleLevel(std::filesystem::path &configPath)
                 gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcess() == 4) {
 
                 const real xSplit = 100.0;
                 const real zSplit = 0.0;
@@ -299,30 +288,30 @@ void multipleLevel(std::filesystem::path &configPath)
                     gridBuilder->setSubDomainBox(
                         std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 2);
                 }
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 3);
                 }
                 if (generatePart == 2) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
                 if (generatePart == 3) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
                 }
 
@@ -348,7 +337,7 @@ void multipleLevel(std::filesystem::path &configPath)
                     gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 }
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcess() == 8) {
                 real xSplit = 140.0; // 100.0 // mit groesserem Level 1 140.0
                 real ySplit = 32.0;  // 32.0
                 real zSplit = 0.0;
@@ -417,71 +406,71 @@ void multipleLevel(std::filesystem::path &configPath)
                     gridBuilder->setSubDomainBox(
                         std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
                 gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 4);
                 }
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 5);
                 }
                 if (generatePart == 2) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 6);
                 }
                 if (generatePart == 3) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 7);
                 }
                 if (generatePart == 4) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 5);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 6);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
                 if (generatePart == 5) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 4);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 7);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
                 }
                 if (generatePart == 6) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 7);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 4);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 2);
                 }
                 if (generatePart == 7) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 6);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 5);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 3);
                 }
 
@@ -545,7 +534,7 @@ void multipleLevel(std::filesystem::path &configPath)
 
             gridBuilder->addGeometry(bivalveSTL);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
             //////////////////////////////////////////////////////////////////////////
@@ -594,20 +583,9 @@ int main(int argc, char *argv[])
     if (argv != NULL) {
 
         try {
-            //////////////////////////////////////////////////////////////////////////
-            // assuming that a config files is stored parallel to this file.
-            std::filesystem::path configPath = __FILE__;
-
-            // the config file's default name can be replaced by passing a command line argument
-            std::string configName("configMusselOyster.txt");
-            if (argc == 2) {
-                configName = argv[1];
-                std::cout << "Using configFile command line argument: " << configName << std::endl;
-            }
-
-            configPath.replace_filename(configName);
-
-            multipleLevel(configPath);
+            VF_LOG_TRACE("For the default config path to work, execute the app from the project root.");
+            vf::basics::ConfigurationFile config = vf::basics::ConfigurationFile::loadConfig(argc, argv, "./apps/gpu/LBM/MusselOyster/configMusselOyster.txt");
+            runVirtualFluids(config);
 
             //////////////////////////////////////////////////////////////////////////
         } catch (const spdlog::spdlog_ex &ex) {
diff --git a/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt b/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
index 4e2b0c91482b6a650ff28a210673cac097cb8c2d..2bf6955062da5c98f6a7b931c19821c52eaf15ea 100644
--- a/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
+++ b/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
@@ -7,14 +7,14 @@ NumberOfDevices=4
 ##################################################
 #informations for Writing
 ##################################################
-Path=/work/y0078217/Results/MusselOysterResults/8GPUOyster05/
+#Path=/work/y0078217/Results/MusselOysterResults/8GPUOyster05/
 #Path="F:/Work/Computations/out/MusselOyster/"
 #Prefix="MusselOyster" 
 #WriteGrid=true
 ##################################################
 #informations for reading
 ##################################################
-GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/
+#GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/
 #GridPath="C:"
 
 ##################################################
@@ -31,8 +31,8 @@ GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/
 ##################################################
 #simulation parameter
 ##################################################
-TimeEnd=400000 # 800000
-TimeOut=100000 # 400000
+TimeEnd=100000 # 800000
+TimeOut=10000 # 400000
 #TimeStartOut=0
 
 ##################################################
diff --git a/apps/gpu/LBM/SphereGPU/Sphere.cpp b/apps/gpu/LBM/SphereGPU/Sphere.cpp
index 6b749b564368bc6223c1283351521221d4950411..a20383b8e7eab9ce61fb8d8c21de95d6033f5c5f 100644
--- a/apps/gpu/LBM/SphereGPU/Sphere.cpp
+++ b/apps/gpu/LBM/SphereGPU/Sphere.cpp
@@ -42,13 +42,10 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "Core/DataTypes.h"
-#include "Core/LbmOrGks.h"
-#include "Core/Logger/Logger.h"
-#include "Core/VectorTypes.h"
+#include "DataTypes.h"
+#include <logger/Logger.h>
 #include "PointerDefinitions.h"
 #include "config/ConfigurationFile.h"
-#include "logger/Logger.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -87,26 +84,17 @@ int main(int argc, char *argv[])
 
         const real L = 1.0;
         const real dSphere = 0.2;
-        const real Re = 1000.0; // related to the sphere's diameter
+        const real Re = 300.0; // related to the sphere's diameter
         const real velocity = 1.0;
         const real dt = (real)0.5e-3;
-        const uint nx = 64;
+        const uint nx = 50;
 
-        const uint timeStepOut = 1000;
+        const uint timeStepOut = 10000;
         const uint timeStepEnd = 10000;
 
-        //////////////////////////////////////////////////////////////////////////
-        // setup logger
-        //////////////////////////////////////////////////////////////////////////
-
-        logging::Logger::addStream(&std::cout);
-        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-        logging::Logger::timeStamp(logging::Logger::ENABLE);
-        logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
         //////////////////////////////////////////////////////////////////////////
         // setup simulation parameters (with or without config file)
-        //////////////////////////////////////////////////////////////////////////
+        //////////////////////////
 
         vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();;
         SPtr<Parameter> para;
@@ -114,23 +102,8 @@ int main(int argc, char *argv[])
         GridScalingFactory scalingFactory = GridScalingFactory();
         vf::basics::ConfigurationFile config;
         if (useConfigFile) {
-            //////////////////////////////////////////////////////////////////////////
-            // read simulation parameters from config file
-            //////////////////////////////////////////////////////////////////////////
-
-            // assuming that a config files is stored parallel to this file.
-            std::filesystem::path configPath = __FILE__;
-
-            // the config file's default name can be replaced by passing a command line argument
-            std::string configName("config.txt");
-            if (argc == 2) {
-                configName = argv[1];
-                std::cout << "Using configFile command line argument: " << configName << std::endl;
-            }
-
-            configPath.replace_filename(configName);
-            config.load(configPath.string());
-
+            VF_LOG_TRACE("For the default config path to work, execute the app from the project root.");
+            config = vf::basics::ConfigurationFile::loadConfig(argc, argv, "./apps/gpu/LBM/SphereGPU/config.txt"); // assign to the outer 'config' (do not shadow it): it is queried again below for "STLPath"
             para = std::make_shared<Parameter>(&config);
         } else {
             para = std::make_shared<Parameter>();
@@ -150,10 +123,10 @@ int main(int argc, char *argv[])
 
         real dx = L / real(nx);
         gridBuilder->addCoarseGrid(-1.0 * L, -0.6 * L, -0.6 * L,
-                                    8.0 * L,  0.6 * L,  0.6 * L, dx);
+                                    3.0 * L,  0.6 * L,  0.6 * L, dx);
 
         // use primitive
-        // Object *sphere = new Sphere(0.0, 0.0, 0.0, dSphere / 2.0);
+        // auto sphere = std::make_shared<Sphere>(0.0, 0.0, 0.0, dSphere / 2.0);
 
         // use stl
         std::string stlPath = "./apps/gpu/LBM/SphereGPU/sphere02.stl";
@@ -161,7 +134,7 @@ int main(int argc, char *argv[])
             stlPath = config.getValue<std::string>("STLPath");
         }
         std::cout << "Reading stl from " << stlPath << "." << std::endl;
-        Object *sphere = TriangularMesh::make(stlPath);
+        auto sphere = std::make_shared<TriangularMesh>(stlPath);
 
         gridBuilder->addGeometry(sphere);
         gridBuilder->setPeriodicBoundaryCondition(false, false, false);
@@ -171,7 +144,7 @@ int main(int argc, char *argv[])
         //////////////////////////////////////////////////////////////////////////
 
         // gridBuilder->setNumberOfLayers(10, 8);
-        // gridBuilder->addGrid(new Sphere(0.0, 0.0, 0.0, 2.0 * dSphere), 1);
+        // gridBuilder->addGrid(std::make_shared<Sphere>(0.0, 0.0, 0.0, 2.0 * dSphere), 1);
         // para->setMaxLevel(2);
         // scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleK17);
 
@@ -179,7 +152,7 @@ int main(int argc, char *argv[])
         // build grid
         //////////////////////////////////////////////////////////////////////////
 
-        gridBuilder->buildGrids(LBM, false);  // buildGrids() has to be called before setting the BCs!!!!
+        gridBuilder->buildGrids(false);  // buildGrids() has to be called before setting the BCs!!!!
 
         //////////////////////////////////////////////////////////////////////////
         // compute parameters in lattice units
diff --git a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
old mode 100644
new mode 100755
index 1d31ae10a8ae0678505141ebdede43df977dbf5e..da80302e9e9b5b6f43c7eb3eea0ae8be08f22b93
--- a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
+++ b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
@@ -1,4 +1,3 @@
-
 #define _USE_MATH_DEFINES
 #include <exception>
 #include <fstream>
@@ -14,15 +13,12 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "basics/Core/DataTypes.h"
-#include "basics/Core/VectorTypes.h"
+#include "basics/DataTypes.h"
 #include "basics/PointerDefinitions.h"
 
-#include "basics/Core/LbmOrGks.h"
-#include "basics/Core/Logger/Logger.h"
-#include "basics/Core/StringUtilities/StringUtil.h"
+#include "basics/StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -55,6 +51,7 @@
 #include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -65,45 +62,16 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//          U s e r    s e t t i n g s
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Phoenix
-// const std::string outPath("/work/y0078217/Results/SphereScalingResults/");
-// const std::string gridPathParent = "/work/y0078217/Grids/GridSphereScaling/";
-// const std::string simulationName("SphereScaling");
-// const std::string stlPath("/home/y0078217/STL/Sphere/");
-
-// Relative Paths
-const std::string outPath("./output/SphereScalingResults/");
-const std::string gridPathParent = "./output/grids/SphereScalingResults/";
-const std::string simulationName("SphereScaling");
-const std::string stlPath("./stl/SphereScaling/");
 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void multipleLevel(std::filesystem::path& configPath)
+void runVirtualFluids(const vf::basics::ConfigurationFile& config)
 {
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     auto gridFactory = GridFactory::make();
     gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
-    vf::basics::ConfigurationFile config;
-    config.load(configPath.string());
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory = GridScalingFactory();
 
@@ -119,10 +87,13 @@ void multipleLevel(std::filesystem::path& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     bool useGridGenerator   = true;
-    bool useLevels          = true;
+    bool useLevels = true;
     std::string scalingType = "strong"; // "strong" // "weak"
-    // para->setUseStreams(true);                        // set in config
-    // para->useReducedCommunicationAfterFtoC = true;    // set in config
+
+    const std::string outPath("output/" + std::to_string(para->getNumprocs()) + "GPU/");
+    const std::string simulationName("SphereScaling");
+    const std::string gridPath = "./output/grids/";
+    const std::string stlPath("./stl/SphereScaling/");
 
     if (para->getNumprocs() == 1) {
         para->useReducedCommunicationAfterFtoC = false;
@@ -130,10 +101,9 @@ void multipleLevel(std::filesystem::path& configPath)
     if (scalingType != "weak" && scalingType != "strong")
         std::cerr << "unknown scaling type" << std::endl;
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    std::string gridPath(gridPathParent); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file
 
     real dxGrid      = (real)1.0;
-    real vxLB        = (real)0.0005; // LB units
+    real vxLB        = (real)0.005;  // LB units
     real viscosityLB = 0.001;        //(vxLB * dxGrid) / Re;
 
     para->setVelocityLB(vxLB);
@@ -142,14 +112,9 @@ void multipleLevel(std::filesystem::path& configPath)
     para->setViscosityRatio((real)0.058823529);
     para->setDensityRatio((real)998.0);
 
-
-    // para->setTimestepOut(10);
-    // para->setTimestepEnd(10);
-
     para->setCalcDragLift(false);
     para->setUseWale(false);
 
-
     para->setOutputPrefix(simulationName);
     if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);}
     para->setPrintFiles(true);
@@ -159,12 +124,8 @@ void multipleLevel(std::filesystem::path& configPath)
     else
         para->setMaxLevel(1);
 
-    // para->setMainKernel("CumulantK17CompChim");
-    para->setMainKernel("CumulantK17CompChimStream");
-    //para->setMainKernel("CumulantK17CompChimRedesigned");
-    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleRhoSq);
-
-
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
+    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -214,7 +175,7 @@ void multipleLevel(std::filesystem::path& configPath)
             real overlap = (real)8.0 * dxGrid;
             gridBuilder->setNumberOfLayers(10, 8);
 
-            if (communicator.getNummberOfProcess() == 2) {
+            if (communicator.getNumberOfProcess() == 2) {
                 real zSplit = 0.5 * sideLengthCube;
 
                 if (scalingType == "weak") {
@@ -234,10 +195,10 @@ void multipleLevel(std::filesystem::path& configPath)
                 if (useLevels) {
                     if (scalingType == "strong") {
                         gridBuilder->addGrid(
-                            new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1),
+                            std::make_shared<Sphere>(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1),
                             1);
                     } else if (scalingType == "weak") {
-                        gridBuilder->addGrid(new Cuboid(-0.5 * dCubeLev1, -0.5 * dCubeLev1,
+                        gridBuilder->addGrid(std::make_shared<Cuboid>(-0.5 * dCubeLev1, -0.5 * dCubeLev1,
                                                         sideLengthCube - 0.5 * dCubeLev1, 0.5 * dCubeLev1,
                                                         0.5 * dCubeLev1, sideLengthCube + 0.5 * dCubeLev1),
                                              1);
@@ -246,14 +207,14 @@ void multipleLevel(std::filesystem::path& configPath)
 
                 if (scalingType == "weak") {
                     if (useLevels) {
-                        gridBuilder->addGeometry(new Sphere(0.0, 0.0, sideLengthCube, dSphere));
+                        gridBuilder->addGeometry(std::make_shared<Sphere>(0.0, 0.0, sideLengthCube, dSphere));
                     } else {
-                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_2GPU.stl");
+                        auto sphereSTL = std::make_shared<TriangularMesh>(stlPath + "Spheres_2GPU.stl");
                         gridBuilder->addGeometry(sphereSTL);
                     }
                 } else if (scalingType == "strong") {
                     gridBuilder->addGeometry(
-                        new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                        std::make_shared<Sphere>(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
                 }
 
                 if (generatePart == 0)
@@ -263,15 +224,15 @@ void multipleLevel(std::filesystem::path& configPath)
                     gridBuilder->setSubDomainBox(
                         std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 1);
                 }
 
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
 
@@ -288,7 +249,7 @@ void multipleLevel(std::filesystem::path& configPath)
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
 
-            } else if (communicator.getNummberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcess() == 4) {
                 real ySplit = 0.5 * sideLengthCube;
                 real zSplit = 0.5 * sideLengthCube;
 
@@ -319,10 +280,10 @@ void multipleLevel(std::filesystem::path& configPath)
                 if (useLevels) {
                     if (scalingType == "strong") {
                         gridBuilder->addGrid(
-                            new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1),
+                            std::make_shared<Sphere>(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1),
                             1);
                     } else if (scalingType == "weak") {
-                        gridBuilder->addGrid(new Cuboid(-0.5 * dCubeLev1, sideLengthCube - 0.5 * dCubeLev1,
+                        gridBuilder->addGrid(std::make_shared<Cuboid>(-0.5 * dCubeLev1, sideLengthCube - 0.5 * dCubeLev1,
                                                         sideLengthCube - 0.5 * dCubeLev1, 0.5 * dCubeLev1,
                                                         sideLengthCube + 0.5 * dCubeLev1,
                                                         sideLengthCube + 0.5 * dCubeLev1),
@@ -332,14 +293,14 @@ void multipleLevel(std::filesystem::path& configPath)
 
                 if (scalingType == "weak") {
                     if (useLevels) {
-                        gridBuilder->addGeometry(new Sphere(0.0, sideLengthCube, sideLengthCube, dSphere));
+                        gridBuilder->addGeometry(std::make_shared<Sphere>(0.0, sideLengthCube, sideLengthCube, dSphere));
                     } else {
-                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_4GPU.stl");
+                        auto sphereSTL = std::make_shared<TriangularMesh>(stlPath + "Spheres_4GPU.stl");
                         gridBuilder->addGeometry(sphereSTL);
                     }
                 } else if (scalingType == "strong") {
                     gridBuilder->addGeometry(
-                        new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                        std::make_shared<Sphere>(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
                 }
 
                 if (generatePart == 0)
@@ -355,31 +316,31 @@ void multipleLevel(std::filesystem::path& configPath)
                     gridBuilder->setSubDomainBox(
                         std::make_shared<BoundingBox>(xGridMin, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
                 gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 2);
                 }
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 3);
                 }
                 if (generatePart == 2) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
                 if (generatePart == 3) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
                 }
 
@@ -404,7 +365,7 @@ void multipleLevel(std::filesystem::path& configPath)
                 gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcess() == 8) {
                 real xSplit = 0.5 * sideLengthCube;
                 real ySplit = 0.5 * sideLengthCube;
                 real zSplit = 0.5 * sideLengthCube;
@@ -454,11 +415,11 @@ void multipleLevel(std::filesystem::path& configPath)
                 if (useLevels) {
                     if (scalingType == "strong") {
                         gridBuilder->addGrid(
-                            new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1),
+                            std::make_shared<Sphere>(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1),
                             1);
                     } else if (scalingType == "weak") {
                         gridBuilder->addGrid(
-                            new Cuboid(sideLengthCube - 0.5 * dCubeLev1, sideLengthCube - 0.5 * dCubeLev1,
+                            std::make_shared<Cuboid>(sideLengthCube - 0.5 * dCubeLev1, sideLengthCube - 0.5 * dCubeLev1,
                                        sideLengthCube - 0.5 * dCubeLev1, sideLengthCube + 0.5 * dCubeLev1,
                                        sideLengthCube + 0.5 * dCubeLev1, sideLengthCube + 0.5 * dCubeLev1),
                             1);
@@ -467,14 +428,14 @@ void multipleLevel(std::filesystem::path& configPath)
 
                 if (scalingType == "weak") {
                     if (useLevels) {
-                        gridBuilder->addGeometry(new Sphere(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
+                        gridBuilder->addGeometry(std::make_shared<Sphere>(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
                     } else {
-                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_8GPU.stl");
+                        auto sphereSTL = std::make_shared<TriangularMesh>(stlPath + "Spheres_8GPU.stl");
                         gridBuilder->addGeometry(sphereSTL);
                     }
                 } else if (scalingType == "strong") {
                     gridBuilder->addGeometry(
-                        new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                        std::make_shared<Sphere>(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
                 }
 
                 if (generatePart == 0)
@@ -502,71 +463,71 @@ void multipleLevel(std::filesystem::path& configPath)
                     gridBuilder->setSubDomainBox(
                         std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
 
-                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
                 gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
                 if (generatePart == 0) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 4);
                 }
                 if (generatePart == 1) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 5);
                 }
                 if (generatePart == 2) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 6);
                 }
                 if (generatePart == 3) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 1);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 7);
                 }
                 if (generatePart == 4) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 5);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 6);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
                 }
                 if (generatePart == 5) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 4);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 7);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
                 }
                 if (generatePart == 6) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 7);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 4);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 2);
                 }
                 if (generatePart == 7) {
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 6);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 5);
-                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ);
                     gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 3);
                 }
 
@@ -628,9 +589,9 @@ void multipleLevel(std::filesystem::path& configPath)
                 gridBuilder->setNumberOfLayers(10, 8);
                 if (scalingType == "strong") {
                     gridBuilder->addGrid(
-                        new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                        std::make_shared<Sphere>(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
                 } else if (scalingType == "weak")
-                    gridBuilder->addGrid(new Cuboid(sideLengthCube - 0.5 * dCubeLev1, sideLengthCube - 0.5 * dCubeLev1,
+                    gridBuilder->addGrid(std::make_shared<Cuboid>(sideLengthCube - 0.5 * dCubeLev1, sideLengthCube - 0.5 * dCubeLev1,
                                                     sideLengthCube - 0.5 * dCubeLev1, sideLengthCube + 0.5 * dCubeLev1,
                                                     sideLengthCube + 0.5 * dCubeLev1, sideLengthCube + 0.5 * dCubeLev1),
                                          1);
@@ -638,17 +599,17 @@ void multipleLevel(std::filesystem::path& configPath)
 
             if (scalingType == "weak") {
                 if (useLevels) {
-                    gridBuilder->addGeometry(new Sphere(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
+                    gridBuilder->addGeometry(std::make_shared<Sphere>(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
                 } else {
-                    TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_1GPU.stl");
+                    auto sphereSTL = std::make_shared<TriangularMesh>(stlPath + "Spheres_1GPU.stl");
                     gridBuilder->addGeometry(sphereSTL);
                 }
             } else {
                 gridBuilder->addGeometry(
-                    new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                    std::make_shared<Sphere>(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
             }
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
             //////////////////////////////////////////////////////////////////////////
@@ -696,20 +657,9 @@ int main(int argc, char *argv[])
     if (argv != NULL) {
 
         try {
-            //////////////////////////////////////////////////////////////////////////
-            // assuming that a config files is stored parallel to this file.
-            std::filesystem::path configPath = __FILE__;
-
-            // the config file's default name can be replaced by passing a command line argument
-            std::string configName("config.txt");
-            if (argc == 2) {
-                configName = argv[1];
-                std::cout << "Using configFile command line argument: " << configName << std::endl;
-            }
-
-            configPath.replace_filename(configName);
-
-            multipleLevel(configPath);
+            VF_LOG_INFO("For the default config path to work, execute the app from the project root.");
+            vf::basics::ConfigurationFile config = vf::basics::ConfigurationFile::loadConfig(argc, argv, "./apps/gpu/LBM/SphereScaling/config.txt");
+            runVirtualFluids(config);
 
             //////////////////////////////////////////////////////////////////////////
         } catch (const spdlog::spdlog_ex &ex) {
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c5789cdf96049b7c0a31ce693c29cd2db4952a58
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt
@@ -0,0 +1,17 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=10000
+TimeOut=10000
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c5789cdf96049b7c0a31ce693c29cd2db4952a58
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt
@@ -0,0 +1,17 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=10000
+TimeOut=10000
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
index d8642c7b267bcad6c58ab2a9c178c2d9394ecf2a..050efc6d0f0f2b80ca2da2df26cdb71b1e52f3ad 100644
--- a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
+++ b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
@@ -1,63 +1,93 @@
-//#define MPI_LOGGING
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TGV_3D.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr
+//=======================================================================================
+#define _USE_MATH_DEFINES
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <math.h>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
 
-//Martin Branch
+#include "mpi.h"
 
-#include <mpi.h>
-#if defined( MPI_LOGGING )
-	#include <mpe.h>
-#endif
+//////////////////////////////////////////////////////////////////////////
 
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <stdexcept>
-#include <fstream>
-#define _USE_MATH_DEFINES
-#include <math.h>
+#include "DataTypes.h"
+#include <logger/Logger.h>
 
-//#include "metis.h"
+#include "PointerDefinitions.h"
 
-#include "basics/Core/LbmOrGks.h"
-#include "basics/Core/StringUtilities/StringUtil.h"
-#include <basics/config/ConfigurationFile.h>
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
+#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
+#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
-#include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Output/FileWriter.h"
-
-#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
-#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
-
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
 
-#include "global.h"
-
-#include "geometries/Sphere/Sphere.h"
-#include "geometries/VerticalCylinder/VerticalCylinder.h"
-#include "geometries/Cuboid/Cuboid.h"
-#include "geometries/TriangularMesh/TriangularMesh.h"
-#include "geometries/Conglomerate/Conglomerate.h"
-#include "geometries/TriangularMesh/TriangularMeshStrategy.h"
-
-#include "grid/GridBuilder/LevelGridBuilder.h"
-#include "grid/GridBuilder/MultipleGridBuilder.h"
-#include "grid/BoundaryConditions/Side.h"
-#include "grid/BoundaryConditions/BoundaryCondition.h"
-#include "grid/GridFactory.h"
 
-#include "io/SimulationFileWriter/SimulationFileWriter.h"
-#include "io/GridVTKWriter/GridVTKWriter.h"
-#include "io/STLReaderWriter/STLReader.h"
-#include "io/STLReaderWriter/STLWriter.h"
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "utilities/math/Math.h"
-#include "utilities/communication.h"
-#include "utilities/transformator/TransformatorImp.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // from https://stackoverflow.com/questions/865668/how-to-parse-command-line-arguments-in-c
@@ -92,25 +122,16 @@ uint gpuIndex = 0;
 bool useLimiter = false;
 bool useWale = false;
 
-std::string kernel( "CumulantK17Comp" );
+std::string kernel( "CumulantK17" );
 
-std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS
-//std::string path("E:/DrivenCavity/results/"); //TESLA03
+//std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS
+std::string path("D:/out/TGV_3D/"); //TESLA03
 
 std::string simulationName("TGV_3D");
 //////////////////////////////////////////////////////////////////////////
 
 void multipleLevel(const std::string& configPath)
 {
-    //std::ofstream logFile( "F:/Work/Computations/gridGenerator/grid/gridGeneratorLog.txt" );
-    //std::ofstream logFile( "grid/gridGeneratorLog.txt" );
-    //logging::Logger::addStream(&logFile);
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     //UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG5");
@@ -124,7 +145,7 @@ void multipleLevel(const std::string& configPath)
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -139,9 +160,8 @@ void multipleLevel(const std::string& configPath)
 
     const real viscosity = nx / ( 2.0 * PI ) * velocity / Re;
 
-    *logging::out << logging::Logger::INFO_HIGH << "velocity = " << velocity << " s\n";
-
-    *logging::out << logging::Logger::INFO_HIGH << "viscosity = " << viscosity << "\n";
+    VF_LOG_INFO("velocity = {}", velocity);
+    VF_LOG_INFO("viscosity = {}", viscosity);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -152,7 +172,7 @@ void multipleLevel(const std::string& configPath)
 
 	gridBuilder->setPeriodicBoundaryCondition(true, true, true);
 
-	gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+	gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -311,22 +331,17 @@ int main( int argc, char* argv[])
 		}
         catch (const std::bad_alloc& e)
         {
-
-            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-            //std::cout << e.what() << std::flush;
+            std::cout << e.what() << std::flush;
             //MPI_Abort(MPI_COMM_WORLD, -1);
         }
         catch (const std::exception& e)
         {
-
-            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-            //std::cout << e.what() << std::flush;
+            std::cout << e.what() << std::flush;
             //MPI_Abort(MPI_COMM_WORLD, -1);
         }
         catch (...)
         {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-            //std::cout << "unknown exeption" << std::endl;
+            std::cout << "unknown exeption" << std::endl;
         }
 
         //std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt b/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..efb4310669f9c0de7aa5cf3f1e4dffa00bd66cbf
--- /dev/null
+++ b/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt
@@ -0,0 +1,7 @@
+PROJECT(TGV_3D_GridRef LANGUAGES CUDA CXX)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES TGV_3D_GridRef.cpp)
+
+set_source_files_properties(TGV_3D_GridRef.cpp PROPERTIES LANGUAGE CUDA)
+
+set_target_properties(TGV_3D_GridRef PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp b/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0f945e32d9fbcb2e18d4888a4fa0a2c6e03c21b4
--- /dev/null
+++ b/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp
@@ -0,0 +1,386 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TGV_3D_GridRef.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr
+//=======================================================================================
+#define _USE_MATH_DEFINES
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <math.h>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+#include "mpi.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "DataTypes.h"
+#include <logger/Logger.h>
+
+#include "PointerDefinitions.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/geometries/Conglomerate/Conglomerate.h"
+#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
+#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// from https://stackoverflow.com/questions/865668/how-to-parse-command-line-arguments-in-c
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Returns the token that follows `option` in [begin, end), or nullptr when the option is absent or is the last token.
+char* getCmdOption(char ** begin, char ** end, const std::string & option)
+{
+    char ** position = std::find(begin, end, option);
+    if (position == end || ++position == end)
+        return nullptr;
+    return *position;
+}
+
+// Returns true when `option` occurs anywhere in [begin, end).
+bool cmdOptionExists(char** begin, char** end, const std::string& option)
+{
+    return end != std::find(begin, end, option);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////
+real Re =  1600.0; // Reynolds number: viscosity = nx / (2*PI) * velocity / Re; overridable via --Re
+
+uint dtPerL = 500; // sets the lattice velocity, velocity = 64 / (dtPerL * 2*PI); overridable via --dtPerL
+
+uint nx = 64; // coarse-grid cells per direction (dx = 2*PI / nx); overridable via --nx
+uint gpuIndex = 0; // device index passed to Parameter::setDevices(); overridable via --gpu
+
+bool useLimiter = false; // when false the quadric limiters are set to 1e6; enabled by --useLimiter
+bool useWale = false; // when true Parameter::setUseWale(true) is called; enabled by --useWale
+
+std::string kernel( "CumulantK17CompChimRedesigned" ); // main kernel name; overridable via --kernel
+
+std::string path("D:/out/TGV_3D/"); // output base path (MOLLOK, presumably a machine name); kernel name and "SingleGPU" are appended in multipleLevel()
+
+std::string simulationName("TGV_3D_Gridref_noSqPress"); // output prefix; nx and dtPerL are appended in multipleLevel()
+//////////////////////////////////////////////////////////////////////////
+
+// Builds a periodic coarse grid plus one refined cuboid (level 1), configures the Parameter object from the global settings and runs the simulation.
+void multipleLevel(const std::string& configPath)
+{
+    vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
+
+    auto gridFactory = GridFactory::make();
+    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+
+    vf::basics::ConfigurationFile config;
+    config.load(configPath);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+    BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
+    GridScalingFactory scalingFactory = GridScalingFactory();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    const real PI = 3.141592653589793238462643383279;
+
+    real L = nx / ( 2.0 * PI ); // = 1 / dx: coarse cells per unit length of the 2*PI wide domain
+
+    const real velocity = 64.0 / ( dtPerL * 2.0 * PI );
+
+    const real viscosity = nx / ( 2.0 * PI ) * velocity / Re;
+
+    VF_LOG_INFO("velocity = {}", velocity);
+    VF_LOG_INFO("viscosity = {}", viscosity);
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    real dx = 2.0 * PI / real(nx);
+
+    gridBuilder->addCoarseGrid(-PI, -PI, -PI,
+                                PI,  PI,  PI, dx);
+
+    gridBuilder->setNumberOfLayers(0, 0);
+
+    auto fineGrid = std::make_shared<Cuboid>(-PI * 0.5, -PI * 0.5, -PI * 0.5,
+                                     0.0,  PI * 0.5,       0.0);
+
+    gridBuilder->addGrid(fineGrid, 1);
+
+    gridBuilder->setPeriodicBoundaryCondition(true, true, true);
+
+    gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
+
+    //std::stringstream _path;
+ //   std::stringstream _prefix;
+
+ //   //_path << "F:/Work/Computations/TaylorGreenVortex_3D/TGV_LBM/" << nx << "_Re_1.6e4";
+ //   //_path << "F:/Work/Computations/TaylorGreenVortex_3D/TGV_LBM/" << nx << "_neqInit";
+ //   _path << "F:/Work/Computations/TaylorGreenVortex_3D/TGV_LBM/Re_1600/AA2016/" << nx << "_FD_O8";
+
+ //   //_path << "./results/AA2016/" << nx;
+ //   //_path << "./results/CumOne/" << nx;
+ //   //_path << "./results/F3_2018/" << nx;
+
+ //   _prefix << "TGV_3D_" << nx << "_" ;
+
+ //   para->setOutputPath(_path.str());
+ //   para->setOutputPrefix(_prefix.str());
+ //   para->setPathAndFilename(_path.str() + "/" + _prefix.str());
+
+    //////////////////////////////////////////////////////////////////////////
+
+    {
+        // output path = base path + kernel name + "SingleGPU" (+ "_Limiter" when the limiter is enabled)
+        std::stringstream _path;
+
+        _path << path;
+        _path << kernel;
+        _path << "SingleGPU";
+
+        if (useLimiter) _path << "_Limiter";
+
+        path = _path.str();
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+
+    {
+        std::stringstream _simulationName;
+
+        _simulationName << simulationName;
+        _simulationName << "_nx_" << nx;
+        _simulationName << "_dtPerL_" << dtPerL << "_";
+
+        simulationName = _simulationName.str();
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+
+    para->setDevices(std::vector<uint>{gpuIndex});
+
+    //////////////////////////////////////////////////////////////////////////
+
+    para->setOutputPath( path );
+    para->setOutputPrefix( simulationName );
+
+    para->setPrintFiles(true);
+
+    para->setTimestepEnd(40 * lround(L / velocity));
+    para->setTimestepOut(5 * lround(L / velocity));
+    //para->setTimestepOut(lround(L / velocity));
+ //   para->setTimestepEnd(2048);
+    //para->setTimestepOut(512);
+ //   para->setTimestepStartOut(500);
+
+    para->setVelocityLB( velocity );
+
+    para->setViscosityLB( viscosity );
+
+    para->setVelocityRatio( 1.0 / velocity );
+
+    para->setDensityRatio(1.0);
+
+    para->setInitialCondition( [&]( real coordX, real coordY, real coordZ, real& rho, real& vx, real& vy, real& vz){
+
+        real a = 1.0;
+        real b = 1.0;
+        real c = 1.0;
+
+        rho = 3.0 * ((velocity * velocity) / 16.0 * ( cos( 2.0 * a * coordX ) + cos( 2.0 * b * coordY ) ) * ( cos( 2.0 * c * coordZ ) + 2.0 ) );
+        vx  =  velocity * sin( a * coordX ) * cos( b * coordY ) * cos( c * coordZ );
+        vy  = -velocity * cos( a * coordX ) * sin( b * coordY ) * cos( c * coordZ );
+        vz  = 0.0;
+
+    } );
+
+    para->setMainKernel( kernel );
+
+    if( !useLimiter )
+        para->setQuadricLimiters( 1000000.0, 1000000.0, 1000000.0 );
+
+    if( useWale )
+        para->setUseWale( true );
+
+    para->setUseInitNeq( true );
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
+    SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+    //SPtr<GridProvider> gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
+
+    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, &scalingFactory);
+    sim.run();
+
+    //sim.addKineticEnergyAnalyzer( 10 );
+    //sim.addEnstrophyAnalyzer( 10 );
+
+    //sim.run();
+}
+
+
+// Entry point: initializes MPI, applies optional command-line overrides to the global settings and runs multipleLevel() with the config.txt located next to this source file.
+int main( int argc, char* argv[])
+{
+    MPI_Init(&argc, &argv);
+    std::string str, str2;
+    if ( argv != NULL )
+    {
+        //str = static_cast<std::string>(argv[0]);
+
+        try
+        {
+            //////////////////////////////////////////////////////////////////////////
+            std::string targetPath( __FILE__ );
+
+#ifdef _WIN32
+            targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
+#else
+            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
+#endif
+
+            //////////////////////////////////////////////////////////////////////////
+            // getCmdOption() returns a null pointer when a flag or its value is missing; in that case the default is kept.
+            if( const char* value = getCmdOption( argv, argv+argc, "--Re" ) )
+                Re = atof( value );
+
+            if( const char* value = getCmdOption( argv, argv+argc, "--nx" ) )
+                nx = atoi( value );
+
+            if( const char* value = getCmdOption( argv, argv+argc, "--dtPerL" ) )
+                dtPerL = atoi( value );
+
+            if( const char* value = getCmdOption( argv, argv+argc, "--kernel" ) )
+                kernel = value;
+
+            if( const char* value = getCmdOption( argv, argv+argc, "--gpu" ) )
+                gpuIndex = atoi( value );
+
+            if( cmdOptionExists( argv, argv+argc, "--useLimiter" ) )
+                useLimiter = true;
+
+            if( cmdOptionExists( argv, argv+argc, "--useWale" ) )
+                useWale = true;
+
+            multipleLevel(targetPath + "config.txt");
+
+            //////////////////////////////////////////////////////////////////////////
+        }
+        catch (const std::bad_alloc& e)
+        {
+            std::cout << "Bad alloc: " << e.what() << std::flush;
+            //MPI_Abort(MPI_COMM_WORLD, -1);
+        }
+        catch (const std::exception& e)
+        {
+            std::cout << e.what() << std::flush;
+            //MPI_Abort(MPI_COMM_WORLD, -1);
+        }
+        catch (...)
+        {
+            std::cout << "unknown exeption" << std::endl;
+        }
+
+        //std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
+        //MPI_Abort(MPI_COMM_WORLD, -1);
+    }
+
+   /*
+   MPE_Init_log() & MPE_Finish_log() are NOT needed when
+   liblmpe.a is linked with this program.  In that case,
+   MPI_Init() would have called MPE_Init_log() already.
+   */
+#if defined( MPI_LOGGING )
+   MPE_Init_log();
+#endif
+
+#if defined( MPI_LOGGING )
+   if ( argv != NULL )
+      MPE_Finish_log( argv[0] );
+   if ( str != "" )
+      MPE_Finish_log( str.c_str() );
+   else
+      MPE_Finish_log( "TestLog" );
+#endif
+
+   MPI_Finalize();
+   return 0;
+}
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/config.txt b/apps/gpu/LBM/TGV_3D_GridRef/config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae6d3e9bc4be5403d151f3d59ffb13af7164abf0
--- /dev/null
+++ b/apps/gpu/LBM/TGV_3D_GridRef/config.txt
@@ -0,0 +1,36 @@
+##################################################
+#GPU Mapping
+##################################################
+#Devices="0 1 2 3"
+#NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+#Path="E:/DrivenCavity/results"
+#Path="F:/Work/Computations/out/DrivenCavity/"
+#Prefix="DrivenCavity" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+#GridPath="E:/DrivenCavity/dummy"
+GridPath="F:/Work/Computations/out/TaylorGreen3DNew/grid"
+
+##################################################
+#number of grid levels
+##################################################
+NOGL=2
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantAA2016CompSP27
+
+##################################################
+#simulation parameter
+##################################################
+#TimeEnd=100000
+#TimeOut=1000 
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
index 045c208274bc6bc216d25e8c2fa905916a52f87b..0c7b9b9606201b32a8376ea637858eb14ec817bb 100644
--- a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
+++ b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
@@ -50,8 +50,7 @@
 
 //#include "metis.h"
 
-#include "Core/LbmOrGks.h"
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
@@ -157,17 +156,6 @@ void multipleLevel(const std::string& configPath)
     rankY = ( mpirank % ( sideLengthX * sideLengthY ) ) /   sideLengthX;
     rankZ =   mpirank                                   / ( sideLengthY * sideLengthX );
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    logging::Logger::addStream(&std::cout);
-
-    std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(mpirank) + ".log" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     //UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG5");
@@ -181,7 +169,7 @@ void multipleLevel(const std::string& configPath)
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     *logging::out << logging::Logger::INFO_HIGH << "SideLength = " << sideLengthX << " " << sideLengthY << " " << sideLengthZ << "\n";
@@ -231,7 +219,7 @@ void multipleLevel(const std::string& configPath)
 
     gridBuilder->setPeriodicBoundaryCondition(sideLengthX == 1, sideLengthY == 1, sideLengthZ == 1);
 
-	gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+	gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
     if( mpiWorldSize > 1 )
     {
@@ -242,12 +230,12 @@ void multipleLevel(const std::string& configPath)
         int rankPZ =    rankX                                    +    rankY                                    * sideLengthX + ( (rankZ + 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY;
         int rankMZ =    rankX                                    +    rankY                                    * sideLengthX + ( (rankZ - 1 + sideLengthZ) % sideLengthZ ) * sideLengthX * sideLengthY;
 
-        if( sideLengthX > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PX, GKS );
-        if( sideLengthX > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MX, GKS );
-        if( sideLengthY > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PY, GKS );
-        if( sideLengthY > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MY, GKS );
-        if( sideLengthZ > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PZ, GKS );
-        if( sideLengthZ > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MZ, GKS );
+        if( sideLengthX > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PX );
+        if( sideLengthX > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MX );
+        if( sideLengthY > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PY );
+        if( sideLengthY > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MY );
+        if( sideLengthZ > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::PZ );
+        if( sideLengthZ > 1 ) gridBuilder->findCommunicationIndices( CommunicationDirections::MZ );
 
         if( sideLengthX > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MX, rankMX);
         if( sideLengthY > 1 ) gridBuilder->setCommunicationProcess ( CommunicationDirections::MY, rankMY);
diff --git a/apps/gpu/LBM/TrafficTest/3rdPartyLinking.cmake b/apps/gpu/LBM/TrafficTest/3rdPartyLinking.cmake
deleted file mode 100644
index 7fb2e8a6d603bc12cd403934bfb4866fc67d498c..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/TrafficTest/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,13 +0,0 @@
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-#linkMPI(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-#linkCuda(${targetName})
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Boost/Link.cmake)
-#linkBoost(${targetName} "serialization")
-#include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-#linkMetis(${targetName})
-
-#if(HULC.BUILD_JSONCPP)
-#  include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-#  linkJsonCpp(${targetName})
-#endif()
\ No newline at end of file
diff --git a/apps/gpu/LBM/TrafficTest/CMakeLists.txt b/apps/gpu/LBM/TrafficTest/CMakeLists.txt
deleted file mode 100644
index 766debd8705a7123decd8a11bb885e8e35b6f2d0..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/TrafficTest/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src" 
-                       "${CMAKE_SOURCE_DIR}/src/Traffic"
-                       "${CMAKE_SOURCE_DIR}/src/GridGenerator"
-                       "${CMAKE_SOURCE_DIR}/src/Core")
-
-
-
-vf_add_library(BUILDTYPE binary PRIVATE_LINK Traffic basics GridGenerator)
-
-vf_get_library_name(library_name)
-target_include_directories(${library_name} PRIVATE "${CMAKE_SOURCE_DIR}/src/basics")
-target_include_directories(${library_name} PRIVATE "${CMAKE_SOURCE_DIR}/src/gpu")
\ No newline at end of file
diff --git a/apps/gpu/LBM/TrafficTest/CMakePackage.cmake b/apps/gpu/LBM/TrafficTest/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/TrafficTest/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/LBM/TrafficTest/Traffic_Main.cpp b/apps/gpu/LBM/TrafficTest/Traffic_Main.cpp
deleted file mode 100644
index 487b51e7515d9001de0cd6be5938e75ce7832da0..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/TrafficTest/Traffic_Main.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <memory>
-#include <ctime>
-
-#include "Core/DataTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "Traffic/TrafficMovementFactory.h"
-#include "Traffic/TrafficMovementFactory - Kopie.h"
-
-int main()
-{
-	 
-	//////Basel
-
-	for (uint i = 0; i < 2; i++) {
-
-		{
-			uint numberOfTimesteps = 1000*1000;
-			bool useGPU = false;
-
-
-			//Stephans Logger
-			logging::Logger::addStream(&std::cout);
-			logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-			logging::Logger::timeStamp(logging::Logger::ENABLE);
-			logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-
-			//init TrafficMovement
-			TrafficMovementFactory * factory = new TrafficMovementFactory();
-			std::string path = "C:/Users/hiwi/BaselDokumente/";
-			factory->initTrafficMovement(path, useGPU);
-
-
-			//clock
-			std::clock_t start;
-			double duration;
-			start = std::clock();
-
-
-			//loop through timestep
-			for (uint step = 1; step <= numberOfTimesteps; step++) {
-				factory->calculateTimestep(step);
-				factory->writeReducedTimestep(step);
-			}
-
-
-			//end simulation
-			duration = (std::clock() - start) / (double)CLOCKS_PER_SEC;
-
-			factory->endSimulation(numberOfTimesteps, duration);
-
-			std::cout << "Dauer: " << duration << '\n';
-
-			factory->writeTimestep(numberOfTimesteps);
-		}
-	}
-
-
-
-	//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	
-	////Testcases
-
-	//{uint numberOfTimesteps = 20;
-
-	//TrafficMovementFactoryTest * factory = new TrafficMovementFactoryTest();
-	//factory->initTrafficMovement(useGPU);
-	//factory->loopThroughTimesteps(numberOfTimesteps);
-
-	//std::cout << std::endl << std::endl; }
-
-
-}
-
diff --git a/apps/gpu/LBM/TrafficTest/a.exp b/apps/gpu/LBM/TrafficTest/a.exp
deleted file mode 100644
index 7d295d48b1c52dfbd8db4278642942f1cbbbd6e1..0000000000000000000000000000000000000000
Binary files a/apps/gpu/LBM/TrafficTest/a.exp and /dev/null differ
diff --git a/apps/gpu/LBM/TrafficTest/resources/ExampleStreets.txt b/apps/gpu/LBM/TrafficTest/resources/ExampleStreets.txt
deleted file mode 100644
index 9e2df6c6dad1adc8fbfcdb7f8e91d794c56d0773..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/TrafficTest/resources/ExampleStreets.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-10
-  256     5    87     5  1
-   87     5   -85     5  1
-  -85     5   -80   256  1
-  -85     5  -256     5  1
- -185  -256   -85     5  1
-  -92     0    87     0  1
-  -85   256   -92     0  1
- -256     0   -92     0  1
-   87     0   256     0  1
-  -92     0  -190  -256  1
\ No newline at end of file
diff --git a/apps/gpu/LBM/TrafficTest/resources/FlatGround.stl b/apps/gpu/LBM/TrafficTest/resources/FlatGround.stl
deleted file mode 100644
index 8b18606cc5050121d3d724f248f7b7bd9d2425e8..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/TrafficTest/resources/FlatGround.stl
+++ /dev/null
@@ -1,86 +0,0 @@
-solid FlatGround
-facet normal 0.0 0.0 -1.0
-  outer loop
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex -300.0 299.9999999999999 -10.000000000000002
-  endloop
-endfacet
-facet normal 0.0 0.0 -1.0
-  outer loop
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex 300.0 -299.99999999999994 -10.000000000000002
-  endloop
-endfacet
-facet normal -0.0 -0.0 1.0
-  outer loop
-    vertex 300.0 -299.99999999999994 0.0
-    vertex -300.0 299.9999999999999 0.0
-    vertex -300.0 -299.99999999999994 0.0
-  endloop
-endfacet
-facet normal -0.0 -0.0 1.0
-  outer loop
-    vertex -300.0 299.9999999999999 0.0
-    vertex 300.0 -299.99999999999994 0.0
-    vertex 300.0 299.9999999999999 0.0
-  endloop
-endfacet
-facet normal 0.0 -1.0 0.0
-  outer loop
-    vertex 300.0 -299.99999999999994 0.0
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex 300.0 -299.99999999999994 -10.000000000000002
-  endloop
-endfacet
-facet normal 0.0 -1.0 0.0
-  outer loop
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex 300.0 -299.99999999999994 0.0
-    vertex -300.0 -299.99999999999994 0.0
-  endloop
-endfacet
-facet normal -1.0 0.0 0.0
-  outer loop
-    vertex -300.0 299.9999999999999 0.0
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex -300.0 -299.99999999999994 0.0
-  endloop
-endfacet
-facet normal -1.0 0.0 0.0
-  outer loop
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex -300.0 299.9999999999999 0.0
-    vertex -300.0 299.9999999999999 -10.000000000000002
-  endloop
-endfacet
-facet normal -0.0 1.0 -0.0
-  outer loop
-    vertex -300.0 299.9999999999999 0.0
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex -300.0 299.9999999999999 -10.000000000000002
-  endloop
-endfacet
-facet normal -0.0 1.0 -0.0
-  outer loop
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex -300.0 299.9999999999999 0.0
-    vertex 300.0 299.9999999999999 0.0
-  endloop
-endfacet
-facet normal 1.0 -0.0 -0.0
-  outer loop
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex 300.0 -299.99999999999994 0.0
-    vertex 300.0 -299.99999999999994 -10.000000000000002
-  endloop
-endfacet
-facet normal 1.0 -0.0 -0.0
-  outer loop
-    vertex 300.0 -299.99999999999994 0.0
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex 300.0 299.9999999999999 0.0
-  endloop
-endfacet
-endsolid FlatGround
diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
index 06b3678d7c8ddd236c26a69686356fbe87c31db2..d925eef7e7f452e19b63284f9ad3a6af25740e79 100644
--- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
+++ b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WTG_RUB.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr
+//=======================================================================================
 #define _USE_MATH_DEFINES
 #include <math.h>
 #include <string>
@@ -13,16 +45,10 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "PointerDefinitions.h"
 
-#include "Core/LbmOrGks.h"
-#include "Core/StringUtilities/StringUtil.h"
-
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include <basics/config/ConfigurationFile.h>
+#include <logger/Logger.h>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -51,7 +77,6 @@
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 
-#include <logger/Logger.h>
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -63,8 +88,6 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-LbmOrGks lbmOrGks = LBM;
-
 // const real L  = 1.0;
 
 const real velocity  = 1.0;
@@ -106,11 +129,6 @@ std::string chooseVariation();
 
 void multipleLevel(const std::string& configPath)
 {
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     auto gridFactory = GridFactory::make();
@@ -149,8 +167,8 @@ void multipleLevel(const std::string& configPath)
     real z_min = 0.0 + z_offset;
     real z_max = 160.0 + z_offset;
 
-    //TriangularMesh *RubSTL      = TriangularMesh::make(inputPath + "stl/Var02_0deg_FD_b.stl");
-    TriangularMesh *RubSTL      = TriangularMesh::make(inputPath + "stl/" + chooseVariation() + ".stl");
+    //auto RubSTL      = std::make_shared<TriangularMesh>(inputPath + "stl/Var02_0deg_FD_b.stl");
+    auto RubSTL      = std::make_shared<TriangularMesh>(inputPath + "stl/" + chooseVariation() + ".stl");
     std::vector<real> originOfCityXY = { 600.0, y_max / 2, z_offset };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -186,7 +204,7 @@ void multipleLevel(const std::string& configPath)
 
 	gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-	gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!!
+	gridBuilder->buildGrids(false); // buildGrids() has to be called before setting the BCs!!!!
 
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -200,7 +218,7 @@ void multipleLevel(const std::string& configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath);
 
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -209,8 +227,8 @@ void multipleLevel(const std::string& configPath)
 	//const real vx = velocityLB / (real)sqrt(2.0); // LB units
 	//const real vy = velocityLB / (real)sqrt(2.0); // LB units
 
-    *logging::out << logging::Logger::INFO_HIGH << "velocity  [dx/dt] = " << velocityLB << " \n";
-    *logging::out << logging::Logger::INFO_HIGH << "viscosity [dx^2/dt] = " << viscosityLB << "\n";
+    VF_LOG_INFO("velocityLB [dx/dt] = " << velocityLB);
+    VF_LOG_INFO("viscosityLB [dx^2/dt] = " << viscosityLB);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -383,18 +401,18 @@ void addFineGrids(SPtr<MultipleGridBuilder> gridBuilder, uint &maxLevel, real &r
         // FG5 -> dx = 1,25 mm;   lvl 5
         //
         // FineGrid Level 1 ->dx = 2 cm; lvl 1
-        auto FG1 = new Cuboid(-20, -20, -5 + z_offset, 800, 200, 75 + z_offset);
+        auto FG1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 800, 200, 75 + z_offset);
 
         // FineGrid Level 2 -> dx = 1 cm; lvl 2
-        auto FG2_1 = new Cuboid(-20, -20, -5 + z_offset, 760, 200, 10 + z_offset);
-        auto FG2_2 = new Cuboid(500, -20,  5 + z_offset, 680, 210, 50 + z_offset);
+        auto FG2_1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 760, 200, 10 + z_offset);
+        auto FG2_2 = std::make_shared<Cuboid>(500, -20,  5 + z_offset, 680, 210, 50 + z_offset);
         auto FG2   = new Conglomerate();
         FG2->add(FG2_1);
         FG2->add(FG2_2);
 
         // FineGrid Level 3 ->dx = 5 mm; lvl 3
-        auto FG3_1 = new Cuboid(517, -20, -5 + z_offset, 665, 200, 30 + z_offset);
-        auto FG3_2 = new Cuboid(550, 58, -5 + z_offset, 650, 132, 40 + z_offset);
+        auto FG3_1 = std::make_shared<Cuboid>(517, -20, -5 + z_offset, 665, 200, 30 + z_offset);
+        auto FG3_2 = std::make_shared<Cuboid>(550, 58, -5 + z_offset, 650, 132, 40 + z_offset);
         auto FG3   = new Conglomerate();
         FG3->add(FG3_1);
         FG3->add(FG3_2);
@@ -409,19 +427,19 @@ void addFineGrids(SPtr<MultipleGridBuilder> gridBuilder, uint &maxLevel, real &r
                     gridBuilder->addGrid(FG3, 3);
                     if (maxLevel >= 4) {
                         if (rotationOfCity == 0.0) {
-                            TriangularMesh *FG4 = TriangularMesh::make(inputPath + "stl/FG4_0deg.stl");
+                            auto FG4 = std::make_shared<TriangularMesh>(inputPath + "stl/FG4_0deg.stl");
                             gridBuilder->addGrid(FG4, 4);
                         } else {
-                            TriangularMesh *FG4 = TriangularMesh::make(inputPath + "stl/FG4_63deg.stl");
+                            auto FG4 = std::make_shared<TriangularMesh>(inputPath + "stl/FG4_63deg.stl");
                             gridBuilder->addGrid(FG4, 4);
                         }
 
                         if (maxLevel == 5) {
                             if (rotationOfCity == 0.0) {
-                                TriangularMesh *FG5 = TriangularMesh::make(inputPath + "stl/FG5_0deg.stl");
+                                auto FG5 = std::make_shared<TriangularMesh>(inputPath + "stl/FG5_0deg.stl");
                                 gridBuilder->addGrid(FG5, 5);
                             } else {
-                                TriangularMesh *FG5 = TriangularMesh::make(inputPath + "stl/FG5_63deg.stl");
+                                auto FG5 = std::make_shared<TriangularMesh>(inputPath + "stl/FG5_63deg.stl");
                                 gridBuilder->addGrid(FG5, 5);
                             }
                         }
@@ -442,9 +460,9 @@ void addFineGrids(SPtr<MultipleGridBuilder> gridBuilder, uint &maxLevel, real &r
         // FG3 -> dx = 1,25 mm;   lvl 3
         //
         // FineGrid Level 1 -> dx = 5 mm; lvl 1
-        //auto FG1_1 = new Cuboid(-20, -20, -5 + z_offset, 760, 200, 10 + z_offset);
-        auto FG1_1 = new Cuboid(-20, -20, -5 + z_offset, 760, 200, 20 + z_offset);
-        auto FG1_2 = new Cuboid(500, -20,  5 + z_offset, 680, 210, 50 + z_offset);
+        //auto FG1_1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 760, 200, 10 + z_offset);
+        auto FG1_1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 760, 200, 20 + z_offset);
+        auto FG1_2 = std::make_shared<Cuboid>(500, -20,  5 + z_offset, 680, 210, 50 + z_offset);
         auto FG1   = new Conglomerate();
         FG1->add(FG1_1);
         FG1->add(FG1_2);
@@ -455,19 +473,19 @@ void addFineGrids(SPtr<MultipleGridBuilder> gridBuilder, uint &maxLevel, real &r
             gridBuilder->addGrid(FG1, 1);
             if (maxLevel >= 2) {
                 if (rotationOfCity == 0.0) {
-                    TriangularMesh *FG2 = TriangularMesh::make(inputPath + "stl/FG4_0deg.stl");
+                    auto FG2 = std::make_shared<TriangularMesh>(inputPath + "stl/FG4_0deg.stl");
                     gridBuilder->addGrid(FG2, 2);
                 } else {
-                    TriangularMesh *FG2 = TriangularMesh::make(inputPath + "stl/FG4_63deg.stl");
+                    auto FG2 = std::make_shared<TriangularMesh>(inputPath + "stl/FG4_63deg.stl");
                     gridBuilder->addGrid(FG2, 2);
                 }
 
                 if (maxLevel == 3) {
                     if (rotationOfCity == 0.0) {
-                        TriangularMesh *FG3 = TriangularMesh::make(inputPath + "stl/FG5_0deg.stl");
+                        auto FG3 = std::make_shared<TriangularMesh>(inputPath + "stl/FG5_0deg.stl");
                         gridBuilder->addGrid(FG3, 3);
                     } else {
-                        TriangularMesh *FG3 = TriangularMesh::make(inputPath + "stl/FG5_63deg.stl");
+                        auto FG3 = std::make_shared<TriangularMesh>(inputPath + "stl/FG5_63deg.stl");
                         gridBuilder->addGrid(FG3, 3);
                     }
                 }
@@ -487,18 +505,18 @@ void addFineGrids(SPtr<MultipleGridBuilder> gridBuilder, uint &maxLevel, real &r
         // FG4 -> dx = 1.0 mm;   lvl 4
         //
         //// FineGrid Level 1 ->dx = 8.0 mm; lvl 1
-        // auto FG1 = new Cuboid(-20, -20, -5 + z_offset, 800, 200, 75 + z_offset);
+        // auto FG1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 800, 200, 75 + z_offset);
 
         // FineGrid Level 1 -> dx = 8.0 mm; lvl 1
-        auto FG1_1 = new Cuboid(-20, -20, -5 + z_offset, 780, 200, 30 + z_offset);
-        auto FG1_2 = new Cuboid(500, -20, 5 + z_offset, 720, 210, 75 + z_offset);
+        auto FG1_1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 780, 200, 30 + z_offset);
+        auto FG1_2 = std::make_shared<Cuboid>(500, -20, 5 + z_offset, 720, 210, 75 + z_offset);
         auto FG1 = new Conglomerate();
         FG1->add(FG1_1);
         FG1->add(FG1_2);
 
         // FineGrid Level 2 -> dx = 4.0 mm; lvl 2
-        auto FG2_1 = new Cuboid(-20, -20, -5 + z_offset, 760, 200, 10 + z_offset);
-        auto FG2_2 = new Cuboid(520, -20, 5 + z_offset, 700, 210, 50 + z_offset);
+        auto FG2_1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 760, 200, 10 + z_offset);
+        auto FG2_2 = std::make_shared<Cuboid>(520, -20, 5 + z_offset, 700, 210, 50 + z_offset);
         auto FG2 = new Conglomerate();
         FG2->add(FG2_1);
         FG2->add(FG2_2);
@@ -511,19 +529,19 @@ void addFineGrids(SPtr<MultipleGridBuilder> gridBuilder, uint &maxLevel, real &r
                 gridBuilder->addGrid(FG2, 2);
                 if (maxLevel >= 3) {
                     if (rotationOfCity == 0.0) {
-                        TriangularMesh *FG3 = TriangularMesh::make(inputPath + "stl/FG4_0deg.stl");
+                        auto FG3 = std::make_shared<TriangularMesh>(inputPath + "stl/FG4_0deg.stl");
                         gridBuilder->addGrid(FG3, 3);
                     } else {
-                        TriangularMesh *FG3 = TriangularMesh::make(inputPath + "stl/FG4_63deg.stl");
+                        auto FG3 = std::make_shared<TriangularMesh>(inputPath + "stl/FG4_63deg.stl");
                         gridBuilder->addGrid(FG3, 3);
                     }
 
                     if (maxLevel == 4) {
                         if (rotationOfCity == 0.0) {
-                            TriangularMesh *FG4 = TriangularMesh::make(inputPath + "stl/FG5_0deg.stl");
+                            auto FG4 = std::make_shared<TriangularMesh>(inputPath + "stl/FG5_0deg.stl");
                             gridBuilder->addGrid(FG4, 4);
                         } else {
-                            TriangularMesh *FG4 = TriangularMesh::make(inputPath + "stl/FG5_63deg.stl");
+                            auto FG4 = std::make_shared<TriangularMesh>(inputPath + "stl/FG5_63deg.stl");
                             gridBuilder->addGrid(FG4, 4);
                         }
                     }
@@ -542,11 +560,11 @@ void addFineGrids(SPtr<MultipleGridBuilder> gridBuilder, uint &maxLevel, real &r
         // FG2 -> dx = 1 cm;      lvl 2
         //
         // FineGrid Level 1 ->dx = 2 cm; lvl 1
-        auto FG1 = new Cuboid(-20, -20, -5 + z_offset, 800, 200, 75 + z_offset);
+        auto FG1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 800, 200, 75 + z_offset);
 
         // FineGrid Level 2 -> dx = 1 cm; lvl 2
-        auto FG2_1 = new Cuboid(-20, -20, -5 + z_offset, 760, 200, 10 + z_offset);
-        auto FG2_2 = new Cuboid(500, -20, 5 + z_offset, 680, 210, 50 + z_offset);
+        auto FG2_1 = std::make_shared<Cuboid>(-20, -20, -5 + z_offset, 760, 200, 10 + z_offset);
+        auto FG2_2 = std::make_shared<Cuboid>(500, -20, 5 + z_offset, 680, 210, 50 + z_offset);
         auto FG2 = new Conglomerate();
         FG2->add(FG2_1);
         FG2->add(FG2_2);
@@ -751,7 +769,7 @@ int main( int argc, char* argv[])
 {
     try
     {
-        vf::logging::Logger::initalizeLogger();
+        vf::logging::Logger::initializeLogger();
 
         // assuming that the config files is stored parallel to this file.
         std::filesystem::path filePath = __FILE__;
diff --git a/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp b/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
index 40a9aeb6212baf4e6b4354b0bd9c13dea95e0e37..93ff9de0f0a156d9d9aec0ec1b6615ae728e7811 100644
--- a/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
+++ b/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
@@ -14,8 +14,7 @@
 #define _USE_MATH_DEFINES
 #include <math.h>
 
-#include "Core/LbmOrGks.h"
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
@@ -59,17 +58,6 @@
 
 void multipleLevel(const std::string& configPath)
 {
-    std::ofstream logFile( "F:/Work/Computations/out/Sphere/gridGeneratorLog.txt" );
-    //std::ofstream logFile( "grid/gridGeneratorLog.txt" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-    //UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG5");
-
     auto gridFactory = GridFactory::make();
     //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
     gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
@@ -80,7 +68,7 @@ void multipleLevel(const std::string& configPath)
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
 
@@ -118,7 +106,7 @@ void multipleLevel(const std::string& configPath)
 			//////////////////////////////////////////////////////////////////////////
 			gridBuilder->setPeriodicBoundaryCondition(true, true, true);
 			//////////////////////////////////////////////////////////////////////////
-			gridBuilder->buildGrids(LBM, true);
+			gridBuilder->buildGrids(true);
 			//////////////////////////////////////////////////////////////////////////
 			SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
 			//////////////////////////////////////////////////////////////////////////
@@ -147,7 +135,7 @@ void multipleLevel(const std::string& configPath)
 
 			gridBuilder->setPeriodicBoundaryCondition(true, true, true);
 
-			gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+			gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
 			//////////////////////////////////////////////////////////////////////////
 			SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
@@ -196,13 +184,13 @@ void multipleLevel(const std::string& configPath)
 
             //////////////////////////////////////////////////////////////////////////
 
-            // TriangularMesh* sphereSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/Sphere/SphereNotOptimal.stl");
+            // auto sphereSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/Sphere/SphereNotOptimal.stl");
 
-            TriangularMesh* sphereRef_1_STL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/Sphere/SphereRef_1.stl");
+            auto sphereRef_1_STL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/Sphere/SphereRef_1.stl");
 
-            // TriangularMesh* sphereRef_2_STL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/Sphere/SphereRef_2.stl");
+            // auto sphereRef_2_STL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/Sphere/SphereRef_2.stl");
 
-            Object* sphere = new Sphere( 0, 0, 0, 0.5*D );
+            auto sphere = std::make_shared<Sphere>( 0, 0, 0, 0.5*D );
 
             gridBuilder->addCoarseGrid(-2.0*D, -2.5*D, -2.5*D,
                                         9.0*D,  2.5*D,  2.5*D, dx);  // DrivAer
@@ -224,7 +212,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
             //////////////////////////////////////////////////////////////////////////
             gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx , 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx , 0.0, 0.0);
@@ -281,21 +269,21 @@ void multipleLevel(const std::string& configPath)
 
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-            TriangularMesh* DrivAerSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/DrivAer_Fastback_Coarse.stl");
-            //TriangularMesh* triangularMesh = TriangularMesh::make("M:/TestGridGeneration/STL/DrivAer_NoSTLGroups.stl");
-            //TriangularMesh* triangularMesh = TriangularMesh::make("M:/TestGridGeneration/STL/DrivAer_Coarse.stl");
-            //TriangularMesh* DrivAerSTL = TriangularMesh::make("stl/DrivAer_Fastback_Coarse.stl");
+            auto DrivAerSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/DrivAer_Fastback_Coarse.stl");
+            //auto triangularMesh = std::make_shared<TriangularMesh>("M:/TestGridGeneration/STL/DrivAer_NoSTLGroups.stl");
+            //auto triangularMesh = std::make_shared<TriangularMesh>("M:/TestGridGeneration/STL/DrivAer_Coarse.stl");
+            //auto DrivAerSTL = std::make_shared<TriangularMesh>("stl/DrivAer_Fastback_Coarse.stl");
 
-            TriangularMesh* DrivAerRefBoxSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/DrivAer_REF_BOX_Adrea.stl");
-            //TriangularMesh* DrivAerRefBoxSTL = TriangularMesh::make("stl/DrivAer_REF_BOX_Adrea.stl");
+            auto DrivAerRefBoxSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/DrivAer_REF_BOX_Adrea.stl");
+            //auto DrivAerRefBoxSTL = std::make_shared<TriangularMesh>("stl/DrivAer_REF_BOX_Adrea.stl");
 
             real z0 = 0.318;
 
             gridBuilder->addCoarseGrid(- 5.0, -5.0, 0.0 - z0,
                                         15.0,  5.0, 5.0 - z0, dx);  // DrivAer
 
-            //Object* floorBox = new Cuboid( -0.3, -1, -1, 4.0, 1, 0.2 );
-            //Object* wakeBox  = new Cuboid(  3.5, -1, -1, 5.5, 1, 0.8 );
+            //auto floorBox = std::make_shared<Cuboid>( -0.3, -1, -1, 4.0, 1, 0.2 );
+            //auto wakeBox  = std::make_shared<Cuboid>(  3.5, -1, -1, 5.5, 1, 0.8 );
 
             //Conglomerate* refRegion = new Conglomerate();
 
@@ -313,7 +301,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             //////////////////////////////////////////////////////////////////////////
 
@@ -384,11 +372,11 @@ void multipleLevel(const std::string& configPath)
 
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-            TriangularMesh* STL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/PaperPlane_1.stl");
-            //TriangularMesh* STL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_winglet.stl");
+            auto STL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/PaperPlane_1.stl");
+            //auto STL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_winglet.stl");
 
-            TriangularMesh* RefBoxSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_ref.stl");
-            //TriangularMesh* RefBoxSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_winglet_ref.stl");
+            auto RefBoxSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_ref.stl");
+            //auto RefBoxSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_winglet_ref.stl");
 
             gridBuilder->addCoarseGrid(- 0.5, -0.3, -0.3,
                                          1.0,  0.3,  0.3, dx);
@@ -400,7 +388,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             //////////////////////////////////////////////////////////////////////////
 
@@ -463,7 +451,7 @@ void multipleLevel(const std::string& configPath)
 
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-            TriangularMesh* STL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/STL_Group_Test_2_Cylinders.stl");
+            auto STL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/STL_Group_Test_2_Cylinders.stl");
 
             gridBuilder->addCoarseGrid(- 2.0, -4.5, -2.0,
                                          4.0,  4.5,  2.0, dx);
@@ -472,7 +460,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             //////////////////////////////////////////////////////////////////////////
 
@@ -519,34 +507,34 @@ void multipleLevel(const std::string& configPath)
 
             std::vector<uint> ignorePatches = { 152, 153, 154 };
 
-            //TriangularMesh* VW370_SERIE_STL = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/VW370_SERIE.stl", ignorePatches);
-            TriangularMesh* VW370_SERIE_STL = TriangularMesh::make("stl/VW370_SERIE.stl", ignorePatches);
+            //auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/VW370_SERIE.stl", ignorePatches);
+            auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("stl/VW370_SERIE.stl", ignorePatches);
 
-            //TriangularMesh* DLC_RefBox = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox.stl");
+            //auto DLC_RefBox = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox.stl");
 
-            //TriangularMesh* DLC_RefBox_1 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_4m.stl");
-            //TriangularMesh* DLC_RefBox_2 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_3m.stl");
-            //TriangularMesh* DLC_RefBox_3 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_2m.stl");
-            //TriangularMesh* DLC_RefBox_4 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.stl");
+            //auto DLC_RefBox_1 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_4m.stl");
+            //auto DLC_RefBox_2 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_3m.stl");
+            //auto DLC_RefBox_3 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_2m.stl");
+            //auto DLC_RefBox_4 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.stl");
 
-            //TriangularMesh* DLC_RefBox_Level_3 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_3.stl");
-            //TriangularMesh* DLC_RefBox_Level_4 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_4.stl");
-            //TriangularMesh* DLC_RefBox_Level_5 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_5.stl");
+            //auto DLC_RefBox_Level_3 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_3.stl");
+            //auto DLC_RefBox_Level_4 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_4.stl");
+            //auto DLC_RefBox_Level_5 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_5.stl");
 
-            TriangularMesh* DLC_RefBox_Level_3 = TriangularMesh::make("stl/DLC/DLC_RefBox_Level_3.stl");
-            TriangularMesh* DLC_RefBox_Level_4 = TriangularMesh::make("stl/DLC/DLC_RefBox_Level_4.stl");
-            TriangularMesh* DLC_RefBox_Level_5 = TriangularMesh::make("stl/DLC/DLC_RefBox_Level_5.stl");
+            auto DLC_RefBox_Level_3 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_3.stl");
+            auto DLC_RefBox_Level_4 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_4.stl");
+            auto DLC_RefBox_Level_5 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_5.stl");
 
-            //TriangularMesh* VW370_SERIE_STL = TriangularMesh::make("stl/VW370_SERIE.stl", ignorePatches);
-            //TriangularMesh* DLC_RefBox = TriangularMesh::make("stl/DLC_RefBox.lnx.stl");
-            //TriangularMesh* DLC_RefBox_4 = TriangularMesh::make("stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.lnx.stl");
+            //auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("stl/VW370_SERIE.stl", ignorePatches);
+            //auto DLC_RefBox = std::make_shared<TriangularMesh>("stl/DLC_RefBox.lnx.stl");
+            //auto DLC_RefBox_4 = std::make_shared<TriangularMesh>("stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.lnx.stl");
 
             gridBuilder->addCoarseGrid(-30.0, -20.0,  0.0 - z0,
                                         50.0,  20.0, 25.0 - z0, dx);
 
             gridBuilder->setNumberOfLayers(10,8);
-            gridBuilder->addGrid( new Cuboid( - 6.6, -6, -0.7, 20.6 , 6, 5.3  ), 1 );
-            gridBuilder->addGrid( new Cuboid( -3.75, -3, -0.7, 11.75, 3, 2.65 ), 2 );
+            gridBuilder->addGrid( std::make_shared<Cuboid>( - 6.6, -6, -0.7, 20.6 , 6, 5.3  ), 1 );
+            gridBuilder->addGrid( std::make_shared<Cuboid>( -3.75, -3, -0.7, 11.75, 3, 2.65 ), 2 );
 
             gridBuilder->setNumberOfLayers(10,8);
             gridBuilder->addGrid(DLC_RefBox_Level_3, 3);
@@ -563,7 +551,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             //////////////////////////////////////////////////////////////////////////
 
@@ -673,10 +661,9 @@ void multipleLevel(const std::string& configPath)
                 logFile2.open( "F:/Work/Computations/gridGenerator/grid/1/gridGeneratorLog.txt" );
                 //logFile2.open( "grid/1/gridGeneratorLog.txt" );
 
-            logging::Logger::addStream(&logFile2);
 
-            TriangularMesh* triangularMesh = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/Sphere/SphereNotOptimal.stl");
-            //TriangularMesh* triangularMesh = TriangularMesh::make("stl/ShpereNotOptimal.lnx.stl");
+            auto triangularMesh = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/Sphere/SphereNotOptimal.stl");
+            //auto triangularMesh = std::make_shared<TriangularMesh>("stl/ShpereNotOptimal.lnx.stl");
 
             // all
             //gridBuilder->addCoarseGrid(-2, -2, -2,
@@ -710,7 +697,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             if( generatePart == 0 ){
                 gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
@@ -821,18 +808,17 @@ int main( int argc, char* argv[])
 		}
         catch (const std::bad_alloc& e)
         {
-
-            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+            std::cout << "Bad alloc: " << e.what() << std::flush;
         }
         catch (const std::exception& e)
         {
-
-            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+            std::cout << e.what() << std::flush;
         }
         catch (...)
         {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+            std::cout << "unknown exeption" << std::endl;
         }
+
     }
 
    MPI_Finalize();
diff --git a/apps/gpu/LBM/lbmTest/main.cpp b/apps/gpu/LBM/lbmTest/main.cpp
index 79d767cca40d710a41c7d2d6d1c512f74270023c..abe8e7488a55c773896ef6cb362db2bb8247fa4e 100644
--- a/apps/gpu/LBM/lbmTest/main.cpp
+++ b/apps/gpu/LBM/lbmTest/main.cpp
@@ -16,9 +16,8 @@
 
 #include "metis.h"
 
-#include "Core/LbmOrGks.h"
-#include "Core/Input/Input.h"
-#include "Core/StringUtilities/StringUtil.h"
+#include "Input/Input.h"
+#include "StringUtilities/StringUtil.h"
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
@@ -66,7 +65,7 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
 	Communicator* comm = Communicator::getInstanz();
 
 	para->setMaxDev(StringUtil::toInt(input->getValue("NumberOfDevices")));
-	para->setNumprocs(comm->getNummberOfProcess());
+	para->setNumprocs(comm->getNumberOfProcess());
 	para->setDevices(StringUtil::toUintVector(input->getValue("Devices")));
 	para->setMyID(comm->getPID());
 	
@@ -263,17 +262,6 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
 
 void multipleLevel(const std::string& configPath)
 {
-    //std::ofstream logFile( "F:/Work/Computations/gridGenerator/grid/gridGeneratorLog.txt" );
-    std::ofstream logFile( "grid/gridGeneratorLog.txt" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-    //UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG5");
-
     auto gridFactory = GridFactory::make();
     gridFactory->setGridStrategy(Device::CPU);
     //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
@@ -318,7 +306,7 @@ void multipleLevel(const std::string& configPath)
 			//////////////////////////////////////////////////////////////////////////
 			gridBuilder->setPeriodicBoundaryCondition(true, true, true);
 			//////////////////////////////////////////////////////////////////////////
-			gridBuilder->buildGrids(LBM, true); 
+			gridBuilder->buildGrids(true); 
 			//////////////////////////////////////////////////////////////////////////
 			SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
 			//////////////////////////////////////////////////////////////////////////
@@ -348,7 +336,7 @@ void multipleLevel(const std::string& configPath)
 
 			gridBuilder->setPeriodicBoundaryCondition(true, true, true);
 
-			gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+			gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 			//////////////////////////////////////////////////////////////////////////
 			SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
 			//////////////////////////////////////////////////////////////////////////
@@ -373,7 +361,7 @@ void multipleLevel(const std::string& configPath)
             real dx = 0.2;
             real vx = 0.05;
 
-            TriangularMesh* SphereSTL = TriangularMesh::make("E:/temp/GridSphere/2018/STL/SphereNotOptimal.stl");
+            auto SphereSTL = std::make_shared<TriangularMesh>("E:/temp/GridSphere/2018/STL/SphereNotOptimal.stl");
 
             gridBuilder->addCoarseGrid(- 5.0, -5.0, -5.0,
                                         10.0,  5.0,  5.0, dx);  // DrivAer
@@ -385,7 +373,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
             //////////////////////////////////////////////////////////////////////////
             gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx , 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx , 0.0, 0.0);
@@ -421,21 +409,21 @@ void multipleLevel(const std::string& configPath)
             real dx = 0.2;
             real vx = 0.05;
 
-            TriangularMesh* DrivAerSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/DrivAer_Fastback_Coarse.stl");
-            //TriangularMesh* triangularMesh = TriangularMesh::make("M:/TestGridGeneration/STL/DrivAer_NoSTLGroups.stl");
-            //TriangularMesh* triangularMesh = TriangularMesh::make("M:/TestGridGeneration/STL/DrivAer_Coarse.stl");
-            //TriangularMesh* DrivAerSTL = TriangularMesh::make("stl/DrivAer_Fastback_Coarse.stl");
+            auto DrivAerSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/DrivAer_Fastback_Coarse.stl");
+            //auto triangularMesh = std::make_shared<TriangularMesh>("M:/TestGridGeneration/STL/DrivAer_NoSTLGroups.stl");
+            //auto triangularMesh = std::make_shared<TriangularMesh>("M:/TestGridGeneration/STL/DrivAer_Coarse.stl");
+            //auto DrivAerSTL = std::make_shared<TriangularMesh>("stl/DrivAer_Fastback_Coarse.stl");
 
-            TriangularMesh* DrivAerRefBoxSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/DrivAer_REF_BOX_Adrea.stl");
-            //TriangularMesh* DrivAerRefBoxSTL = TriangularMesh::make("stl/DrivAer_REF_BOX_Adrea.stl");
+            auto DrivAerRefBoxSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/DrivAer_REF_BOX_Adrea.stl");
+            //auto DrivAerRefBoxSTL = std::make_shared<TriangularMesh>("stl/DrivAer_REF_BOX_Adrea.stl");
 
             real z0 = 0.318+0.5*dx;
 
             gridBuilder->addCoarseGrid(- 5.0, -5.0, 0.0 - z0,
                                         15.0,  5.0, 5.0 - z0, dx);  // DrivAer
 
-            //Object* floorBox = new Cuboid( -0.3, -1, -1, 4.0, 1, 0.2 );
-            //Object* wakeBox  = new Cuboid(  3.5, -1, -1, 5.5, 1, 0.8 );
+            //auto floorBox = std::make_shared<Cuboid>( -0.3, -1, -1, 4.0, 1, 0.2 );
+            //auto wakeBox  = std::make_shared<Cuboid>(  3.5, -1, -1, 5.5, 1, 0.8 );
 
             //Conglomerate* refRegion = new Conglomerate();
 
@@ -453,7 +441,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             //////////////////////////////////////////////////////////////////////////
 
@@ -508,34 +496,34 @@ void multipleLevel(const std::string& configPath)
 
             std::vector<uint> ignorePatches = { 152, 153, 154 };
 
-            //TriangularMesh* VW370_SERIE_STL = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/VW370_SERIE.stl", ignorePatches);
-            TriangularMesh* VW370_SERIE_STL = TriangularMesh::make("stl/VW370_SERIE.stl", ignorePatches);
+            //auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/VW370_SERIE.stl", ignorePatches);
+            auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("stl/VW370_SERIE.stl", ignorePatches);
 
-            //TriangularMesh* DLC_RefBox = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox.stl");
+            //auto DLC_RefBox = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox.stl");
 
-            //TriangularMesh* DLC_RefBox_1 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_4m.stl");
-            //TriangularMesh* DLC_RefBox_2 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_3m.stl");
-            //TriangularMesh* DLC_RefBox_3 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_2m.stl");
-            //TriangularMesh* DLC_RefBox_4 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.stl");
+            //auto DLC_RefBox_1 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_4m.stl");
+            //auto DLC_RefBox_2 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_3m.stl");
+            //auto DLC_RefBox_3 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_2m.stl");
+            //auto DLC_RefBox_4 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.stl");
 
-            //TriangularMesh* DLC_RefBox_Level_3 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_3.stl");
-            //TriangularMesh* DLC_RefBox_Level_4 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_4.stl");
-            //TriangularMesh* DLC_RefBox_Level_5 = TriangularMesh::make("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_5.stl");
+            //auto DLC_RefBox_Level_3 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_3.stl");
+            //auto DLC_RefBox_Level_4 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_4.stl");
+            //auto DLC_RefBox_Level_5 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_5.stl");
 
-            TriangularMesh* DLC_RefBox_Level_3 = TriangularMesh::make("stl/DLC/DLC_RefBox_Level_3.stl");
-            TriangularMesh* DLC_RefBox_Level_4 = TriangularMesh::make("stl/DLC/DLC_RefBox_Level_4.stl");
-            TriangularMesh* DLC_RefBox_Level_5 = TriangularMesh::make("stl/DLC/DLC_RefBox_Level_5.stl");
+            auto DLC_RefBox_Level_3 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_3.stl");
+            auto DLC_RefBox_Level_4 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_4.stl");
+            auto DLC_RefBox_Level_5 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_5.stl");
 
-            //TriangularMesh* VW370_SERIE_STL = TriangularMesh::make("stl/VW370_SERIE.stl", ignorePatches);
-            //TriangularMesh* DLC_RefBox = TriangularMesh::make("stl/DLC_RefBox.lnx.stl");
-            //TriangularMesh* DLC_RefBox_4 = TriangularMesh::make("stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.lnx.stl");
+            //auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("stl/VW370_SERIE.stl", ignorePatches);
+            //auto DLC_RefBox = std::make_shared<TriangularMesh>("stl/DLC_RefBox.lnx.stl");
+            //auto DLC_RefBox_4 = std::make_shared<TriangularMesh>("stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.lnx.stl");
 
             gridBuilder->addCoarseGrid(-30.0, -20.0,  0.0 - z0,
                                         50.0,  20.0, 25.0 - z0, dx);
             
             gridBuilder->setNumberOfLayers(10,8);
-            gridBuilder->addGrid( new Cuboid( - 6.6, -6, -0.7, 20.6 , 6, 5.3  ), 1 );
-            gridBuilder->addGrid( new Cuboid( -3.75, -3, -0.7, 11.75, 3, 2.65 ), 2 );
+            gridBuilder->addGrid( std::make_shared<Cuboid>( - 6.6, -6, -0.7, 20.6 , 6, 5.3  ), 1 );
+            gridBuilder->addGrid( std::make_shared<Cuboid>( -3.75, -3, -0.7, 11.75, 3, 2.65 ), 2 );
 
             gridBuilder->setNumberOfLayers(10,8);
             gridBuilder->addGrid(DLC_RefBox_Level_3, 3);
@@ -552,7 +540,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             //////////////////////////////////////////////////////////////////////////
 
@@ -633,13 +621,11 @@ void multipleLevel(const std::string& configPath)
                 logFile2.open( "F:/Work/Computations/gridGenerator/grid/1/gridGeneratorLog.txt" );
                 //logFile2.open( "grid/1/gridGeneratorLog.txt" );
 
-            logging::Logger::addStream(&logFile2);
-
             real dx = 1.0 / 40.0;
             real vx = 0.05;
 
-            TriangularMesh* triangularMesh = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/ShpereNotOptimal.stl");
-            //TriangularMesh* triangularMesh = TriangularMesh::make("stl/ShpereNotOptimal.lnx.stl");
+            auto triangularMesh = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/ShpereNotOptimal.stl");
+            //auto triangularMesh = std::make_shared<TriangularMesh>("stl/ShpereNotOptimal.lnx.stl");
 
             // all
             //gridBuilder->addCoarseGrid(-2, -2, -2,  
@@ -673,7 +659,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
             
             if( generatePart == 0 ){
                 gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
@@ -775,7 +761,6 @@ int main( int argc, char* argv[])
             }
             catch (const std::exception& e)
             {
-                *logging::out << logging::Logger::ERROR << e.what() << "\n";
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (...)
@@ -792,22 +777,17 @@ int main( int argc, char* argv[])
 			}
             catch (const std::exception& e)
             {
-                
-                *logging::out << logging::Logger::ERROR << e.what() << "\n";
-                //std::cout << e.what() << std::flush;
+                std::cout << e.what() << std::flush;
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (const std::bad_alloc e)
             {
-                
-                *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-                //std::cout << e.what() << std::flush;
+                std::cout << e.what() << std::flush;
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (...)
             {
-                *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-                //std::cout << "unknown exeption" << std::endl;
+                std::cout << "unknown exeption" << std::endl;
             }
 
             std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
diff --git a/apps/gpu/LBM/metisTest/main.cpp b/apps/gpu/LBM/metisTest/main.cpp
index 6a62ff72c7b71211610ba4e40f81e9a1f527eb7f..b9879c16c32afd0ec27c5841a0b2dad7e4191055 100644
--- a/apps/gpu/LBM/metisTest/main.cpp
+++ b/apps/gpu/LBM/metisTest/main.cpp
@@ -15,9 +15,8 @@
 
 #include "metis.h"
 
-#include "Core/LbmOrGks.h"
-#include "Core/Input/Input.h"
-#include "Core/StringUtilities/StringUtil.h"
+#include "Input/Input.h"
+#include "StringUtilities/StringUtil.h"
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
@@ -66,7 +65,7 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
 	Communicator* comm = Communicator::getInstanz();
 
 	para->setMaxDev(StringUtil::toInt(input->getValue("NumberOfDevices")));
-	para->setNumprocs(comm->getNummberOfProcess());
+	para->setNumprocs(comm->getNumberOfProcess());
 	para->setDevices(StringUtil::toUintVector(input->getValue("Devices")));
 	para->setMyID(comm->getPID());
 	
@@ -263,17 +262,6 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
 
 void multipleLevel(const std::string& configPath)
 {
-    //std::ofstream logFile( "F:/Work/Computations/gridGenerator/grid/gridGeneratorLog.txt" );
-    std::ofstream logFile( "grid/gridGeneratorLog.txt" );
-    logging::Logger::addStream(&logFile);
-
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-    //UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG5");
-
     auto gridFactory = GridFactory::make();
     gridFactory->setGridStrategy(Device::CPU);
     //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
@@ -299,8 +287,8 @@ void multipleLevel(const std::string& configPath)
         real dx = 1.0 / 20.0;
         real vx = 0.05;
 
-        TriangularMesh* triangularMesh = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/ShpereNotOptimal.stl");
-        //TriangularMesh* triangularMesh = TriangularMesh::make("stl/ShpereNotOptimal.lnx.stl");
+        auto triangularMesh = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/ShpereNotOptimal.stl");
+        //auto triangularMesh = std::make_shared<TriangularMesh>("stl/ShpereNotOptimal.lnx.stl");
 
         // all
         //gridBuilder->addCoarseGrid(-2, -2, -2,  
@@ -319,7 +307,7 @@ void multipleLevel(const std::string& configPath)
 
         gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-        gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+        gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
         //////////////////////////////////////////////////////////////////////////
 
@@ -784,7 +772,6 @@ int main( int argc, char* argv[])
             }
             catch (const std::exception& e)
             {
-                *logging::out << logging::Logger::ERROR << e.what() << "\n";
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (...)
@@ -800,22 +787,17 @@ int main( int argc, char* argv[])
             }
             catch (const std::exception& e)
             {
-                
-                *logging::out << logging::Logger::ERROR << e.what() << "\n";
-                //std::cout << e.what() << std::flush;
+                std::cout << e.what() << std::flush;
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (const std::bad_alloc e)
             {
-                
-                *logging::out << logging::Logger::ERROR << "Bad Alloc:" << e.what() << "\n";
-                //std::cout << e.what() << std::flush;
+                std::cout << e.what() << std::flush;
                 //MPI_Abort(MPI_COMM_WORLD, -1);
             }
             catch (...)
             {
-                *logging::out << logging::Logger::ERROR << "Unknown exception!\n";
-                //std::cout << "unknown exeption" << std::endl;
+                std::cout << "unknown exeption" << std::endl;
             }
 
             std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
diff --git a/apps/gpu/LBM/streetTest/3rdPartyLinking.cmake b/apps/gpu/LBM/streetTest/3rdPartyLinking.cmake
deleted file mode 100644
index 6dd6ba1bc73e73dfbf01a3cc36aaeb3664e1c04c..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/streetTest/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,13 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Boost/Link.cmake)
-linkBoost(${targetName} "serialization")
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-linkMetis(${targetName})
-
-if(HULC.BUILD_JSONCPP)
-  include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-  linkJsonCpp(${targetName})
-endif()
\ No newline at end of file
diff --git a/apps/gpu/LBM/streetTest/CMakeLists.txt b/apps/gpu/LBM/streetTest/CMakeLists.txt
deleted file mode 100644
index 6ef6241b52aa9b5dc73a19b82f2b9fd5248c77c5..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/streetTest/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR}) 
-
-set(linkDirectories "")
-set(libsToLink VirtualFluids_GPU GridGenerator)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/src/VirtualFluids_GPU" "${CMAKE_SOURCE_DIR}/src/GridGenerator" "${CMAKE_SOURCE_DIR}/src/VirtualFluidsBasics")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${lbmAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
\ No newline at end of file
diff --git a/apps/gpu/LBM/streetTest/CMakePackage.cmake b/apps/gpu/LBM/streetTest/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/streetTest/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/LBM/streetTest/resources/ExampleStreets.txt b/apps/gpu/LBM/streetTest/resources/ExampleStreets.txt
deleted file mode 100644
index 9e2df6c6dad1adc8fbfcdb7f8e91d794c56d0773..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/streetTest/resources/ExampleStreets.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-10
-  256     5    87     5  1
-   87     5   -85     5  1
-  -85     5   -80   256  1
-  -85     5  -256     5  1
- -185  -256   -85     5  1
-  -92     0    87     0  1
-  -85   256   -92     0  1
- -256     0   -92     0  1
-   87     0   256     0  1
-  -92     0  -190  -256  1
\ No newline at end of file
diff --git a/apps/gpu/LBM/streetTest/resources/FlatGround.stl b/apps/gpu/LBM/streetTest/resources/FlatGround.stl
deleted file mode 100644
index 8b18606cc5050121d3d724f248f7b7bd9d2425e8..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/streetTest/resources/FlatGround.stl
+++ /dev/null
@@ -1,86 +0,0 @@
-solid FlatGround
-facet normal 0.0 0.0 -1.0
-  outer loop
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex -300.0 299.9999999999999 -10.000000000000002
-  endloop
-endfacet
-facet normal 0.0 0.0 -1.0
-  outer loop
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex 300.0 -299.99999999999994 -10.000000000000002
-  endloop
-endfacet
-facet normal -0.0 -0.0 1.0
-  outer loop
-    vertex 300.0 -299.99999999999994 0.0
-    vertex -300.0 299.9999999999999 0.0
-    vertex -300.0 -299.99999999999994 0.0
-  endloop
-endfacet
-facet normal -0.0 -0.0 1.0
-  outer loop
-    vertex -300.0 299.9999999999999 0.0
-    vertex 300.0 -299.99999999999994 0.0
-    vertex 300.0 299.9999999999999 0.0
-  endloop
-endfacet
-facet normal 0.0 -1.0 0.0
-  outer loop
-    vertex 300.0 -299.99999999999994 0.0
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex 300.0 -299.99999999999994 -10.000000000000002
-  endloop
-endfacet
-facet normal 0.0 -1.0 0.0
-  outer loop
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex 300.0 -299.99999999999994 0.0
-    vertex -300.0 -299.99999999999994 0.0
-  endloop
-endfacet
-facet normal -1.0 0.0 0.0
-  outer loop
-    vertex -300.0 299.9999999999999 0.0
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex -300.0 -299.99999999999994 0.0
-  endloop
-endfacet
-facet normal -1.0 0.0 0.0
-  outer loop
-    vertex -300.0 -299.99999999999994 -10.000000000000002
-    vertex -300.0 299.9999999999999 0.0
-    vertex -300.0 299.9999999999999 -10.000000000000002
-  endloop
-endfacet
-facet normal -0.0 1.0 -0.0
-  outer loop
-    vertex -300.0 299.9999999999999 0.0
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex -300.0 299.9999999999999 -10.000000000000002
-  endloop
-endfacet
-facet normal -0.0 1.0 -0.0
-  outer loop
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex -300.0 299.9999999999999 0.0
-    vertex 300.0 299.9999999999999 0.0
-  endloop
-endfacet
-facet normal 1.0 -0.0 -0.0
-  outer loop
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex 300.0 -299.99999999999994 0.0
-    vertex 300.0 -299.99999999999994 -10.000000000000002
-  endloop
-endfacet
-facet normal 1.0 -0.0 -0.0
-  outer loop
-    vertex 300.0 -299.99999999999994 0.0
-    vertex 300.0 299.9999999999999 -10.000000000000002
-    vertex 300.0 299.9999999999999 0.0
-  endloop
-endfacet
-endsolid FlatGround
diff --git a/apps/gpu/LBM/streetTest/streetTest.cpp b/apps/gpu/LBM/streetTest/streetTest.cpp
deleted file mode 100644
index 8b197e7098fd7c0c446aa4587bff14d5cc8e57af..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/streetTest/streetTest.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-
-#include <iostream>
-
-#include "GridGenerator/StreetPointFinder/StreetPointFinder.h"
-
-
-#include "GridGenerator/geometries/TriangularMesh/TriangularMeshStrategy.h"
-#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/BoundaryConditions/Side.h"
-#include "GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
-#include "GridGenerator/grid/GridFactory.h"
-#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
-#include "GridGenerator/io/STLReaderWriter/STLReader.h"
-#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
-
-int main( int argc, char* argv[])
-{
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_INTERMEDIATE);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-	real dx = 1.0;
-
-	gridBuilder->addCoarseGrid(-256, -256, -10,
-								256,  256,  40, dx);
-
-    TriangularMesh* flatGroundSTL = TriangularMesh::make("F:/Work/Computations/NagelSchreckenberg/FlatGround.stl");
-
-    gridBuilder->addGeometry(flatGroundSTL);
-
-	gridBuilder->setPeriodicBoundaryCondition(true, true, false);
-	
-	gridBuilder->buildGrids(LBM, false); 
-	
-    gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
-	
-	gridBuilder->writeGridsToVtk("F:/Work/Computations/NagelSchreckenberg/ExampleGrid");
-
-    SimulationFileWriter::write("F:/Work/Computations/NagelSchreckenberg/grid/", gridBuilder, FILEFORMAT::BINARY);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    StreetPointFinder finder;
-
-    finder.readStreets( "F:/Work/Computations/NagelSchreckenberg/ExampleStreets.txt" );
-
-    finder.writeVTK( "F:/Work/Computations/NagelSchreckenberg/ExampleStreets.vtk" );
-
-    finder.findIndicesLB( gridBuilder->getGrid(0) );
-
-    finder.writeConnectionVTK( "F:/Work/Computations/NagelSchreckenberg/ExampleStreetsConnection.vtk", gridBuilder->getGrid(0) );
-
-    finder.writeSimulationFile("F:/Work/Computations/NagelSchreckenberg/grid/", 1.0, gridBuilder->getNumberOfLevels(), 0);
-
-    return 0;
-}
diff --git a/apps/gpu/LidDrivenCavity/CMakeLists.txt b/apps/gpu/LidDrivenCavity/CMakeLists.txt
deleted file mode 100644
index 108ab3c676e1abf2466f0b7ca61dce2df7eee792..0000000000000000000000000000000000000000
--- a/apps/gpu/LidDrivenCavity/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-PROJECT(LidDrivenCavity)
-
-
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics GridGenerator VirtualFluids_GPU GksMeshAdapter GksGpu FILES LidDrivenCavity.cpp)
diff --git a/apps/gpu/LidDrivenCavity/LidDrivenCavity.cpp b/apps/gpu/LidDrivenCavity/LidDrivenCavity.cpp
deleted file mode 100644
index 7c1f51f3415e381692f82fcd4822a7b8ca4517f7..0000000000000000000000000000000000000000
--- a/apps/gpu/LidDrivenCavity/LidDrivenCavity.cpp
+++ /dev/null
@@ -1,370 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file LidDrivenCavity.cpp
-//! \ingroup Applications
-//! \author Martin Schoenherr, Stephan Lenz
-//=======================================================================================
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <stdexcept>
-#include <fstream>
-#include <exception>
-#include <memory>
-
-//////////////////////////////////////////////////////////////////////////
-
-#include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-#include "Core/LbmOrGks.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-//////////////////////////////////////////////////////////////////////////
-
-#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/BoundaryConditions/Side.h"
-#include "GridGenerator/grid/GridFactory.h"
-
-//////////////////////////////////////////////////////////////////////////
-
-#include "VirtualFluids_GPU/LBM/Simulation.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
-#include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Output/FileWriter.h"
-#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
-#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
-
-//////////////////////////////////////////////////////////////////////////
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "GksGpu/DataBase/DataBase.h"
-#include "GksGpu/Parameters/Parameters.h"
-#include "GksGpu/Initializer/Initializer.h"
-
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-
-#include "GksGpu/TimeStepping/NestedTimeStep.h"
-
-#include "GksGpu/Analyzer/CupsAnalyzer.h"
-#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-
-#include "GksGpu/CudaUtility/CudaUtility.h"
-
-#include "GksGpu/Output/VtkWriter.h"
-
-//////////////////////////////////////////////////////////////////////////
-
-int main( int argc, char* argv[])
-{
-    try
-    {
-        //////////////////////////////////////////////////////////////////////////
-        // Simulation parameters
-        //////////////////////////////////////////////////////////////////////////
-        std::string path("./output");
-        std::string simulationName("LidDrivenCavity");
-
-        const real L  = 1.0;
-        const real Re = 1000.0;
-        const real velocity = 1.0;
-        const real dt = 0.5e-3;
-        const uint nx = 64;
-
-        const uint timeStepOut = 10000;
-        const uint timeStepEnd = 250000;
-
-        // switch between LBM and GKS solver here
-        //LbmOrGks lbmOrGks = GKS;
-        LbmOrGks lbmOrGks = LBM;
-
-        //////////////////////////////////////////////////////////////////////////
-        // setup logger
-        //////////////////////////////////////////////////////////////////////////
-
-        logging::Logger::addStream(&std::cout);
-        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-        logging::Logger::timeStamp(logging::Logger::ENABLE);
-        logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-        //////////////////////////////////////////////////////////////////////////
-        // setup gridGenerator
-        //////////////////////////////////////////////////////////////////////////
-
-        auto gridFactory = GridFactory::make();
-        gridFactory->setGridStrategy(Device::CPU);
-        auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-        //////////////////////////////////////////////////////////////////////////
-        // create grid
-        //////////////////////////////////////////////////////////////////////////
-
-        real dx = L / real(nx);
-
-        gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L,
-                                    0.5 * L,  0.5 * L,  0.5 * L, dx);
-
-        gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-        gridBuilder->buildGrids(lbmOrGks, false);
-
-        //////////////////////////////////////////////////////////////////////////
-        // branch between LBM and GKS
-        //////////////////////////////////////////////////////////////////////////
-
-        if( lbmOrGks == LBM )
-        {
-            SPtr<Parameter> para = Parameter::make();
-            BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
-
-            //////////////////////////////////////////////////////////////////////////
-            // compute parameters in lattice units
-            //////////////////////////////////////////////////////////////////////////
-
-            const real velocityLB = velocity * dt / dx; // LB units
-
-            const real vx = velocityLB / sqrt(2.0); // LB units
-            const real vy = velocityLB / sqrt(2.0); // LB units
-
-            const real viscosityLB = nx * velocityLB / Re; // LB units
-
-            *logging::out << logging::Logger::INFO_HIGH << "velocity  [dx/dt] = " << velocityLB << " \n";
-            *logging::out << logging::Logger::INFO_HIGH << "viscosity [dx^2/dt] = " << viscosityLB << "\n";
-
-            //////////////////////////////////////////////////////////////////////////
-            // set parameters
-            //////////////////////////////////////////////////////////////////////////
-
-            para->setOutputPath( path );
-            para->setOutputPrefix( simulationName );
-
-            para->setPrintFiles(true);
-
-            para->setVelocityLB(velocityLB);
-            para->setViscosityLB(viscosityLB);
-
-            para->setVelocityRatio(velocity / velocityLB);
-
-            para->setTimestepOut( timeStepOut );
-            para->setTimestepEnd( timeStepEnd );
-
-            //////////////////////////////////////////////////////////////////////////
-            // set boundary conditions
-            //////////////////////////////////////////////////////////////////////////
-
-            gridBuilder->setNoSlipBoundaryCondition  (SideType::PX);
-            gridBuilder->setNoSlipBoundaryCondition  (SideType::MX);
-            gridBuilder->setNoSlipBoundaryCondition  (SideType::PY);
-            gridBuilder->setNoSlipBoundaryCondition  (SideType::MY);
-            gridBuilder->setVelocityBoundaryCondition(SideType::PZ,  vx,  vy, 0.0);
-            gridBuilder->setNoSlipBoundaryCondition  (SideType::MZ);
-
-            bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack);
-            bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocitySimpleBounceBackCompressible);
-
-            //////////////////////////////////////////////////////////////////////////
-            // set copy mesh to simulation
-            //////////////////////////////////////////////////////////////////////////
-
-            SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
-
-            SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
-
-            //////////////////////////////////////////////////////////////////////////
-            // run simulation
-            //////////////////////////////////////////////////////////////////////////
-
-            Simulation sim;
-            SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
-            sim.init(para, gridGenerator, fileWriter, cudaMemoryManager);
-            sim.run();
-            sim.free();
-        }
-        else
-        {
-            CudaUtility::setCudaDevice(0);
-
-            Parameters parameters;
-
-            //////////////////////////////////////////////////////////////////////////
-            // compute remaining parameters
-            //////////////////////////////////////////////////////////////////////////
-
-            const real vx = velocity / sqrt(2.0);
-            const real vy = velocity / sqrt(2.0);
-
-            parameters.K  = 2.0;
-            parameters.Pr = 1.0;
-
-            const real Ma = 0.1;
-
-            real rho = 1.0;
-
-            real cs = velocity / Ma;
-            real lambda = c1o2 * ( ( parameters.K + 5.0 ) / ( parameters.K + 3.0 ) ) / ( cs * cs );
-
-            const real mu = velocity * L * rho / Re;
-
-            *logging::out << logging::Logger::INFO_HIGH << "mu  = " << mu << " m^2/s\n";
-
-            *logging::out << logging::Logger::INFO_HIGH << "CFL = " << dt * ( velocity + cs ) / dx << "\n";
-
-            //////////////////////////////////////////////////////////////////////////
-            // set parameters
-            //////////////////////////////////////////////////////////////////////////
-
-            parameters.mu = mu;
-
-            parameters.dt = dt;
-            parameters.dx = dx;
-
-            parameters.lambdaRef = lambda;
-
-            //////////////////////////////////////////////////////////////////////////
-            // set copy mesh to simulation
-            //////////////////////////////////////////////////////////////////////////
-
-            GksMeshAdapter meshAdapter( gridBuilder );
-
-            meshAdapter.inputGrid();
-
-            auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-            //////////////////////////////////////////////////////////////////////////
-            // set boundary conditions
-            //////////////////////////////////////////////////////////////////////////
-
-            SPtr<BoundaryCondition> bcLid  = std::make_shared<IsothermalWall>( dataBase, Vec3(  vx,  vy, 0.0 ), lambda, false );
-            SPtr<BoundaryCondition> bcWall = std::make_shared<IsothermalWall>( dataBase, Vec3( 0.0, 0.0, 0.0 ), lambda, false );
-
-            bcLid->findBoundaryCells ( meshAdapter, false,  [&](Vec3 center){ return center.z >  0.5 &&
-                                                                                     center.x > -0.5 &&
-                                                                                     center.x <  0.5 &&
-                                                                                     center.y > -0.5 &&
-                                                                                     center.y <  0.5; } );
-
-            bcWall->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5 ||
-                                                                                   center.x >  0.5 ||
-                                                                                   center.y < -0.5 ||
-                                                                                   center.y >  0.5 ||
-                                                                                   center.z < -0.5; } );
-
-            dataBase->boundaryConditions.push_back( bcLid  );
-            dataBase->boundaryConditions.push_back( bcWall );
-
-            //////////////////////////////////////////////////////////////////////////
-            // set initial condition and upload mesh and initial condition to GPGPU
-            //////////////////////////////////////////////////////////////////////////
-
-            dataBase->setMesh( meshAdapter );
-
-            Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables {
-
-                return toConservedVariables( PrimitiveVariables( rho, 0.0, 0.0, 0.0, lambda ), parameters.K );
-            });
-
-            dataBase->copyDataHostToDevice();
-
-            Initializer::initializeDataUpdate(dataBase);
-
-            VtkWriter::write( dataBase, parameters, path + "/" + simulationName + "_0" );
-
-            //////////////////////////////////////////////////////////////////////////
-            // set analyzers
-            //////////////////////////////////////////////////////////////////////////
-
-            CupsAnalyzer cupsAnalyzer( dataBase, false, 60.0, true, 10000 );
-
-            ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
-
-            cupsAnalyzer.start();
-
-            //////////////////////////////////////////////////////////////////////////
-            // run simulation
-            //////////////////////////////////////////////////////////////////////////
-
-            for( uint iter = 1; iter <= timeStepEnd; iter++ )
-            {
-                TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-                if( iter % timeStepOut == 0 )
-                {
-                    dataBase->copyDataDeviceToHost();
-
-                    VtkWriter::write( dataBase, parameters, path + "/" + simulationName + "_" + std::to_string( iter ) );
-                }
-
-                int crashCellIndex = dataBase->getCrashCellIndex();
-                if( crashCellIndex >= 0 )
-                {
-                    *logging::out << logging::Logger::LOGGER_ERROR << "Simulation crashed at CellIndex = " << crashCellIndex << "\n";
-                    dataBase->copyDataDeviceToHost();
-                    VtkWriter::write( dataBase, parameters, path + "/" + simulationName + "_" + std::to_string( iter ) );
-
-                    break;
-                }
-
-                dataBase->getCrashCellIndex();
-
-                cupsAnalyzer.run( iter, parameters.dt );
-
-                convergenceAnalyzer.run( iter );
-            }
-        }
-    }
-    catch (const std::bad_alloc e)
-    {
-
-        *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-    }
-    catch (const std::exception& e)
-    {
-
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
-    catch (std::string& s)
-    {
-
-        *logging::out << logging::Logger::LOGGER_ERROR << s << "\n";
-    }
-    catch (...)
-    {
-        *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-    }
-
-   return 0;
-}
diff --git a/apps/gpu/tests/NumericalTestPostProcessing/Utilities/LogFileReader/LogFileReader.cpp b/apps/gpu/tests/NumericalTestPostProcessing/Utilities/LogFileReader/LogFileReader.cpp
index 5a469b44ab21c9a0fc3850baf9f1a82c1f3a0314..580aab90988215ef010de23b47946158e69fb88e 100644
--- a/apps/gpu/tests/NumericalTestPostProcessing/Utilities/LogFileReader/LogFileReader.cpp
+++ b/apps/gpu/tests/NumericalTestPostProcessing/Utilities/LogFileReader/LogFileReader.cpp
@@ -12,7 +12,7 @@
 #include "Utilities/LogFileData/LogFileDataImp.h"
 
 #include <basics/config/ConfigurationFile.h>
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 
 #include "Utilities/AlmostEquals.h"
 
diff --git a/apps/gpu/tests/NumericalTestPostProcessing/Utilities/PostProcessingConfigFileReader/PostProcessingConfigFileReaderImp.cpp b/apps/gpu/tests/NumericalTestPostProcessing/Utilities/PostProcessingConfigFileReader/PostProcessingConfigFileReaderImp.cpp
index 7bff82d2118e449ea19a1cfd56ff261b730ff0d4..6c52eeb93f3e556ebfad4e993eb56c63c95b56fd 100644
--- a/apps/gpu/tests/NumericalTestPostProcessing/Utilities/PostProcessingConfigFileReader/PostProcessingConfigFileReaderImp.cpp
+++ b/apps/gpu/tests/NumericalTestPostProcessing/Utilities/PostProcessingConfigFileReader/PostProcessingConfigFileReaderImp.cpp
@@ -1,7 +1,7 @@
 #include "PostProcessingConfigFileReaderImp.h"
 
 #include <basics/config/ConfigurationFile.h>
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 
 #include "Utilities/PostProcessingConfigData/PostProcessingConfigDataImp.h"
 
diff --git a/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.cpp b/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.cpp
index 26d4045fe4b14cf9ffc5dea16815d70c716de000..0d0dc94a911acb7c3f3f995922e5ce34ddffa303 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.cpp
@@ -1,7 +1,7 @@
 #include "ConfigFileReaderNT.h"
 
 #include <basics/config/ConfigurationFile.h>
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 
 #include <memory>
 #include <fstream>
@@ -391,17 +391,8 @@ int calcNumberOfSimulations(ConfigFilePtr input, ConfigDataPtr configData)
     return counter;
 }
 
-ConfigDataPtr vf::gpu::tests::readConfigFile(const std::string aFilePath)
+ConfigDataPtr vf::gpu::tests::readConfigFile(const std::string aFilePath, const std::string &pathNumericalTests)
 {
-    // If PATH_NUMERICAL_TESTS is not defined, the grid definitions for the tests needs to be placed in the project root
-    // directories.
-#ifdef PATH_NUMERICAL_TESTS
-    auto pathNumericalTests = TOSTRING(PATH_NUMERICAL_TESTS) + std::string("/");
-#else
-    auto pathNumericalTests = TOSTRING(SOURCE_ROOT) + std::string("/");
-#endif
-    std::cout << pathNumericalTests << "\n";
-
     auto configData = std::make_shared<ConfigDataStruct>();
     auto input      = std::make_shared<vf::basics::ConfigurationFile>();
     input->load(aFilePath);
@@ -409,6 +400,8 @@ ConfigDataPtr vf::gpu::tests::readConfigFile(const std::string aFilePath)
     if (!checkConfigFile(input))
         exit(1);
 
+    std::cout << pathNumericalTests << "\n";
+
     configData->viscosity            = StringUtil::toDoubleVector(input->getValue<std::string>("Viscosity"));
     configData->kernelsToTest        = readKernelList(input);
     configData->writeAnalyticalToVTK = StringUtil::toBool(input->getValue<std::string>("WriteAnalyResultsToVTK"));
diff --git a/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h b/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h
index cd4ad56b4c23a8a973385839aa1b0736e1d2fda1..381e2d980ad3808d18c1f7f3b2ffff5bede6f664 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h
@@ -14,6 +14,6 @@ class ConfigurationFile;
 
 namespace vf::gpu::tests
 {
-    std::shared_ptr<ConfigDataStruct> readConfigFile(const std::string aFilePath);
+    std::shared_ptr<ConfigDataStruct> readConfigFile(const std::string aFilePath, const std::string &pathNumericalTests);
 }
 #endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
index b9b4dd3a8d13ddcce87c555312c0d0a422de5a05..126ff07a31c4f434cc82b4bb8b7e1d944b22cae1 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
@@ -5,7 +5,7 @@
 #include <sstream>
 #include <stdio.h>
 
-#include <Core/StringUtilities/StringUtil.h>
+#include <StringUtilities/StringUtil.h>
 
 #include "Parameter/Parameter.h"
 
diff --git a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.cpp b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.cpp
index 97ccf92dc72e253d5f38f88353ba564320e8fd65..cdf82c32ef982dec19df49ae347e5601017a538b 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.cpp
@@ -13,10 +13,10 @@ std::shared_ptr<NumericalTestGridReader> NumericalTestGridReader::getNewInstance
 	return std::shared_ptr<NumericalTestGridReader>(new NumericalTestGridReader(para, initialCondition, cudaManager));
 }
 
-void NumericalTestGridReader::setInitalNodeValues(const int numberOfNodes, const int level) const
+void NumericalTestGridReader::setInitalNodeValues(uint numberOfNodes, int level) const
 {
 	initialCondition->init(level);
-	for (int j = 0; j <= numberOfNodes; j++){
+	for (uint j = 0; j <= numberOfNodes; j++){
 		para->getParH(level)->velocityX[j] = initialCondition->getInitVX(j, level);
 		para->getParH(level)->velocityY[j] = initialCondition->getInitVY(j, level);
 		para->getParH(level)->velocityZ[j] = initialCondition->getInitVZ(j, level);
diff --git a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.h b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.h
index b766e4bab7e0f2667d35f416b9130bea657c823c..84f5d72705db06349f0b31a42eb99ded45bb0e1d 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.h
@@ -13,8 +13,8 @@ public:
 	static std::shared_ptr<NumericalTestGridReader> getNewInstance(std::shared_ptr<Parameter> para, std::shared_ptr<InitialCondition> initialCondition, std::shared_ptr<CudaMemoryManager> cudaManager);
 
 protected:
-	void setInitalNodeValues(const int numberOfNodes, const int level) const;
-		
+	virtual void setInitalNodeValues(uint numberOfNodes, int level) const override;
+    
 private:
 	NumericalTestGridReader(std::shared_ptr<Parameter> para, std::shared_ptr<InitialCondition> initialCondition, std::shared_ptr<CudaMemoryManager> cudaManager);
 
diff --git a/apps/gpu/tests/NumericalTests/main.cpp b/apps/gpu/tests/NumericalTests/main.cpp
index fda9d14a7b752eab2585b300ee6aef606437913e..017c6551ae57f47c14332683aecb6650fb10a5f0 100644
--- a/apps/gpu/tests/NumericalTests/main.cpp
+++ b/apps/gpu/tests/NumericalTests/main.cpp
@@ -12,9 +12,9 @@
 #include <fstream>
 #include <iostream>
 
-static TestSuiteResult startNumericalTests(const std::string &configFile)
+static TestSuiteResult startNumericalTests(const std::string &configFile, const std::string &pathNumericalTests)
 {
-    auto configData = vf::gpu::tests::readConfigFile(configFile);
+    auto configData = vf::gpu::tests::readConfigFile(configFile, pathNumericalTests);
 
     std::shared_ptr<NumericalTestFactoryImp> numericalTestFactory = NumericalTestFactoryImp::getNewInstance(configData);
 
@@ -33,10 +33,11 @@ int main(int argc, char **argv)
 
     auto tests_passed = TestSuiteResult::FAILED;
 
-    if (argc > 1)
-        tests_passed = startNumericalTests(argv[1]);
+    if (argc == 3) {
+        tests_passed = startNumericalTests(argv[1], argv[2]);
+    }
     else
-        std::cout << "Configuration file must be set!: lbmgm <config file>" << std::endl << std::flush;
+        std::cout << "Configuration file must be set!: lbmgm <config file> <path to grid data>" << std::endl << std::flush;
 
     MPI_Finalize();
 
diff --git a/cpu.cmake b/cpu.cmake
index 13f216203bfeefb9c550b8be470af4cf181a44b4..1a93590bd0dde7e9c1556b9b150656a0ed93c8d0 100644
--- a/cpu.cmake
+++ b/cpu.cmake
@@ -27,7 +27,6 @@ SET(USE_VTK OFF CACHE BOOL "include VTK library support")
 SET(USE_CATALYST OFF CACHE BOOL "include Paraview Catalyst support")
 
 SET(USE_HLRN_LUSTRE OFF CACHE BOOL "include HLRN Lustre support")
-SET(USE_DEM_COUPLING OFF CACHE BOOL "PE plugin")
 
 SET(USE_LIGGGHTS OFF CACHE BOOL "include LIGGGHTS library support")
 
diff --git a/gpu.cmake b/gpu.cmake
index 5b175ca2a5fe7d289bd948e905ada612413333d2..6bdbf656326c4d733ec2c5b5d08751b3055a7b00 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -1,73 +1,30 @@
-IF( BUILD_VF_GKS )
-    # only use this with device of CC larger than 6.0
-    set(CMAKE_CUDA_FLAGS "-Xptxas=\"-v\"" CACHE STRING "" FORCE)
-    set(CMAKE_CUDA_ARCHITECTURES 60)
-ENDIF()
-
-set(CMAKE_CUDA_FLAGS_DEBUG " -G" CACHE STRING "" FORCE)
-
-
-##########################################################################################################################
-###                  Subdirectories                                                                                    ###
-##########################################################################################################################
-
 #############################################################
 ###                  Core                                 ###
 #############################################################
 
 add_subdirectory(src/gpu/GridGenerator)
-#add_subdirectory(3rdParty/metis/metis-5.1.0)
+add_subdirectory(src/gpu/VirtualFluids_GPU)
 
-#############################################################
-###                  Virtual Fluids GPU                   ###
-#############################################################
-
-IF (BUILD_VF_GPU)
-    add_subdirectory(src/gpu/VirtualFluids_GPU)
-
-    add_subdirectory(apps/gpu/LBM/DrivenCavity)
-    add_subdirectory(apps/gpu/LBM/SphereGPU)
-    add_subdirectory(apps/gpu/LBM/BoundaryLayer)
-ELSE()
-    MESSAGE( STATUS "exclude Virtual Fluids GPU." )
-ENDIF()
+if(BUILD_VF_ALL_SAMPLES)
+    list(APPEND USER_APPS 
+    "apps/gpu/LBM/ActuatorLine"
+    "apps/gpu/LBM/SphereScaling" 
+    "apps/gpu/LBM/TGV_3D")
+endif()
 
 #############################################################
-###                  Virtual Fluids GKS                   ###
+###                  Apps                                 ###
 #############################################################
 
-IF (BUILD_VF_GKS)
-    add_subdirectory(src/gpu/GksMeshAdapter)
-    add_subdirectory(src/gpu/GksVtkAdapter)
-
-    add_subdirectory(src/gpu/GksGpu)
-
-    add_subdirectory(apps/gpu/GKS/Flame7cm)
-ELSE()
-    MESSAGE( STATUS "exclude Virtual Fluids GKS." )
-ENDIF()
-
-#############################################################
-###                     JSONCPP                           ###
-#############################################################
-IF (NOT BUILD_JSONCPP)
-    MESSAGE( STATUS "Build Input Project without JsonCpp." )
-ELSE()
-    add_subdirectory(3rdParty/jsoncpp)
-    add_definitions(-DBUILD_JSONCPP)
-ENDIF()
+add_subdirectory(apps/gpu/LBM/DrivenCavity)
+add_subdirectory(apps/gpu/LBM/SphereGPU)
+add_subdirectory(apps/gpu/LBM/BoundaryLayer)
 
 #############################################################
 ###                   Numeric Tests                       ###
 #############################################################
 
 if(BUILD_NUMERIC_TESTS)
-
-    # PATH_NUMERICAL_TESTS can be passed to cmake e.g. cmake .. -DPATH_NUMERICAL_TESTS=/data/
-    if(PATH_NUMERICAL_TESTS)
-        LIST(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
-    endif()
-
     if(NOT BUILD_VF_UNIT_TESTS) # in this case googletest is already included.
         add_subdirectory(${VF_THIRD_DIR}/googletest)
     endif()
@@ -76,12 +33,3 @@ if(BUILD_NUMERIC_TESTS)
     add_subdirectory(apps/gpu/tests/NumericalTests)
     add_subdirectory(apps/gpu/tests/NumericalTestPostProcessing)
 endif()
-
-#############################################################
-###                 Annas Traffic Sim                     ###
-#############################################################
-
-if(BUILD_VF_TRAFFIC)
-    add_subdirectory(src/gpu/Traffic)
-    add_subdirectory(apps/gpu/LBM/TrafficTest)
-endif()
diff --git a/metadata.xml b/metadata.xml
deleted file mode 100644
index 7cbae3ae7e1d5d7d48af2f0e5577253a89f953f5..0000000000000000000000000000000000000000
--- a/metadata.xml
+++ /dev/null
@@ -1,204 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd">
-	<identifier identifierType="DOI">PLACEHOLDER</identifier>
-	<titles>
-		<title xml:lang="en">VirtualFluids</title>
-	</titles>
-	<language>en</language>
-	<creators>
-		<creator>
-			<creatorName nameType="Personal">Krafczyk, Manfred</creatorName>
-			<givenName>Manfred</givenName>
-			<familyName>Krafczyk</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID">0000-0002-8509-0871</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
-		</creator>
-		<creator>
-			<creatorName nameType="Organizational">Institut für rechnergestützte Modellierung im Bauingenieurwesen</creatorName>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-		</creator>
-	</creators>
-	<publisher xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</publisher>
-	<publicationYear>2021</publicationYear>
-	<resourceType resourceTypeGeneral="Software">Computational Fluid Dynamics Solver</resourceType>
-	<subjects>
-		<subject subjectScheme="DDC" schemeURI="https://www.oclc.org/en/dewey.html">532 Fluid Mechanics, liquid mechanics</subject>
-	</subjects>
-	<contributors>
-		<contributor contributorType="Researcher">
-			<contributorName>Ahrenholz, Benjamin</contributorName>
-			<givenName>Benjamin</givenName>
-			<familyName>Ahrenholz</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Alihussein, Hussein</contributorName>
-			<givenName>Hussein</givenName>
-			<familyName>Alihussein</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3656-7028</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Bindick, Sebastian</contributorName>
-			<givenName>Sebastian</givenName>
-			<familyName>Bindick</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Brendel, Aileen</contributorName>
-			<givenName>Aileen</givenName>
-			<familyName>Brendel</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Geier, Martin</contributorName>
-			<givenName>Martin</givenName>
-			<familyName>Geier</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-8367-9412</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Geller, Sebastian</contributorName>
-			<givenName>Sebastian</givenName>
-			<familyName>Geller</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Goraki Fard, Ehsan</contributorName>
-			<givenName>Ehsan</givenName>
-			<familyName>Goraki Fard</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Hegewald, Jan</contributorName>
-			<givenName>Jan</givenName>
-			<familyName>Hegewald</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Janßen, Christian</contributorName>
-			<givenName>Christian</givenName>
-			<familyName>Janßen</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Kutscher, Konstantin</contributorName>
-			<givenName>Konstantin</givenName>
-			<familyName>Kutscher</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-1099-1608</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Lenz, Stephan</contributorName>
-			<givenName>Stephan</givenName>
-			<familyName>Lenz</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Linxweiler, Jan</contributorName>
-			<givenName>Jan</givenName>
-			<familyName>Linxweiler</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-2755-5087</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Lux, Lennard</contributorName>
-			<givenName>Lennard</givenName>
-			<familyName>Lux</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Marcus, Sven</contributorName>
-			<givenName>Sven</givenName>
-			<familyName>Marcus</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3689-2162</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Universitätsbibliothek Braunschweig</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Peters, Sören</contributorName>
-			<givenName>Sören</givenName>
-			<familyName>Peters</familyName>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Safari, Hesameddin</contributorName>
-			<givenName>Hesameddin</givenName>
-			<familyName>Safari</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Schönherr, Martin</contributorName>
-			<givenName>Martin</givenName>
-			<familyName>Schönherr</familyName>
-			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-4774-1776</nameIdentifier>
-			<affiliation xml:lang="de">TU Braunschweig</affiliation>
-			<affiliation xml:lang="en">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Stiebler, Maik</contributorName>
-			<givenName>Maik</givenName>
-			<familyName>Stiebler</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Textor, Sören</contributorName>
-			<givenName>Sören</givenName>
-			<familyName>Textor</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Tölke, Jonas</contributorName>
-			<givenName>Jonas</givenName>
-			<familyName>Tölke</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Uphoff, Sonja</contributorName>
-			<givenName>Sonja</givenName>
-			<familyName>Uphoff</familyName>
-		</contributor>
-
-		<contributor contributorType="Researcher">
-			<contributorName>Wellmann, Anna</contributorName>
-			<givenName>Anna</givenName>
-			<familyName>Wellmann</familyName>
-		</contributor>
-	</contributors>
-	<dates>
-		<date dateType="Created">2000</date>
-	</dates>
-	<formats>
-		<format>text/x-c</format>
-		<format>text/x-h</format>
-		<format>text/x-script.python</format>
-	</formats>
-	<relatedIdentifiers>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="Requires" resourceTypeGeneral="Software">https://www.open-mpi.org/software/ompi/v4.1/</relatedIdentifier>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://cmake.org</relatedIdentifier>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://gcc.gnu.org</relatedIdentifier>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://clang.llvm.org</relatedIdentifier>
-		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://visualstudio.microsoft.com/vs/features/cplusplus/</relatedIdentifier>
-	</relatedIdentifiers>
-	<rightsList>
-		<rights xml:lang="en" schemeURI="https://spdx.org/licenses/" rightsIdentifierScheme="SPDX" rightsIdentifier="GPL-3.0-only" rightsURI="https://www.gnu.org/licenses/gpl-3.0-standalone.html">GNU General Public License Version 3</rights>
-	</rightsList>
-	<descriptions>
-		<description descriptionType="Abstract">
-			VirtualFluids (VF) is a research code developed at the Institute for Computational Modeling in Civil Engineering (iRMB). The code is a Computational Fluid Dynamics (CFD) solver based on the Lattice Boltzmann Method (LBM) for turbulent, thermal, multiphase and multicomponent flow problems as well as for multi-field problems such as Fluid-Structure-interaction including distributed pre- and postprocessing capabilities for simulations with more than 100 billion degrees of freedom.
-		</description>
-	</descriptions>
-</resource>
diff --git a/pythonbindings/CMakeLists.txt b/pythonbindings/CMakeLists.txt
index ea6ff833aae76c4ed9964b6d280d7e5a6dec26e0..037b68baf53d5da8a1ccd20155cb0e7be483176b 100644
--- a/pythonbindings/CMakeLists.txt
+++ b/pythonbindings/CMakeLists.txt
@@ -7,23 +7,26 @@ endif()
 project(VirtualFluidsPython LANGUAGES ${PYFLUIDS_LANGUAGES})
 
 pybind11_add_module(python_bindings MODULE src/VirtualFluids.cpp)
+target_compile_definitions(python_bindings PUBLIC VF_DOUBLE_ACCURACY)
 
 set_target_properties(  python_bindings PROPERTIES
                         LIBRARY_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/pythonbindings/pyfluids
                         OUTPUT_NAME "bindings")
 
+target_link_libraries(python_bindings PRIVATE basics logger mpi lbm)
+
 IF(BUILD_VF_GPU)
     set_source_files_properties(src/VirtualFluids.cpp PROPERTIES LANGUAGE CUDA)
 
     target_include_directories(python_bindings PRIVATE ${VF_THIRD_DIR}/cuda_samples/)
     target_compile_definitions(python_bindings PRIVATE VF_GPU_PYTHONBINDINGS)
 
-    target_link_libraries(python_bindings PRIVATE GridGenerator VirtualFluids_GPU basics lbmCuda logger)
+    target_link_libraries(python_bindings PRIVATE GridGenerator VirtualFluids_GPU)
 ENDIF()
 
 IF(BUILD_VF_CPU)
     target_compile_definitions(python_bindings PRIVATE VF_METIS VF_MPI VF_CPU_PYTHONBINDINGS)
-    target_link_libraries(python_bindings PRIVATE simulationconfig VirtualFluidsCore muparser basics)
+    target_link_libraries(python_bindings PRIVATE simulationconfig VirtualFluidsCore muparser)
 
     # include bindings for muparsers
     pybind11_add_module(pymuparser MODULE src/muParser.cpp)
@@ -38,5 +41,6 @@ IF(BUILD_VF_CPU)
     target_link_libraries(pymuparser PRIVATE muparser)
 ENDIF()
 
+
 target_include_directories(python_bindings PRIVATE ${CMAKE_SOURCE_DIR}/src/)
 target_include_directories(python_bindings PRIVATE ${CMAKE_BINARY_DIR})
\ No newline at end of file
diff --git a/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi b/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi
index a41b7934ca706dc0db5bd6188fee3150456e0cd9..a646f7e590e2aba91ab1c367f75b8c6ebe8f79ae 100644
--- a/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/basics/__init__.pyi
@@ -64,19 +64,3 @@ class ConfigurationFile:
     @overload
     def get_uint_value(self, key: str, default_value: int) -> int: ...
     def load(self, file: str) -> bool: ...
-
-class LbmOrGks:
-    __members__: ClassVar[dict] = ...  # read-only
-    GKS: ClassVar[LbmOrGks] = ...
-    LBM: ClassVar[LbmOrGks] = ...
-    __entries: ClassVar[dict] = ...
-    def __init__(self, arg0: int) -> None: ...
-    def __eq__(self, arg0: object) -> bool: ...
-    def __getstate__(self) -> int: ...
-    def __hash__(self) -> int: ...
-    def __index__(self) -> int: ...
-    def __int__(self) -> int: ...
-    def __ne__(self, arg0: object) -> bool: ...
-    def __setstate__(self, arg0: int) -> None: ...
-    @property
-    def name(self) -> str: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi b/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi
deleted file mode 100644
index 43938ff7646efd3c596ae29971cce39fed865fa6..0000000000000000000000000000000000000000
--- a/pythonbindings/pyfluids-stubs/bindings/basics/logger.pyi
+++ /dev/null
@@ -1,83 +0,0 @@
-r"""
-=======================================================================================
- ____          ____    __    ______     __________   __      __       __        __
- \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-      \    \  |    |   ________________________________________________________________
-       \    \ |    |  |  ______________________________________________________________|
-        \    \|    |  |  |         __          __     __     __     ______      _______
-         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-
-  This file is part of VirtualFluids. VirtualFluids is free software: you can
-  redistribute it and/or modify it under the terms of the GNU General Public
-  License as published by the Free Software Foundation, either version 3 of
-  the License, or (at your option) any later version.
-
-  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-  for more details.
-
-  You should have received a copy of the GNU General Public License along
-  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-
-! \file logger.pyi
-! \ingroup basics
-! \author Henry Korb
-=======================================================================================
-"""
-from typing import Any, ClassVar
-
-log: None
-
-class Level:
-    __members__: ClassVar[dict] = ...  # read-only
-    INFO_HIGH: ClassVar[Level] = ...
-    INFO_INTERMEDIATE: ClassVar[Level] = ...
-    INFO_LOW: ClassVar[Level] = ...
-    LOGGER_ERROR: ClassVar[Level] = ...
-    WARNING: ClassVar[Level] = ...
-    __entries: ClassVar[dict] = ...
-    def __init__(self, arg0: int) -> None: ...
-    def __eq__(self, arg0: object) -> bool: ...
-    def __getstate__(self) -> int: ...
-    def __hash__(self) -> int: ...
-    def __index__(self) -> int: ...
-    def __int__(self) -> int: ...
-    def __ne__(self, arg0: object) -> bool: ...
-    def __setstate__(self, arg0: int) -> None: ...
-    @property
-    def name(self) -> str: ...
-
-class Logger:
-    def __init__(self, *args, **kwargs) -> None: ...
-    @staticmethod
-    def add_stdout() -> None: ...
-    @staticmethod
-    def enable_printed_rank_numbers(print: bool) -> None: ...
-    @staticmethod
-    def set_debug_level(level: int) -> None: ...
-    @staticmethod
-    def time_stamp(time_stemp: TimeStamp) -> None: ...
-
-class TimeStamp:
-    __members__: ClassVar[dict] = ...  # read-only
-    DISABLE: ClassVar[TimeStamp] = ...
-    ENABLE: ClassVar[TimeStamp] = ...
-    __entries: ClassVar[dict] = ...
-    def __init__(self, arg0: int) -> None: ...
-    def __eq__(self, arg0: object) -> bool: ...
-    def __getstate__(self) -> int: ...
-    def __hash__(self) -> int: ...
-    def __index__(self) -> int: ...
-    def __int__(self) -> int: ...
-    def __ne__(self, arg0: object) -> bool: ...
-    def __setstate__(self, arg0: int) -> None: ...
-    @property
-    def name(self) -> str: ...
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
index 8d715e4b4cd49e6dbf92da3aedddbc4b869067c4..433a20e7efe472bd791b1d2a0f43859676e8fcf0 100644
--- a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
@@ -67,7 +67,7 @@ class LevelGridBuilder(GridBuilder):
     def set_precursor_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, file_collection: pyfluids.bindings.gpu.VelocityFileCollection, n_t_read: int, velocity_x: float = ..., velocity_y: float = ..., velocity_z: float = ..., file_level_to_grid_level_map: List[int] = ...) -> None: ...
     def set_pressure_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, rho: float) -> None: ...
     def set_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float) -> None: ...
-    def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float) -> None: ...
+    def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float, q: float) -> None: ...
     def set_velocity_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, vx: float, vy: float, vz: float) -> None: ...
 
 class MultipleGridBuilder(LevelGridBuilder):
@@ -81,7 +81,7 @@ class MultipleGridBuilder(LevelGridBuilder):
     def add_grid(self, grid_shape: Object) -> None: ...
     @overload
     def add_grid(self, grid_shape: Object, level_fine: int) -> None: ...
-    def build_grids(self, lbm_or_gks: pyfluids.bindings.basics.LbmOrGks, enable_thin_walls: bool) -> None: ...
+    def build_grids(self, enable_thin_walls: bool) -> None: ...
     def get_number_of_levels(self) -> int: ...
     @staticmethod
     def make_shared(grid_factory: GridFactory) -> MultipleGridBuilder: ...
diff --git a/pythonbindings/src/VirtualFluids.cpp b/pythonbindings/src/VirtualFluids.cpp
index 20e5012e0af325440e502c704d6f372100306ab1..91682b79e8125a7513565b28e2e22e74e0b2dac1 100644
--- a/pythonbindings/src/VirtualFluids.cpp
+++ b/pythonbindings/src/VirtualFluids.cpp
@@ -49,7 +49,8 @@ namespace py_bindings
 
     PYBIND11_MODULE(bindings, m)
     {
-        py::add_ostream_redirect(m, "ostream_redirect");
+        // The old logger (src/basics/logger) is no longer used and std::cout is no longer passed to it, so this ostream redirect is probably not needed anymore.
+        // pybind11::add_ostream_redirect(m, "ostream_redirect");
         basics::makeModule(m);
         lbm::makeModule(m);
         logging::makeModule(m);
@@ -60,4 +61,4 @@ namespace py_bindings
         cpu::makeModule(m);
 #endif
     }
-}
\ No newline at end of file
+}
diff --git a/pythonbindings/src/basics/basics.cpp b/pythonbindings/src/basics/basics.cpp
index e67dfb05308511c8bf79d7e860299f062f317194..0b294b7a8de2f4f396dac418b0544b5f99ecec3d 100644
--- a/pythonbindings/src/basics/basics.cpp
+++ b/pythonbindings/src/basics/basics.cpp
@@ -31,9 +31,7 @@
 //! \author Henry Korb
 //=======================================================================================
 #include <pybind11/pybind11.h>
-#include "submodules/logger.cpp"
 #include "submodules/configuration_file.cpp"
-#include "submodules/lbm_or_gks.cpp"
 
 namespace basics
 {
@@ -43,10 +41,8 @@ namespace basics
     {
         py::module basicsModule = parentModule.def_submodule("basics");
 
-        logger::makeModule(basicsModule);
         configuration::makeModule(basicsModule);
-        lbmOrGks::makeModule(basicsModule);
-        
+
         return basicsModule;
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/basics/submodules/lbm_or_gks.cpp b/pythonbindings/src/basics/submodules/lbm_or_gks.cpp
deleted file mode 100644
index d20cf2d1f631f6d36a80c36f1fb6c9c59d192090..0000000000000000000000000000000000000000
--- a/pythonbindings/src/basics/submodules/lbm_or_gks.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file lbm_or_gks.cpp
-//! \ingroup submodules
-//! \author Henry Korb
-//=======================================================================================
-#include <pybind11/pybind11.h>
-#include "basics/Core/LbmOrGks.h"
-
-namespace lbmOrGks
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-         py::enum_<LbmOrGks>(parentModule, "LbmOrGks")
-         .value("LBM", LbmOrGks::LBM)
-         .value("GKS", LbmOrGks::GKS);
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/basics/submodules/logger.cpp b/pythonbindings/src/basics/submodules/logger.cpp
deleted file mode 100644
index fa7e00e4dca06581b7a14d2bcf2628ed6af60001..0000000000000000000000000000000000000000
--- a/pythonbindings/src/basics/submodules/logger.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file logger.cpp
-//! \ingroup submodules
-//! \author Henry Korb
-//=======================================================================================
-#include <pybind11/pybind11.h>
-#include <pybind11/iostream.h>
-#include <basics/Core/Logger/Logger.h>
-#include <basics/Core/Logger/implementations/LoggerImp.h>
-
-namespace logger
-{
-    namespace py = pybind11;
-
-    py::module makeModule(py::module_ &parentModule)
-    {
-        py::module loggerModule = parentModule.def_submodule("logger");
-
-        py::class_<logging::Logger>(loggerModule, "Logger")
-        .def_static("add_stdout", [](){
-            logging::Logger::addStream(&std::cout);
-        })
-        .def_static("set_debug_level", &logging::Logger::setDebugLevel)
-        .def_static("time_stamp", &logging::Logger::timeStamp, py::arg("time_stamp"))
-        .def_static("enable_printed_rank_numbers", &logging::Logger::enablePrintedRankNumbers, py::arg("print"));
-
-        loggerModule.attr("log") = logging::out;
-        py::enum_<logging::Logger::Level>(loggerModule, "Level")
-        .value("INFO_LOW", logging::Logger::Level::INFO_LOW)
-        .value("INFO_INTERMEDIATE", logging::Logger::Level::INFO_INTERMEDIATE)
-        .value("INFO_HIGH", logging::Logger::Level::INFO_HIGH)
-        .value("WARNING", logging::Logger::Level::WARNING)
-        .value("LOGGER_ERROR", logging::Logger::Level::LOGGER_ERROR);
-
-        py::enum_<logging::Logger::TimeStamp>(loggerModule, "TimeStamp")
-        .value("ENABLE", logging::Logger::TimeStamp::ENABLE)
-        .value("DISABLE", logging::Logger::TimeStamp::DISABLE);
-
-        return loggerModule;
-    }
-}
\ No newline at end of file
diff --git a/pythonbindings/src/cpu/submodules/boundaryconditions.cpp b/pythonbindings/src/cpu/submodules/boundaryconditions.cpp
index ac9ec8605dec51e8374c850b1c1b58314674c426..d7cd0b578a52c369923db0e31b01200f0389c9eb 100644
--- a/pythonbindings/src/cpu/submodules/boundaryconditions.cpp
+++ b/pythonbindings/src/cpu/submodules/boundaryconditions.cpp
@@ -77,14 +77,14 @@ namespace boundaryconditions
 
         bc_class<VelocityBCAdapter, VelocityBCAlgorithm>(bcModule, "VelocityBoundaryCondition")
                 .def(py::init())
-                .def(py::init<bool &, bool &, bool &, mu::Parser &, double &, double &>(),
+                .def(py::init<bool &, bool &, bool &, mu::Parser &, real &, real &>(),
                      "vx1"_a, "vx2"_a, "vx3"_a,
                      "function"_a, "start_time"_a, "end_time"_a)
-                .def(py::init<bool &, bool &, bool &, mu::Parser &, mu::Parser &, mu::Parser &, double &, double &>(),
+                .def(py::init<bool &, bool &, bool &, mu::Parser &, mu::Parser &, mu::Parser &, real &, real &>(),
                      "vx1"_a, "vx2"_a, "vx3"_a,
                      "function_vx1"_a, "function_vx2"_a, "function_vx2"_a,
                      "start_time"_a, "end_time"_a)
-                .def(py::init<double &, double &, double &, double &, double &, double &, double &, double &, double &>(),
+                .def(py::init<real &, real &, real &, real &, real &, real &, real &, real &, real &>(),
                      "vx1"_a, "vx1_start_time"_a, "vx1_end_time"_a,
                      "vx2"_a, "vx2_start_time"_a, "vx2_end_time"_a,
                      "vx3"_a, "vx3_start_time"_a, "vx3_end_time"_a);
diff --git a/pythonbindings/src/cpu/submodules/kernel.cpp b/pythonbindings/src/cpu/submodules/kernel.cpp
index b00d86579540a299e4bf3ed47bc09d4386f420a2..2f2effa7ef28600e67fb2e2954409a0963df1f37 100644
--- a/pythonbindings/src/cpu/submodules/kernel.cpp
+++ b/pythonbindings/src/cpu/submodules/kernel.cpp
@@ -55,7 +55,7 @@ namespace kernel
                 .def_readwrite("forcing_in_x1", &LBMKernelConfiguration::forcingX1)
                 .def_readwrite("forcing_in_x2", &LBMKernelConfiguration::forcingX2)
                 .def_readwrite("forcing_in_x3", &LBMKernelConfiguration::forcingX3)
-                .def("set_forcing", [](LBMKernelConfiguration &kernelConfig, double x1, double x2, double x3)
+                .def("set_forcing", [](LBMKernelConfiguration &kernelConfig, real x1, real x2, real x3)
                 {
                     kernelConfig.forcingX1 = x1;
                     kernelConfig.forcingX2 = x2;
diff --git a/pythonbindings/src/gpu/submodules/communicator.cpp b/pythonbindings/src/gpu/submodules/communicator.cpp
index 26a57061933fbdbfe3447ec89eeb07116a9b974b..0230caf197c04c2f2cd411288e9ea24ee314c4a8 100644
--- a/pythonbindings/src/gpu/submodules/communicator.cpp
+++ b/pythonbindings/src/gpu/submodules/communicator.cpp
@@ -41,7 +41,7 @@ namespace communicator
     {
         py::class_<vf::gpu::Communicator, std::unique_ptr<vf::gpu::Communicator, py::nodelete>>(parentModule, "Communicator")
         .def_static("get_instance", &vf::gpu::Communicator::getInstance, py::return_value_policy::reference)
-        .def("get_number_of_process", &vf::gpu::Communicator::getNummberOfProcess)
+        .def("get_number_of_process", &vf::gpu::Communicator::getNumberOfProcess)
         .def("get_pid", &vf::gpu::Communicator::getPID);
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/grid_generator.cpp b/pythonbindings/src/gpu/submodules/grid_generator.cpp
index 3e9fb5655e26ffa6053a205da5a3e3f0f2ecd49f..59d0bd5708b11f246664d1e8c7ee198f986d80c1 100644
--- a/pythonbindings/src/gpu/submodules/grid_generator.cpp
+++ b/pythonbindings/src/gpu/submodules/grid_generator.cpp
@@ -102,7 +102,7 @@ namespace grid_generator
         .def("add_geometry", py::overload_cast<Object*>(&MultipleGridBuilder::addGeometry), py::arg("solid_object"))
         .def("add_geometry", py::overload_cast<Object*, uint>(&MultipleGridBuilder::addGeometry), py::arg("solid_object"), py::arg("level"))
         .def("get_number_of_levels", &MultipleGridBuilder::getNumberOfLevels)
-        .def("build_grids", &MultipleGridBuilder::buildGrids, py::arg("lbm_or_gks"), py::arg("enable_thin_walls"))
+        .def("build_grids", &MultipleGridBuilder::buildGrids, py::arg("enable_thin_walls"))
         .def("set_subdomain_box", &MultipleGridBuilder::setSubDomainBox, py::arg("bounding_box"))
         .def("find_communication_indices", &MultipleGridBuilder::findCommunicationIndices)
         .def("set_communication_process", &MultipleGridBuilder::setCommunicationProcess)
diff --git a/pythonbindings/src/gpu/submodules/parameter.cpp b/pythonbindings/src/gpu/submodules/parameter.cpp
index a7c42223e6a5bfa3caa89c0879e4133fc4123ad0..e1d8a1f63eb877ec7ae42beb858b0b7cb9253815 100644
--- a/pythonbindings/src/gpu/submodules/parameter.cpp
+++ b/pythonbindings/src/gpu/submodules/parameter.cpp
@@ -34,12 +34,12 @@
 #include <pybind11/functional.h>
 #include <pybind11/stl.h>
 #include <gpu/VirtualFluids_GPU/Parameter/Parameter.h>
-#include "lbm/constants/NumericConstants.h"
+#include "basics/constants/NumericConstants.h"
 #include <basics/config/ConfigurationFile.h>
 #include <gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h>
 
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 namespace parameter
 {
diff --git a/pythonbindings/src/logger/logger.cpp b/pythonbindings/src/logger/logger.cpp
index 555b502fa9a56299895de0fa6dd6cfeb66c15024..c4c99c0a5077303b398e0726eaba0420ddb0dceb 100644
--- a/pythonbindings/src/logger/logger.cpp
+++ b/pythonbindings/src/logger/logger.cpp
@@ -42,7 +42,7 @@ namespace logging
         py::module loggerModule = parentModule.def_submodule("logger");
 
         py::class_<vf::logging::Logger>(loggerModule, "Logger")
-        .def_static("initialize_logger", &vf::logging::Logger::initalizeLogger)
+        .def_static("initialize_logger", &vf::logging::Logger::initializeLogger)
         .def_static("change_log_path", &vf::logging::Logger::changeLogPath, py::arg("path"));
 
         // use f-strings (f"text {float}") in python for compounded messages
diff --git a/regression-tests/README.md b/regression-tests/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..471ce331cc90dbfcfa323011419f201031bc48bd
--- /dev/null
+++ b/regression-tests/README.md
@@ -0,0 +1,3 @@
+# Adding Regression Tests
+
+How to add a regression test is described in [this document](https://git.rz.tu-bs.de/irmb/virtualfluids/-/wikis/Regression-Tests).
\ No newline at end of file
diff --git a/regression-tests/__regression_test_executer.sh b/regression-tests/__regression_test_executer.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2541619299110bfcb872e48035c6e5e9fb8d7672
--- /dev/null
+++ b/regression-tests/__regression_test_executer.sh
@@ -0,0 +1,29 @@
+
+
+download_reference_data () { # fetch only sub-directory $1 of https://github.com/irmb/test_data into ./reference_data
+    rm -rf reference_data && mkdir -p reference_data
+    git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data reference_data || return 1 # without a clone, the git command below could act on an enclosing repository
+    cd reference_data || return 1
+    git sparse-checkout add "$1"
+    cd ..
+}
+
+
+# run regression test (stops early with a non-zero status if download, configure or build fails) - arguments:
+# 1. REFERENCE_DATA_DIR - to download the reference data and compare against
+# 2. CMAKE_FLAGS - cmake flags for the build of VirtualFluids (split on whitespace into separate arguments)
+# 3. APPLICATION - the application to be executed (split on whitespace into command and arguments)
+# 4. RESULT_DATA_DIR - the path to the produced data to be compared
+run_regression_test () {
+    download_reference_data "$1" || return 1
+
+    rm -rf build && mkdir -p build
+    cmake -B build $2 || return 1 # $2 is intentionally unquoted so that it expands to several flags
+    cmake --build build --parallel 8 || return 1
+
+    # execute the application ($3 is intentionally unquoted as well)
+    $3
+
+    # execute fieldcompare (A more comprehensive manual can be found here https://gitlab.com/dglaeser/fieldcompare)
+    fieldcompare dir "$4" "reference_data/$1" --include-files "*.vtu"
+}
\ No newline at end of file
diff --git a/regression-tests/driven_cavity_test.sh b/regression-tests/driven_cavity_test.sh
index e10a829d2680ab647ba0f66e0f2e85a70186007e..e4a7dcf050c81ea7f3ccd99cffd58edab80959bd 100755
--- a/regression-tests/driven_cavity_test.sh
+++ b/regression-tests/driven_cavity_test.sh
@@ -1,24 +1,19 @@
 #!/bin/bash
+source ./regression-tests/__regression_test_executer.sh
 
-#################################
-# Driven Cavity Regression Test
-#################################
 
-# build VirtualFluids accordingly to our specific test scenario.
-# in this case adding -DUSER_APPS="apps/gpu/LBM/DrivenCavity to the cmake command is not necessary, because the DrivenCavity is added to VirtualFluids by default.
-mkdir -p build
-cmake -B build --preset=gpu_make -DCMAKE_CUDA_ARCHITECTURES=75 #-DUSER_APPS="apps/gpu/LBM/DrivenCavity"
-cd build && make -j 8 && cd ..
+# 1. set reference data directory (must match the folder structure in https://github.com/irmb/test_data)
+REFERENCE_DATA_DIR=regression_tests/gpu/DrivenCavity_2Levels
 
-# execute VirtualFluids
-./build/bin/DrivenCavity
+# 2. set cmake flags for the build of VirtualFluids
+CMAKE_FLAGS="--preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75"
 
-# set the path to the produced data
-PATH_TO_DIR=output/DrivenCavity
+# 3. define the application to be executed
+APPLICATION=./build/bin/DrivenCavity
 
-# set the path to the reference data.
-# `regression-tests/reference_data` is fix `regression_tests/gpu/DrivenCavity_2Levels` must match the structure in https://github.com/irmb/test_data:
-PATH_TO_REFERENCE_DIR=regression-tests/reference_data/regression_tests/gpu/DrivenCavity_2Levels
+# 4. set the path to the produced data
+RESULT_DATA_DIR=output/DrivenCavity
+
+
+run_regression_test "$REFERENCE_DATA_DIR" "$CMAKE_FLAGS" "$APPLICATION" "$RESULT_DATA_DIR"
 
-# execute fieldcompare (A more comprehensive manual can be found here https://gitlab.com/dglaeser/fieldcompare)
-fieldcompare dir $PATH_TO_DIR --reference $PATH_TO_REFERENCE_DIR --include-files "*.vtu"
\ No newline at end of file
diff --git a/regression-tests/driven_cavity_uniform_test.sh b/regression-tests/driven_cavity_uniform_test.sh
new file mode 100755
index 0000000000000000000000000000000000000000..deb1300cad5914e69a4f2c01428bbef31d7af6d3
--- /dev/null
+++ b/regression-tests/driven_cavity_uniform_test.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+source ./regression-tests/__regression_test_executer.sh
+
+
+# 1. set reference data directory (must match the folder structure in https://github.com/irmb/test_data)
+REFERENCE_DATA_DIR=regression_tests/gpu/DrivenCavity_uniform
+
+# 2. set cmake flags for the build of VirtualFluids (no nested quotes: the value is split into single flags when it is passed to cmake)
+CMAKE_FLAGS="--preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 -DUSER_APPS=apps/gpu/LBM/DrivenCavityUniform"
+
+# 3. define the application to be executed
+APPLICATION=./build/bin/DrivenCavityUniform
+
+# 4. set the path to the produced data
+RESULT_DATA_DIR=output/DrivenCavity_uniform
+
+
+run_regression_test "$REFERENCE_DATA_DIR" "$CMAKE_FLAGS" "$APPLICATION" "$RESULT_DATA_DIR"
diff --git a/regression-tests/multigpu_test/rocket4GPU.yml b/regression-tests/multigpu_test/rocket4GPU.yml
new file mode 100755
index 0000000000000000000000000000000000000000..a05ffea6ad04e0d5cfb8d7749111726dfceb4609
--- /dev/null
+++ b/regression-tests/multigpu_test/rocket4GPU.yml
@@ -0,0 +1,51 @@
+host: $PHOENIX_REMOTE_HOST
+user: $PHOENIX_REMOTE_USER
+private_keyfile: $PHOENIX_PRIVATE_KEY
+
+copy:  # NOTE(review): presumably local path (from) -> remote path (to); confirm against the launcher's documentation
+  - from: 'regression-tests/multigpu_test/slurm4GPU.job'
+    to: 'multigpu_test/slurm4GPU.job'
+    overwrite: true
+
+  - from: 'CMake/'
+    to: 'multigpu_test/CMake/'
+    overwrite: true
+
+  - from: '3rdParty/'
+    to: 'multigpu_test/3rdParty/'
+    overwrite: true
+
+  - from: 'CMakeLists.txt'
+    to: 'multigpu_test/CMakeLists.txt'
+    overwrite: true
+
+  - from: 'gpu.cmake'
+    to: 'multigpu_test/gpu.cmake'
+    overwrite: true
+
+  - from: 'src/'
+    to: 'multigpu_test/src/'
+    overwrite: true
+
+  - from: 'CMakePresets.json'
+    to: 'multigpu_test/CMakePresets.json'
+    overwrite: true
+
+  - from: 'apps/gpu/LBM/'
+    to: 'multigpu_test/apps/gpu/LBM/'
+    overwrite: true
+
+collect:  # NOTE(review): presumably remote path (from) -> local path (to); confirm
+  - from: 'multigpu_test/output/4GPU/'
+    to: 'output/4GPU'
+    overwrite: true
+
+  - from: 'multigpu_test/slurm4GPU.out'
+    to: 'output/4GPU/slurm4GPU.out'
+    overwrite: true
+
+clean:
+  - 'multigpu_test/output/*'
+
+sbatch: 'multigpu_test/slurm4GPU.job'
+continue_if_job_fails: true
diff --git a/regression-tests/multigpu_test/rocket8GPU.yml b/regression-tests/multigpu_test/rocket8GPU.yml
new file mode 100755
index 0000000000000000000000000000000000000000..e8cc08a9fa39425686a16d193dba1743533994bc
--- /dev/null
+++ b/regression-tests/multigpu_test/rocket8GPU.yml
@@ -0,0 +1,51 @@
+host: $PHOENIX_REMOTE_HOST
+user: $PHOENIX_REMOTE_USER
+private_keyfile: $PHOENIX_PRIVATE_KEY
+
+copy:  # NOTE(review): presumably local path (from) -> remote path (to); confirm against the launcher's documentation
+  - from: 'regression-tests/multigpu_test/slurm8GPU.job'
+    to: 'multigpu_test/slurm8GPU.job'
+    overwrite: true
+
+  - from: 'CMake/'
+    to: 'multigpu_test/CMake/'
+    overwrite: true
+
+  - from: '3rdParty/'
+    to: 'multigpu_test/3rdParty/'
+    overwrite: true
+
+  - from: 'CMakeLists.txt'
+    to: 'multigpu_test/CMakeLists.txt'
+    overwrite: true
+
+  - from: 'gpu.cmake'
+    to: 'multigpu_test/gpu.cmake'
+    overwrite: true
+
+  - from: 'src/'
+    to: 'multigpu_test/src/'
+    overwrite: true
+
+  - from: 'CMakePresets.json'
+    to: 'multigpu_test/CMakePresets.json'
+    overwrite: true
+
+  - from: 'apps/gpu/LBM/'
+    to: 'multigpu_test/apps/gpu/LBM/'
+    overwrite: true
+
+collect:  # NOTE(review): presumably remote path (from) -> local path (to); confirm
+  - from: 'multigpu_test/output/8GPU/'
+    to: 'output/8GPU'
+    overwrite: true
+
+  - from: 'multigpu_test/slurm8GPU.out'
+    to: 'output/8GPU/slurm8GPU.out'
+    overwrite: true
+
+clean:
+  - 'multigpu_test/output/*'
+
+sbatch: 'multigpu_test/slurm8GPU.job'
+continue_if_job_fails: true
diff --git a/regression-tests/multigpu_test/slurm4GPU.job b/regression-tests/multigpu_test/slurm4GPU.job
new file mode 100755
index 0000000000000000000000000000000000000000..886bfaf7479e01cfef285e9a2dae189258ce0b7e
--- /dev/null
+++ b/regression-tests/multigpu_test/slurm4GPU.job
@@ -0,0 +1,33 @@
+#!/bin/bash -l
+# Slurm job: builds DrivenCavityMultiGPU and SphereScaling and runs each of them with 4 MPI processes.
+#SBATCH --partition=gpu01_queue
+#SBATCH --nodes=1
+#SBATCH --time=10:00:00
+#SBATCH --job-name=Regr4GPU
+#SBATCH --ntasks-per-node=4
+#SBATCH --gres=gpu:4
+#SBATCH --output=multigpu_test/slurm4GPU.out
+##SBATCH --exclusive
+
+module purge
+module load comp/ccache/4.1 # loads comp/gcc/9.3.0
+module load mpi/openmpi/4.0.5_gcc_9.3/openmpi
+module load cuda/11.3
+module load comp/git/2.27.0
+PATH=/home/irmb/tools/cmake-3.20.3-linux-x86_64/bin:$PATH
+
+module list
+
+cd multigpu_test || exit 1
+mkdir -p build
+cd build || exit 1
+cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/LBM/DrivenCavityMultiGPU\;apps/gpu/LBM/SphereScaling || exit 1
+make -j 16 || exit 1 # abort on a failed build: the build directory is reused, so binaries of an earlier build could be tested otherwise
+cd ..
+mkdir -p output
+
+echo $'\n\n\n\n---First test: DrivenCavityMultiGPU on 4 GPUs\n\n'
+mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "configPhoenix4GPU_regressionTest.txt"
+
+echo $'\n\n\n\n---Second test: SphereScaling on 4 GPUs\n\n'
+mpirun -np 4 "./build/bin/SphereScaling"        "configPhoenix4GPU_regressionTest.txt"
\ No newline at end of file
diff --git a/regression-tests/multigpu_test/slurm8GPU.job b/regression-tests/multigpu_test/slurm8GPU.job
new file mode 100755
index 0000000000000000000000000000000000000000..333d5c77b176329947fb5d0452a0187208f323d4
--- /dev/null
+++ b/regression-tests/multigpu_test/slurm8GPU.job
@@ -0,0 +1,33 @@
+#!/bin/bash -l
+# Slurm job: builds DrivenCavityMultiGPU and SphereScaling and runs each of them with 8 MPI processes.
+#SBATCH --partition=gpu01_queue
+#SBATCH --nodes=2
+#SBATCH --time=10:00:00
+#SBATCH --job-name=Regr8GPU
+#SBATCH --ntasks-per-node=4
+#SBATCH --gres=gpu:4
+#SBATCH --output=multigpu_test/slurm8GPU.out
+##SBATCH --exclusive
+
+module purge
+module load comp/ccache/4.1 # loads comp/gcc/9.3.0
+module load mpi/openmpi/4.0.5_gcc_9.3/openmpi
+module load cuda/11.3
+module load comp/git/2.27.0
+PATH=/home/irmb/tools/cmake-3.20.3-linux-x86_64/bin:$PATH
+
+module list
+
+cd multigpu_test || exit 1
+mkdir -p build
+cd build || exit 1
+cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/LBM/DrivenCavityMultiGPU\;apps/gpu/LBM/SphereScaling || exit 1
+make -j 16 || exit 1 # abort on a failed build: the build directory is reused, so binaries of an earlier build could be tested otherwise
+cd ..
+mkdir -p output
+
+echo $'\n\n\n\n---First test: DrivenCavityMultiGPU on 8 GPUs\n\n'
+mpirun -np 8 "./build/bin/DrivenCavityMultiGPU" "configPhoenix8GPU_regressionTest.txt"
+
+echo $'\n\n\n\n---Second test: SphereScaling on 8 GPUs\n\n'
+mpirun -np 8 "./build/bin/SphereScaling"        "configPhoenix8GPU_regressionTest.txt"
\ No newline at end of file
diff --git a/regression-tests/reference_data b/regression-tests/reference_data
deleted file mode 160000
index a5787a2312095d7374a8cfe6225d4d1409024081..0000000000000000000000000000000000000000
--- a/regression-tests/reference_data
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit a5787a2312095d7374a8cfe6225d4d1409024081
diff --git a/regression-tests/regression-tests.sh b/regression-tests/regression-tests.sh
deleted file mode 100755
index 5b7d227907594b727103be91d2382c05a07b9c6f..0000000000000000000000000000000000000000
--- a/regression-tests/regression-tests.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-#################################
-# VirtualFludis regression tests
-#################################
-
-
-# 1. Cloning the reference data from github
-mkdir -p regression-tests/reference_data
-git clone https://github.com/irmb/test_data regression-tests/reference_data
-
-# 2. set up the python environnement
-#    by cloning our meshio patch and fieldcompare into a venv
-python3 -m venv .venv
-source .venv/bin/activate
-pip install rich
-pip install git+https://github.com/soerenPeters/meshio@update-pyproject-version
-pip install git+https://gitlab.com/dglaeser/fieldcompare
-
-# 3. Running the specific tests
-./regression-tests/driven_cavity_test.sh
-
-
-
-# How to add a new regression test?
-# 1. setup the specfic simulation and run it to create reference data.
-# 2. fork https://github.com/irmb/test_data and create a pull request containing the reference data.
-# 3. copy ./regression-tests/driven_cavity_test.sh and adjust the file accordingly to the new test scenario.
-# 4. execute this file from here accordingly to #3.
\ No newline at end of file
diff --git a/regression-tests/sphere_in_channel_test.sh b/regression-tests/sphere_in_channel_test.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e416d535d274537d008a442435f66a38dd69fac1
--- /dev/null
+++ b/regression-tests/sphere_in_channel_test.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Regression test: builds VirtualFluids with the make_gpu preset, runs SphereGPU and compares output/Sphere with the SphereInChannel reference data.
+source ./regression-tests/__regression_test_executer.sh
+
+# 1. set reference data directory (must match the folder structure in https://github.com/irmb/test_data)
+REFERENCE_DATA_DIR=regression_tests/gpu/SphereInChannel
+
+# 2. set cmake flags for the build of VirtualFluids
+CMAKE_FLAGS="--preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75"
+
+# 3. define the application to be executed
+APPLICATION=./build/bin/SphereGPU
+
+# 4. set the path to the produced data
+RESULT_DATA_DIR=output/Sphere
+
+
+run_regression_test "$REFERENCE_DATA_DIR" "$CMAKE_FLAGS" "$APPLICATION" "$RESULT_DATA_DIR"
+# NOTE(review): presumably kept to re-run only the comparison by hand - confirm or remove:
+# fieldcompare dir output/Sphere reference_data/regression_tests/gpu/SphereInChannel --include-files "*.vtu"
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index f3bc36f29efe64b81916efa04b9d7831a2abeb09..5894f9dec06953c3eeb909af96db9cb19d202d65 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -5,7 +5,7 @@ long_description = file: README.md
 long_description_content_type = text/markdown
 platforms = any
 url = https://git.rz.tu-bs.de/irmb/virtualfluids
-version = 0.0.1
+version = 0.1.0
 
 [options]
 python_requires = >=3.6
diff --git a/src/basics/CMakeLists.txt b/src/basics/CMakeLists.txt
index 7f871424b2c6849d2c0f6e8d277b17214fa5cd9c..7e4cccf26f0c54a47d720f8d33fad435800838b7 100644
--- a/src/basics/CMakeLists.txt
+++ b/src/basics/CMakeLists.txt
@@ -1,19 +1,18 @@
 
-include(Core/buildInfo.cmake)
+include(buildInfo.cmake)
 
 vf_add_library(PUBLIC_LINK logger MPI::MPI_CXX EXCLUDE buildInfo.in.cpp)
 
 vf_get_library_name (library_name)
-target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/Core)
 target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/geometry3d)
 target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/geometry3d/KdTree)
-target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/basics/container)
-target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/basics/memory)
-target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/basics/objects)
-target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/basics/parallel)
-target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/basics/transmitter)
-target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/basics/utilities)
-target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/basics/writer)
+target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/container)
+target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/memory)
+target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/objects)
+target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/parallel)
+target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/transmitter)
+target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/utilities)
+target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/writer)
 
 
 
@@ -21,4 +20,8 @@ IF(MSVC)
     target_compile_definitions(${library_name} PUBLIC NOMINMAX) # Disable Min/Max-Macros
 ENDIF(MSVC)
 
+if(BUILD_USE_BOOST)
+    target_link_libraries(${library_name} PRIVATE Boost::boost)
+endif()
+
 vf_add_tests()
diff --git a/src/basics/Core/ArrayTypes.h b/src/basics/Core/ArrayTypes.h
deleted file mode 100644
index f899c92a7be4f29065b55b13fb1a1181da0eaf7e..0000000000000000000000000000000000000000
--- a/src/basics/Core/ArrayTypes.h
+++ /dev/null
@@ -1,94 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file ArrayTypes.h
-//! \ingroup Core
-//! \author Konstantin Kutscher, Soeren Textor, Sebastian Geller
-//=======================================================================================
-#ifndef ARRAYTYPES_H
-#define ARRAYTYPES_H
-
-#include <array>
-
-#include "DataTypes.h"
-
-typedef std::array<uint, 2> uint_2;
-typedef std::array<uint, 3> uint_3;
-typedef std::array<uint, 4> uint_4;
-typedef std::array<uint, 5> uint_5;
-typedef std::array<uint, 6> uint_6;
-typedef std::array<uint, 7> uint_7;
-typedef std::array<uint, 8> uint_8;
-typedef std::array<uint, 9> uint_9;
-typedef std::array<uint, 10> uint_10;
-typedef std::array<uint, 11> uint_11;
-typedef std::array<uint, 12> uint_12;
-typedef std::array<uint, 13> uint_13;
-typedef std::array<uint, 14> uint_14;
-typedef std::array<uint, 15> uint_15;
-typedef std::array<uint, 16> uint_16;
-typedef std::array<uint, 17> uint_17;
-typedef std::array<uint, 18> uint_18;
-typedef std::array<uint, 19> uint_19;
-typedef std::array<uint, 20> uint_20;
-typedef std::array<uint, 21> uint_21;
-typedef std::array<uint, 22> uint_22;
-typedef std::array<uint, 23> uint_23;
-typedef std::array<uint, 24> uint_24;
-typedef std::array<uint, 25> uint_25;
-typedef std::array<uint, 26> uint_26;
-typedef std::array<uint, 27> uint_27;
-
-typedef std::array<bool, 2> bool_2;
-typedef std::array<bool, 3> bool_3;
-typedef std::array<bool, 4> bool_4;
-typedef std::array<bool, 5> bool_5;
-typedef std::array<bool, 6> bool_6;
-typedef std::array<bool, 7> bool_7;
-typedef std::array<bool, 8> bool_8;
-typedef std::array<bool, 9> bool_9;
-typedef std::array<bool, 10> bool_10;
-typedef std::array<bool, 11> bool_11;
-typedef std::array<bool, 12> bool_12;
-typedef std::array<bool, 13> bool_13;
-typedef std::array<bool, 14> bool_14;
-typedef std::array<bool, 15> bool_15;
-typedef std::array<bool, 16> bool_16;
-typedef std::array<bool, 17> bool_17;
-typedef std::array<bool, 18> bool_18;
-typedef std::array<bool, 19> bool_19;
-typedef std::array<bool, 20> bool_20;
-typedef std::array<bool, 21> bool_21;
-typedef std::array<bool, 22> bool_22;
-typedef std::array<bool, 23> bool_23;
-typedef std::array<bool, 24> bool_24;
-typedef std::array<bool, 25> bool_25;
-typedef std::array<bool, 26> bool_26;
-typedef std::array<bool, 27> bool_27;
-
-#endif
diff --git a/src/basics/Core/LbmOrGks.h b/src/basics/Core/LbmOrGks.h
deleted file mode 100644
index ae4a17cad6488ee8ec95c19c3e3766abd408b0bf..0000000000000000000000000000000000000000
--- a/src/basics/Core/LbmOrGks.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file LbmOrGks.h
-//! \ingroup Core
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef LBMORGKS_H
-#define LBMORGKS_H
-
-enum LbmOrGks { LBM, GKS };
-
-#endif
diff --git a/src/basics/Core/Logger/Logger.cpp b/src/basics/Core/Logger/Logger.cpp
deleted file mode 100644
index 1a27ef6f2a33bf46b9f488a4cf8d17705c2a64e6..0000000000000000000000000000000000000000
--- a/src/basics/Core/Logger/Logger.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file Logger.cpp
-//! \ingroup Logger
-//! \author Stephan Lenz
-//=======================================================================================
-#include "Logger.h"
-#include <iostream>
-#include <memory>
-
-#include "implementations/LoggerImp.h"
-
-namespace logging
-{
-
-std::shared_ptr<Logger> out = nullptr;
-
-logging::Logger::Level logging::Logger::globalLogLevel = logging::Logger::INFO_LOW;
-logging::Logger::Level logging::Logger::localLogLevel  = logging::Logger::INFO_LOW;
-bool logging::Logger::printRankNumber                  = false;
-bool logging::Logger::timeStampEnabled                 = false;
-
-logging::Logger::Logger(std::ostream *stream) { streams.push_back(stream); }
-
-logging::Logger::~Logger() = default;
-
-void Logger::addStreamToList(std::ostream *stream) { streams.push_back(stream); }
-
-void Logger::resetStreamList() { streams.clear(); }
-
-//-----------static methods----------------//
-void logging::Logger::resetStreams()
-{
-    if (!out)
-        out = std::make_shared<LoggerImp>(&std::cout);
-
-    out->resetStreamList();
-}
-
-void logging::Logger::setStream(std::ostream *stream) { out = std::make_shared<LoggerImp>(stream); }
-
-void logging::Logger::addStream(std::ostream *stream)
-{
-    if (!out)
-        out = std::make_shared<LoggerImp>(stream);
-    else
-        out->addStreamToList(stream);
-}
-
-void logging::Logger::timeStamp(TimeStamp timeStamp)
-{
-    switch (timeStamp) {
-        case ENABLE:
-            timeStampEnabled = true;
-            break;
-        case DISABLE:
-            timeStampEnabled = false;
-            break;
-    }
-}
-
-void logging::Logger::setDebugLevel(const Level &level) { globalLogLevel = level; }
-
-void logging::Logger::enablePrintedRankNumbers(bool print) { printRankNumber = print; }
-
-} // namespace logging
diff --git a/src/basics/Core/Logger/Logger.h b/src/basics/Core/Logger/Logger.h
deleted file mode 100644
index 53cb974b8e409a8ba6eb555abac41ca99eb2bbcf..0000000000000000000000000000000000000000
--- a/src/basics/Core/Logger/Logger.h
+++ /dev/null
@@ -1,88 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file Logger.h
-//! \ingroup Logger
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef Logger_H
-#define Logger_H
-
-#include "basics_export.h"
-
-#include <memory>
-#include <ostream>
-#include <string>
-#include <vector>
-
-namespace logging
-{
-class BASICS_EXPORT Logger
-{
-protected:
-    Logger(std::ostream *stream);
-
-public:
-    virtual ~Logger();
-
-    enum Level { INFO_LOW = 3, INFO_INTERMEDIATE = 2, INFO_HIGH = 1, WARNING = 0, LOGGER_ERROR = -1 };
-
-    enum TimeStamp { ENABLE, DISABLE };
-
-    static void setStream(std::ostream *stream);
-    static void addStream(std::ostream *stream);
-    static void resetStreams();
-
-    static void timeStamp(TimeStamp timeStamp);
-
-    static void setDebugLevel(const Level &level = Level::LOGGER_ERROR);
-    static void enablePrintedRankNumbers(bool printRankNumbers);
-
-    virtual Logger &operator<<(const Level &level)       = 0;
-    virtual Logger &operator<<(const std::string &log)   = 0;
-    virtual Logger &operator<<(const int &log)           = 0;
-    virtual Logger &operator<<(const unsigned int &log)  = 0;
-    virtual Logger &operator<<(const unsigned long &log) = 0;
-    virtual Logger &operator<<(const float &log)         = 0;
-    virtual Logger &operator<<(const double &log)        = 0;
-
-protected:
-    void addStreamToList(std::ostream *stream);
-    void resetStreamList();
-
-    std::vector<std::ostream *> streams;
-
-    static Level globalLogLevel;
-    static Level localLogLevel;
-    static bool printRankNumber;
-    static bool timeStampEnabled;
-};
-extern BASICS_EXPORT std::shared_ptr<Logger> out;
-} // namespace logging
-
-#endif
diff --git a/src/basics/Core/Logger/implementations/LoggerImp.cpp b/src/basics/Core/Logger/implementations/LoggerImp.cpp
deleted file mode 100644
index 086c0e3aa5153f017905cd8476248ea7a7434335..0000000000000000000000000000000000000000
--- a/src/basics/Core/Logger/implementations/LoggerImp.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file LoggerImp.cpp
-//! \ingroup Logger
-//! \author Stephan Lenz
-//=======================================================================================
-#include "LoggerImp.h"
-
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <mpi.h>
-#include <sstream>
-
-logging::LoggerImp::LoggerImp(std::ostream *stream) : logging::Logger(stream)
-{
-    levelString[Level::WARNING]           = "[WARNING]          ";
-    levelString[Level::LOGGER_ERROR]      = "[ERROR]            ";
-    levelString[Level::INFO_LOW]          = "[INFO_LOW]         ";
-    levelString[Level::INFO_INTERMEDIATE] = "[INFO_INTERMEDIATE]";
-    levelString[Level::INFO_HIGH]         = "[INFO_HIGH]        ";
-}
-
-logging::LoggerImp::~LoggerImp() = default;
-
-logging::Logger &logging::LoggerImp::operator<<(const Level &level)
-{
-    localLogLevel = level;
-    return *this;
-}
-
-logging::Logger &logging::LoggerImp::operator<<(const std::string &message) { return this->log(message); }
-
-logging::Logger &logging::LoggerImp::operator<<(const int &message) { return this->log(std::to_string(message)); }
-
-logging::Logger &logging::LoggerImp::operator<<(const unsigned int &message)
-{
-    return this->log(std::to_string(message));
-}
-
-logging::Logger &logging::LoggerImp::operator<<(const unsigned long &message)
-{
-    return this->log(std::to_string(message));
-}
-
-logging::Logger &logging::LoggerImp::operator<<(const float &message) { return this->log(std::to_string(message)); }
-
-logging::Logger &logging::LoggerImp::operator<<(const double &message) { return this->log(std::to_string(message)); }
-
-logging::Logger &logging::LoggerImp::log(const std::string &message)
-{
-    if (shouldBeLogged()) {
-        std::string modifiedMessage = message;
-        addDebugInformation(modifiedMessage);
-        for (auto stream : streams)
-            *stream << modifiedMessage << std::flush;
-    }
-    std::size_t found = message.find(std::string("\n"));
-    if (found != std::string::npos)
-        newLoggingLine = true;
-    else
-        newLoggingLine = false;
-
-    return *this;
-}
-
-bool logging::LoggerImp::shouldBeLogged() { return localLogLevel <= globalLogLevel; }
-
-void logging::LoggerImp::addDebugInformation(std::string &message)
-{
-    if (newLoggingLine) {
-        std::stringstream os;
-        os << levelString[localLogLevel] << getTimeStamp() << " " << message;
-        message = os.str();
-    }
-}
-
-std::string logging::LoggerImp::getTimeStamp()
-{
-    if (!timeStampEnabled)
-        return "";
-
-    const auto now = std::chrono::system_clock::now();
-    time_t tt      = std::chrono::system_clock::to_time_t(now);
-    // const tm utc_tm = *gmtime(&tt);
-    const tm local_tm = *localtime(&tt);
-
-    std::stringstream os;
-    os << " [" << std::setw(2) << std::setfill('0') << local_tm.tm_hour << ":";
-    os << std::setw(2) << std::setfill('0') << local_tm.tm_min << ":";
-    os << std::setw(2) << std::setfill('0') << local_tm.tm_sec << "]";
-    return os.str();
-}
-
-std::string logging::LoggerImp::getRankString()
-{
-    int rank;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    return printRankNumber ? "[" + std::to_string(rank) + "] " : "";
-}
diff --git a/src/basics/Core/Logger/implementations/LoggerImp.h b/src/basics/Core/Logger/implementations/LoggerImp.h
deleted file mode 100644
index 070b06a23ee59a1d387f09fc86960dd3083c4e61..0000000000000000000000000000000000000000
--- a/src/basics/Core/Logger/implementations/LoggerImp.h
+++ /dev/null
@@ -1,75 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file LoggerImp.h
-//! \ingroup Logger
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef LoggerImp_H
-#define LoggerImp_H
-
-#include "basics_export.h"
-
-#include <string>
-
-#include "../Logger.h"
-#include <map>
-
-namespace logging
-{
-
-class BASICS_EXPORT LoggerImp : public Logger
-{
-public:
-    LoggerImp(std::ostream *stream);
-    ~LoggerImp() override;
-
-    Logger &operator<<(const Level &level) override;
-    Logger &operator<<(const std::string &message) override;
-    Logger &operator<<(const int &message) override;
-    Logger &operator<<(const unsigned int &message) override;
-    Logger &operator<<(const unsigned long &log) override;
-    Logger &operator<<(const float &message) override;
-    Logger &operator<<(const double &message) override;
-
-private:
-    std::string getRankString();
-    static bool shouldBeLogged();
-
-    static std::string getTimeStamp();
-    void addDebugInformation(std::string &message);
-    logging::Logger &log(const std::string &message);
-
-private:
-    std::map<Logger::Level, std::string> levelString;
-    bool newLoggingLine = true;
-};
-
-} // namespace logging
-
-#endif
diff --git a/src/basics/Core/Logger/implementations/LoggerTest.cpp b/src/basics/Core/Logger/implementations/LoggerTest.cpp
deleted file mode 100644
index 2c4e4e6819a19497c3061caddbfa4c2a54953054..0000000000000000000000000000000000000000
--- a/src/basics/Core/Logger/implementations/LoggerTest.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-#include "gmock/gmock.h"
-#include <fstream>
-#include <memory>
-
-#include "../Logger.h"
-
-TEST(DISABLED_LoggerTest, logStringWithoutSettingLevels_WillPutTheLogMesssageIntoTheStream)
-{
-    std::ostringstream stream;
-    logging::Logger::setStream(&stream);
-
-    *logging::out << logging::Logger::INFO_LOW << "Hello World\n";
-
-    EXPECT_THAT(stream.str(), "[LOW] Hello World\n");
-}
-
-TEST(DISABLED_LoggerTest, logStringWithHighDebugLevel_logOnlyHighLevelMessages)
-{
-    std::ostringstream stream;
-    logging::Logger::setStream(&stream);
-
-    logging::Logger::setDebugLevel(logging::Logger::INFO_HIGH);
-    *logging::out << logging::Logger::INFO_LOW << "Low Debug Message\n"
-                  << logging::Logger::INFO_HIGH << "HIGH Debug Message\n";
-
-    EXPECT_THAT(stream.str(), "[HIGH] HIGH Debug Message\n");
-}
-
-TEST(DISABLED_LoggerTest, addTwoStreams_shouldWriteToBoth)
-{
-    logging::Logger::resetStreams();
-
-    std::ostringstream stream1, stream2;
-    logging::out->addStream(&stream1);
-    logging::out->addStream(&stream2);
-    logging::Logger::setDebugLevel(logging::Logger::INFO_LOW);
-
-    *logging::out << logging::Logger::INFO_LOW << "Hello World\n";
-
-    EXPECT_THAT(stream1.str(), "[LOW] Hello World\n");
-    EXPECT_THAT(stream2.str(), "[LOW] Hello World\n");
-}
-
-TEST(DISABLED_LoggerTest, splittetOutputShouldHaveDebugInformationOnce)
-{
-    std::ostringstream stream;
-    logging::Logger::setStream(&stream);
-
-    *logging::out << logging::Logger::INFO_LOW << "Hello"
-                  << " World\n";
-
-    EXPECT_THAT(stream.str(), "[LOW] Hello World\n");
-}
-
-TEST(DISABLED_LoggerTest, enableTimeStampInOutput)
-{
-    std::ostringstream stream;
-    logging::Logger::setStream(&stream);
-    logging::Logger::timeStamp(logging::Logger::TimeStamp::ENABLE);
-
-    *logging::out << logging::Logger::INFO_LOW << "Hello"
-                  << " World\n";
-
-    EXPECT_THAT(stream.str(), testing::StrNe("[LOW] Hello World\n"));
-}
diff --git a/src/basics/Core/NonCreatable.h b/src/basics/Core/NonCreatable.h
deleted file mode 100644
index c581a19ec92b666f45109f1e3c66b9e57fb54614..0000000000000000000000000000000000000000
--- a/src/basics/Core/NonCreatable.h
+++ /dev/null
@@ -1,44 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file NonCreatable.h
-//! \ingroup Core
-//! \author Soeren Peters
-//=======================================================================================
-#ifndef NON_CREATABLE_H
-#define NON_CREATABLE_H
-
-class NonCreatable
-{
-private:
-    NonCreatable()                     = delete;
-    NonCreatable(const NonCreatable &) = delete;
-    NonCreatable &operator=(const NonCreatable &) = delete;
-};
-
-#endif
diff --git a/src/basics/Core/RealConstants.h b/src/basics/Core/RealConstants.h
deleted file mode 100644
index 5cb42584e29d0e0a1a9cf1b40f50736dd13169f0..0000000000000000000000000000000000000000
--- a/src/basics/Core/RealConstants.h
+++ /dev/null
@@ -1,255 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file RealConstants.h
-//! \ingroup Core
-//! \author Martin Schoenherr
-//=======================================================================================
-//#ifndef REAL_CONSTANT_H
-//#define REAL_CONSTANT_H
-//
-//#ifdef VF_DOUBLE_ACCURACY
-//#define c1o2 0.5
-//#define c3o2 1.5
-//#define c1o3 0.333333333333333
-//#define c2o3 0.666666666666667
-//#define c1o4 0.25
-//#define c3o4 0.75
-//#define c1o6 0.166666666666667
-//#define c1o7 0.142857142857143
-//#define c1o8 0.125
-//#define c1o9 0.111111111111111
-//#define c2o9 0.222222222222222
-//#define c4o9 0.444444444444444
-//#define c1o10 0.1
-//#define c1o12 0.083333333333333
-//#define c1o16 0.0625
-//#define c3o16 0.1875
-//#define c9o16 0.5625
-//#define c1o18 0.055555555555556
-//#define c1o20 0.05
-//#define c19o20 0.95
-//#define c21o20 1.05
-//#define c1o24 0.041666666666667
-//#define c1o27 0.037037037037037
-//#define c3o32 0.09375
-//#define c4o32 0.125
-//#define c1o36 0.027777777777778
-//#define c1o48 0.020833333333333
-//#define c1o64 0.015625
-//#define c3o64 0.046875
-//#define c9o64 0.140625
-//#define c27o64 0.421875
-//#define c1o66 0.015151515151515
-//#define c1o72 0.013888888888889
-//#define c1o264 0.003787878787879
-//#define c8o27 0.296296296296296
-//#define c2o27 0.074074074074074
-//#define c1o54 0.018518518518519
-//#define c1o100 0.01
-//#define c99o100 0.99
-//#define c1o126 0.007936507936508
-//#define c1o216 0.004629629629630
-//#define c5o4 1.25
-//#define c9o4 2.25
-//#define c5o2 2.5
-//#define c9o2 4.5
-//
-//#define c0o1 0.
-//#define c1o1 1.
-//#define c2o1 2.
-//#define c3o1 3.
-//#define c4o1 4.
-//#define c5o1 5.
-//#define c6o1 6.
-//#define c7o1 7.
-//#define c8o1 8.
-//#define c9o1 9.
-//#define c10o1 10.
-//#define c11o1 11.
-//#define c12o1 12.
-//#define c13o1 13.
-//#define c14o1 14.
-//#define c15o1 15.
-//#define c16o1 16.
-//#define c17o1 17.
-//#define c18o1 18.
-//#define c21o1 21.
-//#define c24o1 24.
-//#define c25o1 25.
-//#define c26o1 26.
-//#define c27o1 27.
-//#define c28o1 28.
-//#define c29o1 29.
-//#define c30o1 30.
-//#define c32o1 32.
-//#define c33o1 33.
-//#define c34o1 34.
-//#define c36o1 36.
-//#define c40o1 40.
-//#define c42o1 42.
-//#define c46o1 46.
-//#define c48o1 48.
-//#define c50o1 50.
-//#define c52o1 52.
-//#define c54o1 54.
-//#define c56o1 56.
-//#define c64o1 64.
-//#define c66o1 66.
-//#define c68o1 68.
-//#define c69o1 69.
-//#define c72o1 72.
-//#define c84o1 84.
-//#define c88o1 88.
-//#define c96o1 96.
-//#define c100o1 100.0
-//#define c130o1 130.0
-//#define c152o1 152.0
-//#define c166o1 166.0
-//#define c195o1 195.0
-//#define c216o1 216.0
-//#define c264o1 264.0
-//#define c290o1 290.0
-//#define c367o1 367.0
-//
-//#define Op0000002 0.0000002
-//#define c10eM30 1e-30
-//#define c10eM10 1e-10
-//#define smallSingle 0.0000000002
-//
-//#else
-//#define c1o2 0.5f
-//#define c3o2 1.5f
-//#define c1o3 (1.0f / 3.0f)
-//#define c2o3 (2.0f / 3.0f)
-//#define c1o4 0.25f
-//#define c3o4 0.75f
-//#define c1o6 (1.0f / 6.0f)
-//#define c1o7 (1.0f / 7.0f)
-//#define c1o8 0.125f
-//#define c1o9 (1.0f / 9.0f)
-//#define c2o9 (2.0f / 9.0f)
-//#define c4o9 (4.0f / 9.0f)
-//#define c1o10 0.1f
-//#define c1o12 (1.0f / 12.0f)
-//#define c1o16 0.0625f
-//#define c3o16 0.1875f
-//#define c9o16 0.5625f
-//#define c1o18 (1.0f / 18.0f)
-//#define c1o20 0.05f
-//#define c19o20 0.95f
-//#define c21o20 1.05f
-//#define c1o24 (1.0f / 24.0f)
-//#define c1o27 (1.0f / 27.0f)
-//#define c3o32 0.09375f
-//#define c4o32 0.125f
-//#define c1o36 (1.0f / 36.0f)
-//#define c1o48 (1.0f / 48.0f)
-//#define c1o64 0.015625f
-//#define c3o64 0.046875f
-//#define c9o64 0.140625f
-//#define c27o64 0.421875f
-//#define c1o66 (1.0f / 66.0f)
-//#define c1o72 (1.0f / 72.0f)
-//#define c1o264 (1.0f / 264.0f)
-//#define c8o27 (8.0f / 27.0f)
-//#define c2o27 (2.0f / 27.0f)
-//#define c1o54 (1.0f / 54.0f)
-//#define c1o100 0.01f
-//#define c99o100 0.99f
-//#define c1o126 (1.0f / 126.0f)
-//#define c1o216 (1.0f / 216.0f)
-//#define c5o4 1.25f
-//#define c9o4 2.25f
-//#define c5o2 2.5f
-//#define c9o2 4.5f
-//
-//#define c0o1 0.f
-//#define c1o1 1.f
-//#define c2o1 2.f
-//#define c3o1 3.f
-//#define c4o1 4.f
-//#define c5o1 5.f
-//#define c6o1 6.f
-//#define c7o1 7.f
-//#define c8o1 8.f
-//#define c9o1 9.f
-//#define c10o1 10.f
-//#define c11o1 11.f
-//#define c12o1 12.f
-//#define c13o1 13.f
-//#define c14o1 14.f
-//#define c15o1 15.f
-//#define c16o1 16.f
-//#define c17o1 17.f
-//#define c18o1 18.f
-//#define c21o1 21.f
-//#define c24o1 24.f
-//#define c25o1 25.f
-//#define c26o1 26.f
-//#define c27o1 27.f
-//#define c28o1 28.f
-//#define c29o1 29.f
-//#define c30o1 30.f
-//#define c32o1 32.f
-//#define c33o1 33.f
-//#define c34o1 34.f
-//#define c36o1 36.f
-//#define c40o1 40.f
-//#define c42o1 42.f
-//#define c46o1 46.f
-//#define c48o1 48.f
-//#define c50o1 50.f
-//#define c52o1 52.f
-//#define c54o1 54.f
-//#define c56o1 56.f
-//#define c64o1 64.f
-//#define c66o1 66.f
-//#define c68o1 68.f
-//#define c69o1 69.f
-//#define c72o1 72.f
-//#define c84o1 84.f
-//#define c88o1 88.f
-//#define c96o1 96.f
-//#define c100o1 100.0f
-//#define c130o1 130.0f
-//#define c152o1 152.0f
-//#define c166o1 166.0f
-//#define c195o1 195.0f
-//#define c216o1 216.0f
-//#define c264o1 264.0f
-//#define c290o1 290.0f
-//#define c367o1 367.0f
-//
-//#define Op0000002 0.0000002f
-//#define c10eM30 1e-30
-//#define c10eM10 1e-10
-//#define smallSingle 0.0000000002f
-//#endif
-//
-//#endif
\ No newline at end of file
diff --git a/src/basics/Core/VectorTypes.cpp b/src/basics/Core/VectorTypes.cpp
deleted file mode 100644
index ecc986be836cb3c85d760f4db408707bfb1180b4..0000000000000000000000000000000000000000
--- a/src/basics/Core/VectorTypes.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file VectorTypes.cpp
-//! \ingroup Core
-//! \author Soeren Peters
-//=======================================================================================
-#include "VectorTypes.h"
-
-// Vec3 Vec3::operator+( Vec3& left, Vec3& right ){
-Vec3 Vec3::operator+(Vec3 &right) { return { this->x + right.x, this->y + right.y, this->z + right.z }; }
-
-Vec3 Vec3::operator-(Vec3 &right) { return { this->x - right.x, this->y - right.y, this->z - right.z }; }
-
-Vec3 operator*(real scalar, Vec3 &vec) { return { scalar * vec.x, scalar * vec.y, scalar * vec.z }; }
diff --git a/src/basics/Core/VectorTypes.h b/src/basics/Core/VectorTypes.h
deleted file mode 100644
index 2ca45b359397c712b8b8695b9a99b4c5f8c324d1..0000000000000000000000000000000000000000
--- a/src/basics/Core/VectorTypes.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file VectorTypes.h
-//! \ingroup Core
-//! \author Soeren Peters
-//=======================================================================================
-#ifndef VECTORTYPES_H
-#define VECTORTYPES_H
-
-#ifdef __CUDACC__
-#include <cuda_runtime.h>
-#else
-#ifndef __host__
-#define __host__
-#endif
-#ifndef __device__
-#define __device__
-#endif
-#endif
-
-#include <cmath>
-
-#include "basics_export.h"
-
-#include "DataTypes.h"
-#include "RealConstants.h"
-
-struct BASICS_EXPORT Vec3 {
-    real x{ 0. }, y{ 0. }, z{ 0. };
-
-    __host__ __device__ Vec3(real x, real y, real z) : x(x), y(y), z(z) {}
-    Vec3() = default;
-
-    __host__ __device__ real length() { return std::sqrt(x * x + y * y + z * z); }
-
-    Vec3 operator+(Vec3 &right);
-    Vec3 operator-(Vec3 &right);
-};
-
-// BASICS_EXPORT Vec3 operator+( Vec3& left, Vec3& right );
-// BASICS_EXPORT Vec3 operator-( Vec3& left, Vec3& right );
-BASICS_EXPORT Vec3 operator*(real scalar, Vec3 &vec);
-
-#endif
diff --git a/src/basics/Core/DataTypes.h b/src/basics/DataTypes.h
similarity index 100%
rename from src/basics/Core/DataTypes.h
rename to src/basics/DataTypes.h
diff --git a/src/basics/Core/StringUtilities/StringUtil.cpp b/src/basics/StringUtilities/StringUtil.cpp
similarity index 100%
rename from src/basics/Core/StringUtilities/StringUtil.cpp
rename to src/basics/StringUtilities/StringUtil.cpp
diff --git a/src/basics/Core/StringUtilities/StringUtil.h b/src/basics/StringUtilities/StringUtil.h
similarity index 100%
rename from src/basics/Core/StringUtilities/StringUtil.h
rename to src/basics/StringUtilities/StringUtil.h
diff --git a/src/basics/Core/StringUtilities/StringUtilTest.cpp b/src/basics/StringUtilities/StringUtilTest.cpp
similarity index 100%
rename from src/basics/Core/StringUtilities/StringUtilTest.cpp
rename to src/basics/StringUtilities/StringUtilTest.cpp
diff --git a/src/basics/Core/Timer/Timer.cpp b/src/basics/Timer/Timer.cpp
similarity index 100%
rename from src/basics/Core/Timer/Timer.cpp
rename to src/basics/Timer/Timer.cpp
diff --git a/src/basics/Core/Timer/Timer.h b/src/basics/Timer/Timer.h
similarity index 100%
rename from src/basics/Core/Timer/Timer.h
rename to src/basics/Timer/Timer.h
diff --git a/src/basics/Core/Timer/TimerImp.cpp b/src/basics/Timer/TimerImp.cpp
similarity index 100%
rename from src/basics/Core/Timer/TimerImp.cpp
rename to src/basics/Timer/TimerImp.cpp
diff --git a/src/basics/Core/Timer/TimerImp.h b/src/basics/Timer/TimerImp.h
similarity index 100%
rename from src/basics/Core/Timer/TimerImp.h
rename to src/basics/Timer/TimerImp.h
diff --git a/src/basics/Core/buildInfo.cmake b/src/basics/buildInfo.cmake
similarity index 100%
rename from src/basics/Core/buildInfo.cmake
rename to src/basics/buildInfo.cmake
diff --git a/src/basics/Core/buildInfo.h b/src/basics/buildInfo.h
similarity index 100%
rename from src/basics/Core/buildInfo.h
rename to src/basics/buildInfo.h
diff --git a/src/basics/Core/buildInfo.in.cpp b/src/basics/buildInfo.in.cpp
similarity index 100%
rename from src/basics/Core/buildInfo.in.cpp
rename to src/basics/buildInfo.in.cpp
diff --git a/src/basics/config/ConfigurationFile.cpp b/src/basics/config/ConfigurationFile.cpp
index 026d13e15486c46a7056e061ea075a03c9c06f9f..547468cd2fd6aadccbdc5017230fcf6cdb803733 100644
--- a/src/basics/config/ConfigurationFile.cpp
+++ b/src/basics/config/ConfigurationFile.cpp
@@ -8,8 +8,9 @@
 #include <fstream>
 #include <iostream>
 #include <stdlib.h>
+#include <filesystem>
 
-#include <basics/basics/utilities/UbException.h>
+#include <basics/utilities/UbException.h>
 
 
 namespace vf::basics
@@ -26,7 +27,7 @@ bool ConfigurationFile::load(const std::string& file)
 
    if (!inFile.good())
    {
-      UB_THROW(UbException(UB_EXARGS, "Cannot read configuration file "+file+"!"));
+      UB_THROW(UbException(UB_EXARGS, "Cannot read configuration file " + file + "! Your current directory is " + std::filesystem::current_path().string() + "."));
    }
 
    while (inFile.good() && ! inFile.eof())
diff --git a/src/basics/config/ConfigurationFile.h b/src/basics/config/ConfigurationFile.h
index 4a53f7add85b9c6461fda0bab20fa6656eebc5d3..8ca12306e0ef321832efb19a2a428e6dc753df41 100644
--- a/src/basics/config/ConfigurationFile.h
+++ b/src/basics/config/ConfigurationFile.h
@@ -1,6 +1,8 @@
 #ifndef BASICS_CONFIGURATIONFILE_H
 #define BASICS_CONFIGURATIONFILE_H
 
+#include "Logger.h"
+#include <filesystem>
 #include <map>
 #include <vector>
 #include <sstream>
@@ -9,7 +11,7 @@
 #include <iostream>
 #include <stdlib.h>
 
-#include <basics/basics/utilities/UbException.h>
+#include <basics/utilities/UbException.h>
 
 //! \brief  Simple configuration file
 //! \details The Configuration class presented here can read and keep values of any configuration file written in a format like this:
@@ -68,6 +70,23 @@ public:
    template<class T>
    T getValue(const std::string& key, T defaultValue) const;
 
+   //! \brief Loads the configuration file named on the command line, falling back to \p configPath.
+   //! \details If argc > 1, argv[1] replaces configPath. The path in use is logged and then passed to load().
+   static ConfigurationFile loadConfig(int argc, char *argv[], std::string configPath = "./config.txt")
+   {
+      if (argc <= 1)
+      {
+         VF_LOG_INFO("Using default config path: {}", configPath);
+      } else {
+         configPath = argv[1];
+         VF_LOG_INFO("Using command line argument for config path: {}", configPath);
+      }
+
+      vf::basics::ConfigurationFile loadedConfig;
+      loadedConfig.load(configPath);
+      return loadedConfig;
+   }
+
 private:
    //! the container
    std::map<std::string, std::string> data;
diff --git a/src/basics/constants/NumericConstants.h b/src/basics/constants/NumericConstants.h
new file mode 100644
index 0000000000000000000000000000000000000000..305805de4f1532de51bae15b92cfda80a5b2f4ab
--- /dev/null
+++ b/src/basics/constants/NumericConstants.h
@@ -0,0 +1,271 @@
+#ifndef BASICS_NUMERIC_CONSTANT_H
+#define BASICS_NUMERIC_CONSTANT_H
+
+#ifndef __CUDACC__
+#include <cmath>
+#endif
+
+namespace vf::basics::constant
+{
+
+#ifdef VF_DOUBLE_ACCURACY
+static constexpr double c1o2 = 1. / 2.;
+static constexpr double c3o2 = 3. / 2.;
+static constexpr double c1o3 = 1. / 3.;
+static constexpr double c2o3 = 2. / 3.;
+static constexpr double c1o4 = 1. / 4.;
+static constexpr double c3o4 = 3. / 4.;
+static constexpr double c1o6 = 1. / 6.;
+static constexpr double c1o7 = 1. / 7.;
+static constexpr double c1o8 = 1. / 8.;
+static constexpr double c1o9 = 1. / 9.;
+static constexpr double c2o9 = 2. / 9.;
+static constexpr double c4o9 = 4. / 9.;
+static constexpr double c4o10 = 4. / 10.;
+static constexpr double c1o10 = 1. / 10.;
+static constexpr double c1o12 = 1. / 12.;
+static constexpr double c1o16 = 1. / 16.;
+static constexpr double c3o16 = 3. / 16.;
+static constexpr double c9o16 = 9. / 16.;
+static constexpr double c1o18 = 1. / 18.;
+static constexpr double c1o20 = 1. / 20.;
+static constexpr double c19o20 = 19. / 20.;
+static constexpr double c21o20 = 21. / 20.;
+static constexpr double c1o24 = 1. / 24.;
+static constexpr double c1o27 = 1. / 27.;
+static constexpr double c3o32 = 3. / 32.;
+static constexpr double c4o32 = 4. / 32.;
+static constexpr double c1o36 = 1. / 36.;
+static constexpr double c1o48 = 1. / 48.;
+static constexpr double c1o64 = 1. / 64.;
+static constexpr double c3o64 = 3. / 64.;
+static constexpr double c9o64 = 9. / 64.;
+static constexpr double c27o64 = 27. / 64.;
+static constexpr double c1o66 = 1. / 66.;
+static constexpr double c1o72 = 1. / 72.;
+static constexpr double c1o264 = 1. / 264.;
+static constexpr double c8o27 = 8. / 27.;
+static constexpr double c2o27 = 2. / 27.;
+static constexpr double c1o54 = 1. / 54.;
+static constexpr double c1o100 = 1. / 100.;
+static constexpr double c99o100 = 99. / 100;
+static constexpr double c1o126 = 1. / 126.;
+static constexpr double c1o216 = 1. / 216.;
+static constexpr double c5o4 = 5. / 4.;
+static constexpr double c4o3 = 4. / 3.;
+static constexpr double c9o4 = 9. / 4.;
+static constexpr double c5o2 = 5. / 2.;
+static constexpr double c9o2 = 9. / 2.;
+
+static constexpr double c0o1 = 0.;
+static constexpr double c1o1 = 1.;
+static constexpr double c2o1 = 2.;
+static constexpr double c3o1 = 3.;
+static constexpr double c4o1 = 4.;
+static constexpr double c5o1 = 5.;
+static constexpr double c6o1 = 6.;
+static constexpr double c7o1 = 7.;
+static constexpr double c8o1 = 8.;
+static constexpr double c9o1 = 9.;
+static constexpr double c10o1 = 10.;
+static constexpr double c11o1 = 11.;
+static constexpr double c12o1 = 12.;
+static constexpr double c13o1 = 13.;
+static constexpr double c14o1 = 14.;
+static constexpr double c15o1 = 15.;
+static constexpr double c16o1 = 16.;
+static constexpr double c17o1 = 17.;
+static constexpr double c18o1 = 18.;
+static constexpr double c21o1 = 21.;
+static constexpr double c24o1 = 24.;
+static constexpr double c25o1 = 25.;
+static constexpr double c26o1 = 26.;
+static constexpr double c27o1 = 27.;
+static constexpr double c28o1 = 28.;
+static constexpr double c29o1 = 29.;
+static constexpr double c30o1 = 30.;
+static constexpr double c32o1 = 32.;
+static constexpr double c33o1 = 33.;
+static constexpr double c34o1 = 34.;
+static constexpr double c36o1 = 36.;
+static constexpr double c40o1 = 40.;
+static constexpr double c42o1 = 42.;
+static constexpr double c46o1 = 46.;
+static constexpr double c48o1 = 48.;
+static constexpr double c50o1 = 50.;
+static constexpr double c52o1 = 52.;
+static constexpr double c54o1 = 54.;
+static constexpr double c56o1 = 56.;
+static constexpr double c64o1 = 64.;
+static constexpr double c66o1 = 66.;
+static constexpr double c68o1 = 68.;
+static constexpr double c69o1 = 69.;
+static constexpr double c72o1 = 72.;
+static constexpr double c84o1 = 84.;
+static constexpr double c88o1 = 88.;
+static constexpr double c96o1 = 96.;
+static constexpr double c100o1 = 100.;
+static constexpr double c130o1 = 130.;
+static constexpr double c152o1 = 152.;
+static constexpr double c166o1 = 166.;
+static constexpr double c195o1 = 195.;
+static constexpr double c216o1 = 216.;
+static constexpr double c264o1 = 264.;
+static constexpr double c290o1 = 290.;
+static constexpr double c367o1 = 367.;
+
+static constexpr double Op0000002 = 0.0000002;
+static constexpr double c10eM30 = 1e-30;
+static constexpr double c10eM10 = 1e-10;
+static constexpr double smallSingle = 0.0000000002;
+
+#ifndef __CUDACC__
+static const double cPi = 4.0 * std::atan(1.0);               // 3.1415926535
+static const double c2Pi = 8.0 * std::atan(1.0);              // 6.2831853071
+static const double cPio180 = 4.0 * std::atan(1.0) / 180.0;   // 1.74532925199e-2
+static const double c180oPi = 180.0 / (4.0 * std::atan(1.0)); // 57.2957795131
+#else // nvcc: literals at full double precision so .cu units agree with the std::atan-based values above
+static constexpr double cPi = 3.14159265358979323846;        // pi
+static constexpr double c2Pi = 6.28318530717958647692;       // 2 * pi
+static constexpr double cPio180 = 1.74532925199432957692e-2; // pi / 180
+static constexpr double c180oPi = 57.2957795130823208768;    // 180 / pi
+#endif
+
+static const double one_over_sqrt2 = 1.0 / sqrt(2.0); // 0.707106781
+static const double one_over_sqrt3 = 1.0 / sqrt(3.0); // 0.577350269
+static const double sqrt2 = sqrt(2.0);       // 1.4142135
+static const double sqrt3 = sqrt(3.0);       // 1.7320508
+
+#else
+static constexpr float c1o2 = 1.0f / 2.0f;
+static constexpr float c3o2 = 3.0f / 2.0f;
+static constexpr float c1o3 = 1.0f / 3.0f;
+static constexpr float c2o3 = 2.0f / 3.0f;
+static constexpr float c1o4 = 1.0f / 4.0f;
+static constexpr float c3o4 = 3.0f / 4.0f;
+static constexpr float c1o6 = 1.0f / 6.0f;
+static constexpr float c1o7 = 1.0f / 7.0f;
+static constexpr float c1o8 = 1.0f / 8.0f;
+static constexpr float c1o9 = 1.0f / 9.0f;
+static constexpr float c2o9 = 2.0f / 9.0f;
+static constexpr float c4o9 = 4.0f / 9.0f;
+static constexpr float c4o10 = 4.0f / 10.0f;
+static constexpr float c1o10 = 1.0f / 10.0f;
+static constexpr float c1o12 = 1.0f / 12.0f;
+static constexpr float c1o16 = 1.0f / 16.0f;
+static constexpr float c3o16 = 3.0f / 16.0f;
+static constexpr float c9o16 = 9.0f / 16.0f;
+static constexpr float c1o18 = 1.0f / 18.0f;
+static constexpr float c1o20 = 1.0f / 20.0f;
+static constexpr float c19o20 = 19.0f / 20.0f;
+static constexpr float c21o20 = 21.0f / 20.0f;
+static constexpr float c1o24 = 1.0f / 24.0f;
+static constexpr float c1o27 = 1.0f / 27.0f;
+static constexpr float c3o32 = 3.0f / 32.0f;
+static constexpr float c4o32 = 4.0f / 32.0f;
+static constexpr float c1o36 = 1.0f / 36.0f;
+static constexpr float c1o48 = 1.0f / 48.0f;
+static constexpr float c1o64 = 1.0f / 64.0f;
+static constexpr float c3o64 = 3.0f / 64.0f;
+static constexpr float c9o64 = 9.0f / 64.0f;
+static constexpr float c27o64 = 27.0f / 64.0f;
+static constexpr float c1o66 = 1.0f / 66.0f;
+static constexpr float c1o72 = 1.0f / 72.0f;
+static constexpr float c1o264 = 1.0f / 264.0f;
+static constexpr float c8o27 = 8.0f / 27.0f;
+static constexpr float c2o27 = 2.0f / 27.0f;
+static constexpr float c1o54 = 1.0f / 54.0f;
+static constexpr float c1o100 = 1.0f / 100.0f;
+static constexpr float c99o100 = 99.0f / 100.0f;
+static constexpr float c1o126 = 1.0f / 126.0f;
+static constexpr float c1o216 = 1.0f / 216.0f;
+static constexpr float c5o4 = 5.0f / 4.0f;
+static constexpr float c4o3 = 4.0f / 3.0f;
+static constexpr float c9o4 = 9.0f / 4.0f;
+static constexpr float c5o2 = 5.0f / 2.0f;
+static constexpr float c9o2 = 9.0f / 2.0f;
+
+static constexpr float c0o1 = 0.f;
+static constexpr float c1o1 = 1.f;
+static constexpr float c2o1 = 2.f;
+static constexpr float c3o1 = 3.f;
+static constexpr float c4o1 = 4.f;
+static constexpr float c5o1 = 5.f;
+static constexpr float c6o1 = 6.f;
+static constexpr float c7o1 = 7.f;
+static constexpr float c8o1 = 8.f;
+static constexpr float c9o1 = 9.f;
+static constexpr float c10o1 = 10.f;
+static constexpr float c11o1 = 11.f;
+static constexpr float c12o1 = 12.f;
+static constexpr float c13o1 = 13.f;
+static constexpr float c14o1 = 14.f;
+static constexpr float c15o1 = 15.f;
+static constexpr float c16o1 = 16.f;
+static constexpr float c17o1 = 17.f;
+static constexpr float c18o1 = 18.f;
+static constexpr float c21o1 = 21.f;
+static constexpr float c24o1 = 24.f;
+static constexpr float c25o1 = 25.f;
+static constexpr float c26o1 = 26.f;
+static constexpr float c27o1 = 27.f;
+static constexpr float c28o1 = 28.f;
+static constexpr float c29o1 = 29.f;
+static constexpr float c30o1 = 30.f;
+static constexpr float c32o1 = 32.f;
+static constexpr float c33o1 = 33.f;
+static constexpr float c34o1 = 34.f;
+static constexpr float c36o1 = 36.f;
+static constexpr float c40o1 = 40.f;
+static constexpr float c42o1 = 42.f;
+static constexpr float c46o1 = 46.f;
+static constexpr float c48o1 = 48.f;
+static constexpr float c50o1 = 50.f;
+static constexpr float c52o1 = 52.f;
+static constexpr float c54o1 = 54.f;
+static constexpr float c56o1 = 56.f;
+static constexpr float c64o1 = 64.f;
+static constexpr float c66o1 = 66.f;
+static constexpr float c68o1 = 68.f;
+static constexpr float c69o1 = 69.f;
+static constexpr float c72o1 = 72.f;
+static constexpr float c84o1 = 84.f;
+static constexpr float c88o1 = 88.f;
+static constexpr float c96o1 = 96.f;
+static constexpr float c100o1 = 100.0f;
+static constexpr float c130o1 = 130.0f;
+static constexpr float c152o1 = 152.0f;
+static constexpr float c166o1 = 166.0f;
+static constexpr float c195o1 = 195.0f;
+static constexpr float c216o1 = 216.0f;
+static constexpr float c264o1 = 264.0f;
+static constexpr float c290o1 = 290.0f;
+static constexpr float c367o1 = 367.0f;
+
+static constexpr float Op0000002 = 0.0000002f;
+static constexpr float c10eM30 = 1e-30f;
+static constexpr float c10eM10 = 1e-10f;
+static constexpr float smallSingle = 0.0000000002f;
+
+#ifndef __CUDACC__
+static const float cPi = 4.0f * std::atan(1.0f);               // 3.1415926535
+static const float c2Pi = 8.0f * std::atan(1.0f);              // 6.2831853071
+static const float cPio180 = 4.0f * std::atan(1.0f) / 180.0f;   // 1.74532925199e-2
+static const float c180oPi = 180.0f / (4.0f * std::atan(1.0f)); // 57.2957795131
+#else
+static constexpr float cPi = 3.1415926535f;
+static constexpr float c2Pi = 6.28318530717f;
+static constexpr float cPio180 = 1.74532925199e-2f;
+static constexpr float c180oPi = 57.2957795131f;
+#endif
+
+static const float one_over_sqrt2 = 1.0 / sqrtf(2.0); // 0.707106781
+static const float one_over_sqrt3 = 1.0 / sqrtf(3.0); // 0.577350269
+static const float sqrt2 = sqrtf(2.0);                // 1.4142135
+static const float sqrt3 = sqrtf(3.0);                // 1.7320508
+
+#endif
+
+}
+
+#endif
diff --git a/src/basics/basics/container/CbArray2D.h b/src/basics/container/CbArray2D.h
similarity index 100%
rename from src/basics/basics/container/CbArray2D.h
rename to src/basics/container/CbArray2D.h
diff --git a/src/basics/basics/container/CbArray3D.h b/src/basics/container/CbArray3D.h
similarity index 100%
rename from src/basics/basics/container/CbArray3D.h
rename to src/basics/container/CbArray3D.h
diff --git a/src/basics/basics/container/CbArray4D.h b/src/basics/container/CbArray4D.h
similarity index 100%
rename from src/basics/basics/container/CbArray4D.h
rename to src/basics/container/CbArray4D.h
diff --git a/src/basics/basics/container/CbVector.h b/src/basics/container/CbVector.h
similarity index 100%
rename from src/basics/basics/container/CbVector.h
rename to src/basics/container/CbVector.h
diff --git a/src/basics/basics/container/CbVectorPool.h b/src/basics/container/CbVectorPool.h
similarity index 100%
rename from src/basics/basics/container/CbVectorPool.h
rename to src/basics/container/CbVectorPool.h
diff --git a/src/basics/geometry3d/GbImplicitSurface.cpp b/src/basics/geometry3d/GbImplicitSurface.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6dec5717341d0f55e2b34b3e87d4f15e2b077f8c
--- /dev/null
+++ b/src/basics/geometry3d/GbImplicitSurface.cpp
@@ -0,0 +1,446 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GbImplicitSurface.cpp
+//! \ingroup geometry3d
+//! \author Hussein Alihussein
+//=======================================================================================
+
+#include <GbImplicitSurface.h>
+
+#ifdef BUILD_USE_BOOST
+
+#include <basics/utilities/UbMath.h>
+
+#include <geometry3d/GbSystem3D.h>
+#include <geometry3d/GbTriangle3D.h>
+
+#include <boost/math/tools/roots.hpp>
+
+
+using namespace std;
+using boost::math::tools::bisect;
+
+/*=======================================================*/
+// ObObjectCreator* GbImplicitSurface::getCreator()
+// {
+// 	 GbObject3DCreator instance;
+// 	return &instance;
+// }
+/*=======================================================*/
+// Default constructor: observes no corner points yet (null pointers, which the destructor tests for) and zeroes all scalar parameters.
+GbImplicitSurface::GbImplicitSurface()
+    : GbObject3D(), p1(nullptr), p2(nullptr), p3(nullptr), p4(nullptr), edgeLength(0.0), dx(0.0), thickness(0.0)
+{
+}
+/*=======================================================*/
+// Constructor: (x1a,x2a,x3a) and (x1b,x2b,x3b) are the two corner points; edgeLength, dx and thickness are stored unchanged.
+GbImplicitSurface::GbImplicitSurface(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b, const double& edgeLength, const double& dx, const double& thickness) : GbObject3D()
+{
+    // First corner pair; this object registers itself as observer of both points.
+    p1 = new GbPoint3D(x1a, x2a, x3a);
+    p2 = new GbPoint3D(x1b, x2b, x3b);
+    p1->addObserver(this);
+    p2->addObserver(this);
+    // Second pair, created from the same coordinates as p1 and p2.
+    p3 = new GbPoint3D(x1a, x2a, x3a);
+    p4 = new GbPoint3D(x1b, x2b, x3b);
+    p3->addObserver(this);
+    p4->addObserver(this);
+    this->edgeLength = edgeLength; // the parameters shadow the members of the same name, hence the explicit this->
+    this->dx = dx;
+    this->thickness = thickness;
+}
+/*=======================================================*/
+// Constructor taking four corner points -- currently disabled (it is still declared in GbImplicitSurface.h)
+//GbImplicitSurface::GbImplicitSurface(
+//	const double& x1a, const double& x2a, const double& x3a,
+//	const double& x1b, const double& x2b, const double& x3b,
+//
+//	const double& x1c, const double& x2c, const double& x3c,
+//	const double& x1d, const double& x2d, const double& x3d,
+//
+//	const double& edgeLength, const double& dx) :GbObject3D()
+//{
+//	this->p1 = new GbPoint3D(x1a, x2a, x3a);
+//	this->p2 = new GbPoint3D(x1b, x2b, x3b);
+//
+//	this->p3 = new GbPoint3D(x1c, x2c, x3c);
+//	this->p4 = new GbPoint3D(x1d, x2d, x3d);
+//
+//	this->edgeLength = edgeLength;
+//	this->dx = dx;
+//}
+// Copy-from-pointer constructor used by clone(): delegates to the main constructor with imp's p1/p2 coordinates and scalars, so p3/p4 again start as copies of p1/p2. imp, imp->p1 and imp->p2 must be non-null.
+GbImplicitSurface::GbImplicitSurface(GbImplicitSurface * imp) : GbImplicitSurface(imp->p1->x1, imp->p1->x2, imp->p1->x3, imp->p2->x1, imp->p2->x2, imp->p2->x3, imp->edgeLength, imp->dx, imp->thickness)
+{ }
+/*=======================================================*/
+// Destructor: deregisters this object as observer from every corner point
+// that is still referenced; the points themselves are not deleted here.
+GbImplicitSurface::~GbImplicitSurface()
+{
+    // Each slot is dereferenced only when its turn comes, in the order p1, p2, p3, p4.
+    GbPoint3D **const cornerSlots[] = { &p1, &p2, &p3, &p4 };
+    for (GbPoint3D **slot : cornerSlots)
+    {
+        if (*slot)
+            (*slot)->removeObserver(this);
+    }
+}
+/*=======================================================*/
+// Stop criterion handed to bisect(): reports convergence once the bracket
+// [min, max] is no wider than 10e-10 (i.e. 1e-9).
+struct TerminationCondition {
+	bool operator() (double min, double max) { return abs(max - min) <= 10e-10; }
+};
+/*=======================================================*/
+struct FunctionToApproximate { // gyroid level set sin(X)cos(Y) + sin(Y)cos(Z) + sin(Z)cos(X), sampled at the ray point (x,y,z) + q*(dir1,dir2,dir3)
+	double x, y, z, dir1, dir2, dir3, L; // ray origin, ray direction, period length of the gyroid
+	double operator() (double q) {
+		const double k = 2.*M_PI / L, X = k*(x + q*dir1), Y = k*(y + q*dir2), Z = k*(z + q*dir3); // X, Y, Z: ray point scaled by 2*pi/L
+		return sin(X)*cos(Y) + sin(Y)*cos(Z) + sin(Z)*cos(X);
+	}
+};
+/*=======================================================*/
+struct FunctionGyroidThirdOrder { // functor evaluated at the ray point (x,y,z) + q*(dir1,dir2,dir3); the callers in this file set h from +/- thickness
+	double x, y, z; // ray origin
+	double dir1, dir2, dir3, L; // ray direction and period length of the trigonometric terms
+	double h; // offset parameter that scales every correction term of the result
+	// Scratch members: every one of them is reassigned on each call of operator().
+	double t17, t3, t2, t18, t20, t8, t13, t5, t9, t6, t11, t14;
+	double f300, f210, f201, f120, f102, f030, f021, f012, f003, f200, f110, f101, f020, f011, f002, f100, f010, f001, f000;
+
+	double repeatedTerm, repeatedTermRoot;
+	double T1, T2, T3, T4, T5, T6, T7, T8, T9, Gyroidh;
+
+	double operator() (double q) {
+	// Products of sines and cosines of the scaled ray coordinates 2*pi*(x + q*dir1)/L, 2*pi*(y + q*dir2)/L, 2*pi*(z + q*dir3)/L
+	 t2  = sin((2. * M_PI*(x+q*dir1)) / L)*sin((2. * M_PI*(y+q*dir2)) / L);
+	 t3  = sin((2. * M_PI*(x+q*dir1)) / L)*sin((2. * M_PI*(z+q*dir3)) / L);
+	 t5  = cos((2. * M_PI*(y+q*dir2)) / L)*sin((2. * M_PI*(x+q*dir1)) / L);
+	 t6  = cos((2. * M_PI*(z+q*dir3)) / L)*sin((2. * M_PI*(x+q*dir1)) / L);
+	 t8  = sin((2. * M_PI*(y+q*dir2)) / L)*sin((2. * M_PI*(z+q*dir3)) / L);
+	 t9  = cos((2. * M_PI*(x+q*dir1)) / L)*sin((2. * M_PI*(y+q*dir2)) / L);
+	 t11 = cos((2. * M_PI*(z+q*dir3)) / L)*sin((2. * M_PI*(y+q*dir2)) / L);
+	 t13 = cos((2. * M_PI*(x+q*dir1)) / L)*sin((2. * M_PI*(z+q*dir3)) / L);
+	 t14 = cos((2. * M_PI*(y+q*dir2)) / L)*sin((2. * M_PI*(z+q*dir3)) / L);
+	 t17 = cos((2. * M_PI*(x+q*dir1)) / L)*cos((2. * M_PI*(y+q*dir2)) / L);
+	 t18 = cos((2. * M_PI*(x+q*dir1)) / L)*cos((2. * M_PI*(z+q*dir3)) / L);
+	 t20 = cos((2. * M_PI*(y+q*dir2)) / L)*cos((2. * M_PI*(z+q*dir3)) / L);
+
+	// Third-order partial derivatives of the gyroid function (fijk: i, j, k = derivative order in x, y, z)
+	 f300 = (8. * pow(M_PI, 3.)*(-t17 + t3)) / pow(L, 3.);
+	 f210 = (8. * pow(M_PI, 3.)*t2) / pow(L, 3.);
+	 f201 = (-8. * pow(M_PI, 3.)*t18) / pow(L, 3.);
+	 f120 = (-8. * pow(M_PI, 3.)*t17) / pow(L, 3.);
+	 f102 = (8. * pow(M_PI, 3.)*t3) / pow(L, 3.);
+	 f030 = (8. * pow(M_PI, 3.)*(t2 - t20)) / pow(L, 3.);
+	 f021 = (8. * pow(M_PI, 3.)*t8) / pow(L, 3.);
+	 f012 = (-8. * pow(M_PI, 3.)*t20) / pow(L, 3.);
+	 f003 = (8. * pow(M_PI, 3.)*(-t18 + t8)) / pow(L, 3.);
+
+	// Second-order partial derivatives (entries of the Hessian)
+	 f200 = (-4. * pow(M_PI, 2.)*(t13 + t5)) / pow(L, 2.);
+	 f110 = (-4. * pow(M_PI, 2.)*t9) / pow(L, 2.);
+	 f101 = (-4. * pow(M_PI, 2.)*t6) / pow(L, 2.);
+	 f020 = (-4. * pow(M_PI, 2.)*(t11 + t5)) / pow(L, 2.);
+	 f011 = (-4. * pow(M_PI, 2.)*t14) / pow(L, 2.);
+	 f002 = (-4. * pow(M_PI, 2.)*(t11 + t13)) / pow(L, 2.);
+
+	// First-order partial derivatives (the gradient)
+	 f100 = (2. * M_PI*(t17 - t3)) / L;
+	 f010 = (2. * M_PI*(-t2 + t20)) / L;
+	 f001 = (2. * M_PI*(t18 - t8)) / L;
+
+	// Gyroid function value: sin(X)cos(Y) + sin(Y)cos(Z) + sin(Z)cos(X) for the scaled coordinates X, Y, Z
+	 f000 = t11 + t13 + t5;
+	 // Squared gradient magnitude and its square root; the root is used as a divisor below, so a zero gradient gives non-finite results.
+	 repeatedTerm = f100*f100 + f010*f010 + f001*f001;
+	 repeatedTermRoot = sqrt(repeatedTerm);
+	 // T1..T3: gradient dotted with the z, y and x row of the Hessian; T7..T9 are the same sums with every product scaled by h.
+	 T1 = f001*f002 + f010*f011 + f100*f101;
+	 T2 = f001*f011 + f010*f020 + f100*f110;
+	 T3 = f001*f101 + f010*f110 + f100*f200;
+	 T4 = f002*f011 + f001*f012 + f011*f020 + f010*f021 + f101*f110;
+	 T5 = f002*f101 + f001*f102 + f011*f110 + f101*f200 + f100*f201;
+	 T6 = f011*f101 + f020*f110 + f010*f120 + f110*f200 + f100*f210;
+	 T7 = f001*f002*h + f010*f011*h + f100*f101*h;
+	 T8 = f001*f011*h + f010*f020*h + f100*f110*h;
+	 T9 = f001*f101*h + f010*f110*h + f100*f200*h;
+	 // Result: f000 plus three correction terms that each carry a factor h, so h == 0 yields f000 as long as the gradient is non-zero.
+	 // NOTE(review): presumably a third-order approximation of the gyroid offset by h along its normal -- TODO confirm against the derivation.
+	 Gyroidh = 2 * h*sqrt(pow(f001 - (T1*h) / (2.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (2.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (2.*repeatedTermRoot), 2))
+		- (3 * h*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot), 2))) / 2.
+		- (3 * h*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot) + (h*((T7 - 3 * f001*repeatedTermRoot)*
+		(4 * pow(T1, 2)*h - 4 * (pow(f002, 2) + f001*f003 + pow(f011, 2) + f010*f012 + pow(f101, 2) + f100*f102)*h*repeatedTerm + 12 * f002*pow(repeatedTerm, 1.5)) +
+			(T8 - 3 * f010*repeatedTermRoot)*(4 * T1*T2*h - 4 * (T4)*h*repeatedTerm + 12 * f011*pow(repeatedTerm, 1.5)) +
+			(T9 - 3 * f100*repeatedTermRoot)*(4 * T1*T3*h - 4 * (T5)*h*repeatedTerm + 12 * f101*pow(repeatedTerm, 1.5)))) /
+			(108.*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot), 2))*
+				pow(repeatedTerm, 2)), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot) +
+				(h*((T7 - 3 * f001*repeatedTermRoot)*(4 * T1*T2*h - 4 * (T4)*h*repeatedTerm + 12 * f011*pow(repeatedTerm, 1.5)) +
+					(T8 - 3 * f010*repeatedTermRoot)*(4 * pow(T2, 2)*h - 4 * (pow(f011, 2) + pow(f020, 2) + f001*f021 + f010*f030 + pow(f110, 2) + f100*f120)*h*repeatedTerm + 12 * f020*pow(repeatedTerm, 1.5)) +
+					(T9 - 3 * f100*repeatedTermRoot)*(4 * T2*T3*h - 4 * (T6)*h*repeatedTerm + 12 * f110*pow(repeatedTerm, 1.5)))) /
+					(108.*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot), 2))*
+						pow(repeatedTerm, 2)), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot) +
+						(h*((T7 - 3 * f001*repeatedTermRoot)*(4 * T1*T3*h - 4 * (T5)*h*repeatedTerm + 12 * f101*pow(repeatedTerm, 1.5)) +
+							(T8 - 3 * f010*repeatedTermRoot)*(4 * T2*T3*h - 4 * (T6)*h*repeatedTerm + 12 * f110*pow(repeatedTerm, 1.5)) +
+							(T9 - 3 * f100*repeatedTermRoot)*(4 * pow(T3, 2)*h - 4 * (pow(f101, 2) + pow(f110, 2) + pow(f200, 2) + f001*f201 + f010*f210 + f100*f300)*h*repeatedTerm + 12 * f200*pow(repeatedTerm, 1.5)))) /
+							(108.*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot), 2))*
+								pow(repeatedTerm, 2)), 2))) / 2. + f000;
+	
+		return Gyroidh;
+	}
+};
+/*==========================================================*/
+bool GbImplicitSurface::isPointInGbObject3D(const double& x1, const double& x2, const double& x3)
+{
+	// Both offset functions are always evaluated: position +1 uses h = +thickness,
+	// position -1 uses h = -thickness (see evaluateImplicitFunction).
+	const double valueAtPlusOffset = evaluateImplicitFunction(x1, x2, x3, 1.);
+	const double valueAtMinusOffset = evaluateImplicitFunction(x1, x2, x3, -1.);
+
+	// The point is reported as inside when the +offset value is non-positive
+	// and the -offset value is non-negative.
+	//
+	// Earlier criteria, kept for reference (f being the plain gyroid value
+	// sin(X)cos(Y) + sin(Y)cos(Z) + sin(Z)cos(X) with X = 2*pi*x1/edgeLength etc.):
+	//   f < 10.0E-15 && f > -10.0E-15
+	//   fabs(f) <= 10e-15
+	//   f <= 0
+	const bool insideBand = (valueAtPlusOffset <= 0.) && (valueAtMinusOffset >= 0.);
+	return insideBand;
+}
+
+/*==========================================================*/
+double GbImplicitSurface::getIntersectionRaytraceFactor(const double& x1, const double& x2, const double& x3, const double& rx1, const double& rx2, const double& rx3)
+{
+	// Looks for a sign change of the offset function along the ray
+	// (x1,x2,x3) + q*(rx1,rx2,rx3) for q in [qBegin, qEnd] and returns the q of
+	// the bisected root, or 999 when neither offset surface is bracketed.
+	const double qBegin = 0;
+	const double qEnd = dx*sqrt(rx1*rx1+ rx2*rx2+ rx3*rx3);
+
+	FunctionGyroidThirdOrder rayFunction;
+	rayFunction.x = x1;
+	rayFunction.y = x2;
+	rayFunction.z = x3;
+	rayFunction.dir1 = rx1;
+	rayFunction.dir2 = rx2;
+	rayFunction.dir3 = rx3;
+	rayFunction.L = edgeLength;
+
+	// The +thickness surface is tried first, then the -thickness one; the first
+	// bracketed root wins.
+	const double offsets[] = { thickness, -thickness };
+	for (const double offset : offsets)
+	{
+		rayFunction.h = offset;
+		if (rayFunction(qBegin)*rayFunction(qEnd) < 0)
+		{
+			// bisect() hands back the final bracket; its midpoint is the result.
+			const std::pair<double, double> bracket = bisect(rayFunction, qBegin, qEnd, TerminationCondition());
+			return (bracket.first + bracket.second) / 2;
+		}
+	}
+
+	return 999;
+}
+/*=======================================================*/
+double GbImplicitSurface::evaluateImplicitFunction(const double& x1, const double& x2, const double& x3, const double& position)
+{
+	// Evaluates the offset function at the point itself: direction and ray parameter are all zero.
+	FunctionGyroidThirdOrder offsetFunction;
+	offsetFunction.L = edgeLength;
+	offsetFunction.h = position*thickness; // position scales (and signs) the thickness member
+	offsetFunction.x = x1;
+	offsetFunction.y = x2;
+	offsetFunction.z = x3;
+	offsetFunction.dir1 = 0.;
+	offsetFunction.dir2 = 0.;
+	offsetFunction.dir3 = 0.;
+	return offsetFunction(0.);
+}
+/*=======================================================*/
+double GbImplicitSurface::getX1Centroid() // x1 coordinate midway between the corner points p1 and p2
+{
+	return (p1->x1 + p2->x1) * 0.5;
+}
+/*=======================================================*/
+double GbImplicitSurface::getX1Minimum() // smaller of the x1 coordinates of the corner points p1 and p2
+{
+	return (p2->x1 > p1->x1) ? p1->x1 : p2->x1;
+}
+/*=======================================================*/
+double GbImplicitSurface::getX1Maximum() // larger of the x1 coordinates of the corner points p1 and p2
+{
+	return (p2->x1 < p1->x1) ? p1->x1 : p2->x1;
+}
+/*=======================================================*/
+double GbImplicitSurface::getX2Centroid() // x2 coordinate midway between the corner points p1 and p2
+{
+	return (p1->x2 + p2->x2) * 0.5;
+}
+/*=======================================================*/
+double GbImplicitSurface::getX2Minimum() // smaller of the x2 coordinates of the corner points p1 and p2
+{
+	return (p2->x2 > p1->x2) ? p1->x2 : p2->x2;
+}
+/*=======================================================*/
+double GbImplicitSurface::getX2Maximum() // larger of the x2 coordinates of the corner points p1 and p2
+{
+	return (p2->x2 < p1->x2) ? p1->x2 : p2->x2;
+}
+/*=======================================================*/
+double GbImplicitSurface::getX3Centroid() // x3 coordinate midway between the corner points p1 and p2
+{
+	return (p1->x3 + p2->x3) * 0.5;
+}
+/*=======================================================*/
+double GbImplicitSurface::getX3Minimum() // smaller of the x3 coordinates of the corner points p1 and p2
+{
+	return (p2->x3 > p1->x3) ? p1->x3 : p2->x3;
+}
+/*=======================================================*/
+double GbImplicitSurface::getX3Maximum() // larger of the x3 coordinates of the corner points p1 and p2
+{
+	return (p2->x3 < p1->x3) ? p1->x3 : p2->x3;
+}
+/*=======================================================*/
+bool GbImplicitSurface::isCellInsideGbObject3D(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b)
+{
+	// A cell counts as inside only if all eight of its corners are inside; the
+	// && chain visits the corners in a fixed order and stops at the first miss.
+	const bool allCornersInside =
+		   isPointInGbObject3D(x1a, x2a, x3a)
+		&& isPointInGbObject3D(x1b, x2a, x3a)
+		&& isPointInGbObject3D(x1b, x2b, x3a)
+		&& isPointInGbObject3D(x1a, x2b, x3a)
+		&& isPointInGbObject3D(x1a, x2a, x3b)
+		&& isPointInGbObject3D(x1b, x2a, x3b)
+		&& isPointInGbObject3D(x1b, x2b, x3b)
+		&& isPointInGbObject3D(x1a, x2b, x3b);
+	return allCornersInside;
+}
+/*=======================================================*/
+bool GbImplicitSurface::isCellInsideOrCuttingGbObject3D(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b)
+{
+	// True as soon as one of the eight cell corners lies inside the object; the
+	// || chain visits the corners in a fixed order and stops at the first hit.
+	const bool anyCornerInside =
+		   isPointInGbObject3D(x1a, x2a, x3a)
+		|| isPointInGbObject3D(x1b, x2a, x3a)
+		|| isPointInGbObject3D(x1b, x2b, x3a)
+		|| isPointInGbObject3D(x1a, x2b, x3a)
+		|| isPointInGbObject3D(x1a, x2a, x3b)
+		|| isPointInGbObject3D(x1b, x2a, x3b)
+		|| isPointInGbObject3D(x1b, x2b, x3b)
+		|| isPointInGbObject3D(x1a, x2b, x3b);
+	return anyCornerInside;
+}
+/*=======================================================*/
+bool GbImplicitSurface::isCellCuttingGbObject3D(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b)
+{
+	// Cutting = at least one corner inside the object, but not all eight.
+	// The all-corners test runs first; the second test is skipped when it succeeds.
+	const bool fullyInside = this->isCellInsideGbObject3D(x1a, x2a, x3a, x1b, x2b, x3b);
+	if (fullyInside)
+		return false;
+	return this->isCellInsideOrCuttingGbObject3D(x1a, x2a, x3a, x1b, x2b, x3b);
+}
+/*=======================================================*/
+void GbImplicitSurface::addSurfaceTriangleSet(vector<UbTupleFloat3>& nodes, vector<UbTupleInt3>& triangles) // appends the bounding box (8 nodes, 12 triangles) to the given lists
+{
+	const int first = static_cast<int>(nodes.size()); // index the first node appended here receives; keeps the triangles valid when nodes is not empty
+	/*0*/nodes.push_back(makeUbTuple((float)getX1Minimum(), (float)getX2Minimum(), (float)getX3Minimum()));
+	/*1*/nodes.push_back(makeUbTuple((float)getX1Maximum(), (float)getX2Minimum(), (float)getX3Minimum()));
+	/*2*/nodes.push_back(makeUbTuple((float)getX1Maximum(), (float)getX2Maximum(), (float)getX3Minimum()));
+	/*3*/nodes.push_back(makeUbTuple((float)getX1Minimum(), (float)getX2Maximum(), (float)getX3Minimum()));
+	/*4*/nodes.push_back(makeUbTuple((float)getX1Minimum(), (float)getX2Minimum(), (float)getX3Maximum()));
+	/*5*/nodes.push_back(makeUbTuple((float)getX1Maximum(), (float)getX2Minimum(), (float)getX3Maximum()));
+	/*6*/nodes.push_back(makeUbTuple((float)getX1Maximum(), (float)getX2Maximum(), (float)getX3Maximum()));
+	/*7*/nodes.push_back(makeUbTuple((float)getX1Minimum(), (float)getX2Maximum(), (float)getX3Maximum()));
+
+	// bottom face (x3 = minimum)
+	triangles.push_back(makeUbTuple(first + 0, first + 1, first + 2));
+	triangles.push_back(makeUbTuple(first + 0, first + 2, first + 3));
+	// top face (x3 = maximum)
+	triangles.push_back(makeUbTuple(first + 4, first + 5, first + 6));
+	triangles.push_back(makeUbTuple(first + 4, first + 6, first + 7));
+	// left face (x1 = minimum)
+	triangles.push_back(makeUbTuple(first + 0, first + 3, first + 7));
+	triangles.push_back(makeUbTuple(first + 0, first + 7, first + 4));
+	// right face (x1 = maximum)
+	triangles.push_back(makeUbTuple(first + 1, first + 2, first + 6));
+	triangles.push_back(makeUbTuple(first + 1, first + 6, first + 5));
+	// back face (x2 = maximum)
+	triangles.push_back(makeUbTuple(first + 3, first + 2, first + 7));
+	triangles.push_back(makeUbTuple(first + 2, first + 7, first + 6));
+	// front face (x2 = minimum)
+	triangles.push_back(makeUbTuple(first + 0, first + 1, first + 5));
+	triangles.push_back(makeUbTuple(first + 0, first + 5, first + 4));
+}
+/*==========================================================*/
+void GbImplicitSurface::objectChanged(UbObservable *changedObject)
+{
+    // Forward the notification only when the changed object is one of our four corner points.
+    GbPoint3D *changedPoint = dynamic_cast<GbPoint3D *>(changedObject);
+    const bool isOwnCorner = changedPoint && (changedPoint == this->p1 || changedPoint == this->p2 || changedPoint == this->p3 || changedPoint == this->p4);
+    if (isOwnCorner)
+        this->notifyObserversObjectChanged();
+}
+/*==========================================================*/
+void GbImplicitSurface::objectWillBeDeleted(UbObservable *objectForDeletion)
+{
+    // Drops the reference to every corner point that is the object about to be
+    // deleted. The four members are handled identically, so they are visited
+    // through a table of their addresses in the order p1, p2, p3, p4.
+    GbPoint3D **const cornerSlots[] = {
+        &this->p1,
+        &this->p2,
+        &this->p3,
+        &this->p4,
+    };
+
+    for (GbPoint3D **slot : cornerSlots)
+    {
+        GbPoint3D *&corner = *slot;
+        if (!corner)
+        {
+            continue; // nothing referenced in this slot
+        }
+        // Compare as UbObservable pointers, the type the notification arrives with.
+        UbObservable *observedObj = dynamic_cast<UbObservable *>(corner);
+        if (objectForDeletion == observedObj)
+        {
+            corner = NULL;
+        }
+    }
+    // CAUTION: strictly speaking, all methods of GbLine would need if-checks for NULL pointers... toDo
+}
+
+#endif
diff --git a/src/basics/geometry3d/GbImplicitSurface.h b/src/basics/geometry3d/GbImplicitSurface.h
new file mode 100644
index 0000000000000000000000000000000000000000..a72c9442eca57d3bdd99887c8e5692afec939e1d
--- /dev/null
+++ b/src/basics/geometry3d/GbImplicitSurface.h
@@ -0,0 +1,161 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GbImplicitSurface.h
+//! \ingroup geometry3d
+//! \author Hussein Alihussein
+//=======================================================================================
+#ifndef GbImplicitSurface_H
+#define GbImplicitSurface_H
+
+#ifdef BUILD_USE_BOOST
+
+#include <vector>
+
+#include <GbPoint3D.h>
+#include <basics/utilities/UbObserver.h>
+#include <basics/utilities/UbMath.h>
+
+#define _USE_MATH_DEFINES
+#include <cmath>
+#include <math.h> 
+
+class GbLine3D;
+class GbObject3DCreator;
+
+#include <PointerDefinitions.h>
+class GbImplicitSurface;
+using GbImplicitSurfacePtr = SPtr<GbImplicitSurface>;
+//! Geometry object described by an implicit gyroid function with period edgeLength; the corner points p1 and p2 supply the
+//! bounding-box extents. It observes its corner points and forwards their change notifications to its own observers.
+class GbImplicitSurface : public GbObject3D, public UbObserver
+{
+public:
+	GbImplicitSurface();
+	GbImplicitSurface(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b, const double& edgeLength, const double& dx, const double& thickness=0);
+	// NOTE: the following 12-coordinate constructor is declared only; its definition in GbImplicitSurface.cpp is commented out.
+	GbImplicitSurface(const double & x1a, const double & x2a, const double & x3a, const double & x1b, const double & x2b, const double & x3b, const double & x1c, const double & x2c, const double & x3c, const double & x1d, const double & x2d, const double & x3d, const double & edgeLength, const double & dx);
+	//GbImplicitSurface(const double& minX1, const double& minX2, const double& minX3, const double& maxX1, const double& maxX2, const double& maxX3);
+	GbImplicitSurface(GbImplicitSurface *imp);
+	~GbImplicitSurface();
+
+	GbImplicitSurface* clone() override { return new GbImplicitSurface(this); }
+	void finalize() override { throw UbException(UB_EXARGS, "finalize() - not implemented"); }
+
+	// Bounding-box queries (implemented in GbImplicitSurface.cpp from the corner points p1 and p2).
+
+	double getX1Centroid() override;
+	double getX1Minimum() override;
+	double getX1Maximum() override;
+	double getX2Centroid()override;
+	double getX2Minimum() override;
+	double getX2Maximum() override;
+	double getX3Centroid()override;
+	double getX3Minimum() override;
+	double getX3Maximum() override;
+	void setCenterCoordinates(const double &x1, const double &x2, const double &x3) override
+	{ throw UbException(UB_EXARGS, "setCenterCoordinates() - not implemented"); }
+
+	void translate(const double& x1, const double& x2, const double& x3) override { throw UbException(UB_EXARGS, "translate() - not implemented"); }
+	void rotate(const double& rx1, const double& rx2, const double& rx3) override{ throw UbException(UB_EXARGS, "rotate() - not implemented"); }
+	void scale(const double& sx1, const double& sx2, const double& sx3) override { throw UbException(UB_EXARGS, "scale() - not implemented"); }
+	// NOTE: getLengthX1/X2/X3 are declared only; GbImplicitSurface.cpp contains no definition for them.
+	double getLengthX1();
+	double getLengthX2();
+	double getLengthX3();
+
+	bool isPointInGbObject3D(const double &x1p, const double &x2p, const double &x3p, bool &pointinboundary) override { throw UbException(UB_EXARGS, "isPointInGbObject3D(x1p, x2p, x3p, pointinboundary) - not implemented"); }
+    bool isPointInGbObject3D(const double &x1p, const double &x2p, const double &x3p) override;
+    bool isCellInsideGbObject3D(const double &x1a, const double &x2a, const double &x3a, const double &x1b,
+                                const double &x2b, const double &x3b) override;
+    bool isCellCuttingGbObject3D(const double &x1a, const double &x2a, const double &x3a, const double &x1b,
+                                 const double &x2b, const double &x3b) override;
+    bool isCellInsideOrCuttingGbObject3D(const double &x1a, const double &x2a, const double &x3a, const double &x1b,
+                                         const double &x2b, const double &x3b) override;
+    double getCellVolumeInsideGbObject3D(const double &x1a, const double &x2a, const double &x3a, const double &x1b,
+                                         const double &x2b, const double &x3b) override { throw UbException(UB_EXARGS, "getCellVolumeInsideGbObject3D() - not implemented"); }
+	// NOTE: calculateInterSectionPoint3D is declared only; GbImplicitSurface.cpp contains no definition for it.
+	GbPoint3D *calculateInterSectionPoint3D(GbPoint3D &point1, GbPoint3D &point2);
+	//GbImplicitSurface* createClippedRectangle3D(const double& x1a,const double& x2a,const double& x3a,const double& x1b,const double& x2b,const double& x3b);
+    GbLine3D *createClippedLine3D (GbPoint3D &point1, GbPoint3D &point2) override { throw UbException(UB_EXARGS, "createClippedLine3D() - not implemented"); }
+
+	std::vector<GbTriangle3D *> getSurfaceTriangleSet() override { throw UbException(UB_EXARGS, "getSurfaceTriangleSet() - not implemented"); }
+
+	 void addSurfaceTriangleSet(std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt3> &triangles) override;
+
+	bool hasRaytracing() override { return true;  }
+
+	/* |r| must be 1 (unit vector)! */
+	double getIntersectionRaytraceFactor (const double& x1, const double& x2, const double& x3, const double& rx1, const double& rx2, const double& rx3) override;
+
+	double evaluateImplicitFunction(const double & x1, const double & x2, const double & x3, const double & position);
+
+	double getDistance(const double& x1p, const double& x2p, const double& x3p)
+	{
+		// Not implemented yet: every call throws.
+		//
+		// Intended algorithm (sketch, never completed):
+		//   point inside the object -> smallest distance to the bounding planes:
+		//       x1Dist = min(|x1p - getX1Minimum()|, |x1p - getX1Maximum()|)
+		//       x2Dist = min(|x2p - getX2Minimum()|, |x2p - getX2Maximum()|)
+		//       x3Dist = min(|x3p - getX3Minimum()|, |x3p - getX3Maximum()|)
+		//       return min(x1Dist, x2Dist, x3Dist)
+		//   point outside the object -> still to be defined
+		//
+		// The parameters are named for documentation only and are deliberately unused.
+		(void)x1p;
+		(void)x2p;
+		(void)x3p;
+		throw UbException(UB_EXARGS, "not implemented");
+	}
+
+	std::string toString() override { throw UbException(UB_EXARGS, "toString() - not implemented"); }
+
+
+ // virtual methods of UbObserver
+    void objectChanged(UbObservable *changedObject) override;
+    void objectWillBeDeleted(UbObservable *objectForDeletion) override;
+
+	using GbObject3D::isPointInGbObject3D; // reason: this way isPointInGbObject3D(GbPoint3D*) need not be implemented here; it would otherwise be hidden by the overloads above
+
+
+protected:
+	GbPoint3D* p1 = nullptr; // first corner point; null until a constructor creates it or after the point announced its deletion
+	GbPoint3D* p2 = nullptr; // second corner point
+	GbPoint3D* p3 = nullptr; // created from the same coordinates as p1 by the nine-argument constructor
+	GbPoint3D* p4 = nullptr; // created from the same coordinates as p2 by the nine-argument constructor
+	double edgeLength = 0.0; // period length passed to the gyroid functor as L
+	double dx = 0.0;         // scales the upper end of the ray search interval in getIntersectionRaytraceFactor
+	double thickness = 0.0;  // magnitude of the offset h used for the two offset surfaces
+private:
+};
+
+
+
+#endif   
+#endif
diff --git a/src/basics/geometry3d/GbVoxelMatrix3D.cpp b/src/basics/geometry3d/GbVoxelMatrix3D.cpp
index c88f1d13104a5312efd161143d40e835f5654571..5040b54c8dacbd17317a35c451d82f1ad20b0edf 100644
--- a/src/basics/geometry3d/GbVoxelMatrix3D.cpp
+++ b/src/basics/geometry3d/GbVoxelMatrix3D.cpp
@@ -39,6 +39,7 @@
 #include <geometry3d/GbTriangle3D.h>
 
 #include <basics/utilities/UbSystem.h>
+#include "basics/constants/NumericConstants.h"
 
 #ifdef MC_CUBES
 #include <MarchingCubes/MarchingCubes.h>
@@ -173,11 +174,11 @@ double GbVoxelMatrix3D::getIntersectionRaytraceFactor(const double &x1, const do
                                                       const double &rx1, const double &rx2, const double &rx3)
 {
     if (!((UbMath::equal(rx1, 0.0) || UbMath::equal(fabs(rx1), 1.0) ||
-           UbMath::equal(fabs(rx1), UbMath::one_over_sqrt2) || UbMath::equal(fabs(rx1), UbMath::one_over_sqrt3)) &&
+           UbMath::equal(fabs(rx1), vf::basics::constant::one_over_sqrt2) || UbMath::equal(fabs(rx1), vf::basics::constant::one_over_sqrt3)) &&
           (UbMath::equal(rx2, 0.0) || UbMath::equal(fabs(rx2), 1.0) ||
-           UbMath::equal(fabs(rx2), UbMath::one_over_sqrt2) || UbMath::equal(fabs(rx2), UbMath::one_over_sqrt3)) &&
+           UbMath::equal(fabs(rx2), vf::basics::constant::one_over_sqrt2) || UbMath::equal(fabs(rx2), vf::basics::constant::one_over_sqrt3)) &&
           (UbMath::equal(rx3, 0.0) || UbMath::equal(fabs(rx3), 1.0) ||
-           UbMath::equal(fabs(rx3), UbMath::one_over_sqrt2) || UbMath::equal(fabs(rx3), UbMath::one_over_sqrt3)))) {
+           UbMath::equal(fabs(rx3), vf::basics::constant::one_over_sqrt2) || UbMath::equal(fabs(rx3), vf::basics::constant::one_over_sqrt3)))) {
         throw UbException(UB_EXARGS, "nur fuer diskrete Boltzmannrichungen implementiert!!!");
     }
 
diff --git a/src/basics/basics/memory/MbSmartPtr.h b/src/basics/memory/MbSmartPtr.h
similarity index 100%
rename from src/basics/basics/memory/MbSmartPtr.h
rename to src/basics/memory/MbSmartPtr.h
diff --git a/src/basics/basics/memory/MbSmartPtrBase.cpp b/src/basics/memory/MbSmartPtrBase.cpp
similarity index 100%
rename from src/basics/basics/memory/MbSmartPtrBase.cpp
rename to src/basics/memory/MbSmartPtrBase.cpp
diff --git a/src/basics/basics/memory/MbSmartPtrBase.h b/src/basics/memory/MbSmartPtrBase.h
similarity index 100%
rename from src/basics/basics/memory/MbSmartPtrBase.h
rename to src/basics/memory/MbSmartPtrBase.h
diff --git a/src/basics/basics/objects/ObObject.h b/src/basics/objects/ObObject.h
similarity index 100%
rename from src/basics/basics/objects/ObObject.h
rename to src/basics/objects/ObObject.h
diff --git a/src/basics/basics/parallel/PbMpi.h b/src/basics/parallel/PbMpi.h
similarity index 100%
rename from src/basics/basics/parallel/PbMpi.h
rename to src/basics/parallel/PbMpi.h
diff --git a/src/basics/tests/testUtilities.h b/src/basics/tests/testUtilities.h
index c70d9cc5c11633ded6b696d92692e3d4edf8d2ca..43fd5d822a10c6e9756c348f3e7dfb71c285ab71 100644
--- a/src/basics/tests/testUtilities.h
+++ b/src/basics/tests/testUtilities.h
@@ -1,6 +1,8 @@
 #ifndef TESTUTILITIES_H
 #define TESTUTILITIES_H
 
+#include <gmock/gmock.h>
+
 inline auto RealEq = [](auto value) {
 #ifdef VF_DOUBLE_ACCURACY
     return testing::DoubleEq(value);
@@ -9,4 +11,12 @@ inline auto RealEq = [](auto value) {
 #endif
 };
 
+// Matcher factory for approximate comparison: testing::DoubleNear if VF_DOUBLE_ACCURACY is defined, testing::FloatNear otherwise.
+inline auto RealNear = [](auto value, auto max_abs_error) {
+#ifdef VF_DOUBLE_ACCURACY
+    return testing::DoubleNear(value, max_abs_error);
+#else
+    return testing::FloatNear(value, max_abs_error);
+#endif
+};
 #endif
diff --git a/src/basics/basics/transmitter/TbTransmitter.h b/src/basics/transmitter/TbTransmitter.h
similarity index 100%
rename from src/basics/basics/transmitter/TbTransmitter.h
rename to src/basics/transmitter/TbTransmitter.h
diff --git a/src/basics/basics/transmitter/TbTransmitterLocal.h b/src/basics/transmitter/TbTransmitterLocal.h
similarity index 100%
rename from src/basics/basics/transmitter/TbTransmitterLocal.h
rename to src/basics/transmitter/TbTransmitterLocal.h
diff --git a/src/basics/basics/transmitter/TbTransmitterMpiPool.h b/src/basics/transmitter/TbTransmitterMpiPool.h
similarity index 100%
rename from src/basics/basics/transmitter/TbTransmitterMpiPool.h
rename to src/basics/transmitter/TbTransmitterMpiPool.h
diff --git a/src/basics/basics/utilities/UbComparators.h b/src/basics/utilities/UbComparators.h
similarity index 100%
rename from src/basics/basics/utilities/UbComparators.h
rename to src/basics/utilities/UbComparators.h
diff --git a/src/basics/basics/utilities/UbEqual.h b/src/basics/utilities/UbEqual.h
similarity index 100%
rename from src/basics/basics/utilities/UbEqual.h
rename to src/basics/utilities/UbEqual.h
diff --git a/src/basics/basics/utilities/UbException.h b/src/basics/utilities/UbException.h
similarity index 100%
rename from src/basics/basics/utilities/UbException.h
rename to src/basics/utilities/UbException.h
diff --git a/src/basics/basics/utilities/UbFileInput.h b/src/basics/utilities/UbFileInput.h
similarity index 100%
rename from src/basics/basics/utilities/UbFileInput.h
rename to src/basics/utilities/UbFileInput.h
diff --git a/src/basics/basics/utilities/UbFileInputASCII.cpp b/src/basics/utilities/UbFileInputASCII.cpp
similarity index 100%
rename from src/basics/basics/utilities/UbFileInputASCII.cpp
rename to src/basics/utilities/UbFileInputASCII.cpp
diff --git a/src/basics/basics/utilities/UbFileInputASCII.h b/src/basics/utilities/UbFileInputASCII.h
similarity index 100%
rename from src/basics/basics/utilities/UbFileInputASCII.h
rename to src/basics/utilities/UbFileInputASCII.h
diff --git a/src/basics/basics/utilities/UbFileInputASCIITest.cfg b/src/basics/utilities/UbFileInputASCIITest.cfg
similarity index 100%
rename from src/basics/basics/utilities/UbFileInputASCIITest.cfg
rename to src/basics/utilities/UbFileInputASCIITest.cfg
diff --git a/src/basics/basics/utilities/UbFileInputASCIITest.cpp b/src/basics/utilities/UbFileInputASCIITest.cpp
similarity index 100%
rename from src/basics/basics/utilities/UbFileInputASCIITest.cpp
rename to src/basics/utilities/UbFileInputASCIITest.cpp
diff --git a/src/basics/basics/utilities/UbFileInputBinary.cpp b/src/basics/utilities/UbFileInputBinary.cpp
similarity index 100%
rename from src/basics/basics/utilities/UbFileInputBinary.cpp
rename to src/basics/utilities/UbFileInputBinary.cpp
diff --git a/src/basics/basics/utilities/UbFileInputBinary.h b/src/basics/utilities/UbFileInputBinary.h
similarity index 100%
rename from src/basics/basics/utilities/UbFileInputBinary.h
rename to src/basics/utilities/UbFileInputBinary.h
diff --git a/src/basics/basics/utilities/UbFileOutput.h b/src/basics/utilities/UbFileOutput.h
similarity index 100%
rename from src/basics/basics/utilities/UbFileOutput.h
rename to src/basics/utilities/UbFileOutput.h
diff --git a/src/basics/basics/utilities/UbFileOutputASCII.cpp b/src/basics/utilities/UbFileOutputASCII.cpp
similarity index 100%
rename from src/basics/basics/utilities/UbFileOutputASCII.cpp
rename to src/basics/utilities/UbFileOutputASCII.cpp
diff --git a/src/basics/basics/utilities/UbFileOutputASCII.h b/src/basics/utilities/UbFileOutputASCII.h
similarity index 100%
rename from src/basics/basics/utilities/UbFileOutputASCII.h
rename to src/basics/utilities/UbFileOutputASCII.h
diff --git a/src/basics/basics/utilities/UbFileOutputBinary.cpp b/src/basics/utilities/UbFileOutputBinary.cpp
similarity index 100%
rename from src/basics/basics/utilities/UbFileOutputBinary.cpp
rename to src/basics/utilities/UbFileOutputBinary.cpp
diff --git a/src/basics/basics/utilities/UbFileOutputBinary.h b/src/basics/utilities/UbFileOutputBinary.h
similarity index 100%
rename from src/basics/basics/utilities/UbFileOutputBinary.h
rename to src/basics/utilities/UbFileOutputBinary.h
diff --git a/src/basics/basics/utilities/UbInfinity.h b/src/basics/utilities/UbInfinity.h
similarity index 100%
rename from src/basics/basics/utilities/UbInfinity.h
rename to src/basics/utilities/UbInfinity.h
diff --git a/src/basics/basics/utilities/UbKeys.h b/src/basics/utilities/UbKeys.h
similarity index 100%
rename from src/basics/basics/utilities/UbKeys.h
rename to src/basics/utilities/UbKeys.h
diff --git a/src/basics/basics/utilities/UbLimits.h b/src/basics/utilities/UbLimits.h
similarity index 100%
rename from src/basics/basics/utilities/UbLimits.h
rename to src/basics/utilities/UbLimits.h
diff --git a/src/basics/basics/utilities/UbLogger.cpp b/src/basics/utilities/UbLogger.cpp
similarity index 100%
rename from src/basics/basics/utilities/UbLogger.cpp
rename to src/basics/utilities/UbLogger.cpp
diff --git a/src/basics/basics/utilities/UbLogger.h b/src/basics/utilities/UbLogger.h
similarity index 100%
rename from src/basics/basics/utilities/UbLogger.h
rename to src/basics/utilities/UbLogger.h
diff --git a/src/basics/basics/utilities/UbMath.cpp b/src/basics/utilities/UbMath.cpp
similarity index 100%
rename from src/basics/basics/utilities/UbMath.cpp
rename to src/basics/utilities/UbMath.cpp
diff --git a/src/basics/basics/utilities/UbMath.h b/src/basics/utilities/UbMath.h
similarity index 77%
rename from src/basics/basics/utilities/UbMath.h
rename to src/basics/utilities/UbMath.h
index fe6b01140a4f1675335e3e4f8c9a542055881727..5ba42eb6b0c00941c72ea8370ab3fe0cef71ba98 100644
--- a/src/basics/basics/utilities/UbMath.h
+++ b/src/basics/utilities/UbMath.h
@@ -413,93 +413,93 @@ inline const T &min(const T &a1, const T &a2, const T &a3, const T &a4)
 // constants
 //
 //////////////////////////////////////////////////////////////////////////
-static const double c8o27  = 8. / 27.;
-static const double c2o27  = 2. / 27.;
-static const double c1o54  = 1. / 54.;
-static const double c1o216 = 1. / 216.;
-static const double c9o2   = 9. / 2.; // 4.5
-static const double c9o4   = 9. / 4.; // 2.25
-static const double c3o9   = 3. / 9.;
-static const double c3o54  = 3. / 54.;
-static const double c3o216 = 3. / 216.;
-
-static const double c1o27 = 1. / 27.;
-
-static const double c1o72          = 1. / 72.; // 0.01388888
-static const double c1o36          = 1. / 36.; // 0.02777777
-static const double c1o48          = 1. / 48.; // 0.02083333
-static const double c1o32          = 1. / 32.; // 0.03125
-static const double c1o24          = 1. / 24.; // 0.04166666
-static const double c1o20          = 1. / 20.; // 0.05
-static const double c1o18          = 1. / 18.; // 0.05555555
-static const double c1o16          = 1. / 16.; // 0.0625
-static const double c1o12          = 1. / 12.; // 0.08333333
-static const double c1o9           = 1. / 9.;  // 0.11111111
-static const double c1o8           = 1. / 8.;  // 0.125
-static const double c1o6           = 1. / 6.;  // 0.16666666
-static const double c1o5           = 1. / 5.;  // 0.2
-static const double c1o4           = 1. / 4.;  // 0.25
-static const double c1o100         = 1. / 100.;
-static const double c5o16          = 5. / 16.;        // 0.3125
+//static const double c8o27  = 8. / 27.;
+//static const double c2o27  = 2. / 27.;
+//static const double c1o54  = 1. / 54.;
+//static const double c1o216 = 1. / 216.;
+//static const double c9o2   = 9. / 2.; // 4.5
+//static const double c9o4   = 9. / 4.; // 2.25
+//static const double c3o9   = 3. / 9.;
+//static const double c3o54  = 3. / 54.;
+//static const double c3o216 = 3. / 216.;
+//
+//static const double c1o27 = 1. / 27.;
+//
+//static const double c1o72          = 1. / 72.; // 0.01388888
+//static const double c1o36          = 1. / 36.; // 0.02777777
+//static const double c1o48          = 1. / 48.; // 0.02083333
+//static const double c1o32          = 1. / 32.; // 0.03125
+//static const double c1o24          = 1. / 24.; // 0.04166666
+//static const double c1o20          = 1. / 20.; // 0.05
+//static const double c1o18          = 1. / 18.; // 0.05555555
+//static const double c1o16          = 1. / 16.; // 0.0625
+//static const double c1o12          = 1. / 12.; // 0.08333333
+//static const double c1o9           = 1. / 9.;  // 0.11111111
+//static const double c1o8           = 1. / 8.;  // 0.125
+//static const double c1o6           = 1. / 6.;  // 0.16666666
+//static const double c1o5           = 1. / 5.;  // 0.2
+//static const double c1o4           = 1. / 4.;  // 0.25
+//static const double c1o100         = 1. / 100.;
+//static const double c5o16          = 5. / 16.;        // 0.3125
 static const double c1o3           = 1. / 3.;         // 0.33333333
-static const double c3o8           = 3. / 8.;         // 0.375
-static const double c4o9           = 4. / 9.;         // 0.44444444
-static const double c1o2           = 1. / 2.;         // 0.5
-static const double c9o16          = 9. / 16.;        // 0.5625
-static const double c2o3           = 2. / 3.;         // 0.66666666
-static const double c3o4           = 3. / 4.;         // 0.75
-static const double c3o2           = 3. / 2.;         // 1.5
-static const double c4o3           = 4. / 3.;         // 1.33333333
-static const double c5o3           = 5. / 3.;         // 1.66666666
-static const double c9o5           = 9. / 5.;         // 1.8
-static const double c2o9           = 2. / 9.;         // 0.22222222
-static const double one_over_sqrt2 = 1.0 / sqrt(2.0); // 0.707106781
-static const double one_over_sqrt3 = 1.0 / sqrt(3.0); // 0.577350269
-static const double sqrt2          = sqrt(2.0);       // 1.4142135
-static const double sqrt3          = sqrt(3.0);       // 1.7320508
-static const double zeroReal       = 0.0;
-static const double c1             = 1.0;
-static const double c2             = 2.0;
-static const double c3             = 3.0;
-static const double c4             = 4.0;
-static const double c5             = 5.0;
-static const double c6             = 6.0;
-static const double c7             = 7.0;
-static const double c8             = 8.0;
-static const double c9             = 9.0;
-static const double c14            = 14.0;
-static const double c15            = 15.0;
-static const double c16            = 16.0;
-static const double c18            = 18.0;
-static const double c21            = 21.0;
-static const double c24            = 24.0;
-static const double c28            = 28.0;
-static const double c29            = 29.0;
-static const double c36            = 36.0;
-static const double c48            = 48.0;
-static const double c50            = 50.0;
-static const double c56            = 56.0;
-static const double c152           = 152.0;
-static const double c130           = 130.0;
-static const double one            = 1.0;
-static const double two            = 2.0;
-static const double three          = 3.0;
-static const double four           = 4.0;
-static const double five           = 5.0;
-static const double six            = 6.0;
-static const double seven          = 7.0;
-static const double eight          = 8.0;
-static const double nine           = 9.0;
-static const double fourteen       = 14.0;
-static const double fiveteen       = 15.0;
-static const double sixteen        = 16.0;
-static const double twentyone      = 21.0;
-static const double twentyfour     = 24.0;
-static const double twentyeight    = 28.0;
-static const double twentynine     = 29.0;
-static const double fourtyeight    = 48.0;
-static const double fifty          = 50.0;
-static const double fiftysix       = 56.0;
+//static const double c3o8           = 3. / 8.;         // 0.375
+//static const double c4o9           = 4. / 9.;         // 0.44444444
+//static const double c1o2           = 1. / 2.;         // 0.5
+//static const double c9o16          = 9. / 16.;        // 0.5625
+//static const double c2o3           = 2. / 3.;         // 0.66666666
+//static const double c3o4           = 3. / 4.;         // 0.75
+//static const double c3o2           = 3. / 2.;         // 1.5
+//static const double c4o3           = 4. / 3.;         // 1.33333333
+//static const double c5o3           = 5. / 3.;         // 1.66666666
+//static const double c9o5           = 9. / 5.;         // 1.8
+//static const double c2o9           = 2. / 9.;         // 0.22222222
+//static const double one_over_sqrt2 = 1.0 / sqrt(2.0); // 0.707106781
+//static const double one_over_sqrt3 = 1.0 / sqrt(3.0); // 0.577350269
+//static const double sqrt2          = sqrt(2.0);       // 1.4142135
+//static const double sqrt3          = sqrt(3.0);       // 1.7320508
+//static const double zeroReal       = 0.0;
+//static const double c1             = 1.0;
+//static const double c2             = 2.0;
+//static const double c3             = 3.0;
+//static const double c4             = 4.0;
+//static const double c5             = 5.0;
+//static const double c6             = 6.0;
+//static const double c7             = 7.0;
+//static const double c8             = 8.0;
+//static const double c9             = 9.0;
+//static const double c14            = 14.0;
+//static const double c15            = 15.0;
+//static const double c16            = 16.0;
+//static const double c18            = 18.0;
+//static const double c21            = 21.0;
+//static const double c24            = 24.0;
+//static const double c28            = 28.0;
+//static const double c29            = 29.0;
+//static const double c36            = 36.0;
+//static const double c48            = 48.0;
+//static const double c50            = 50.0;
+//static const double c56            = 56.0;
+//static const double c152           = 152.0;
+//static const double c130           = 130.0;
+//static const double one            = 1.0;
+//static const double two            = 2.0;
+//static const double three          = 3.0;
+//static const double four           = 4.0;
+//static const double five           = 5.0;
+//static const double six            = 6.0;
+//static const double seven          = 7.0;
+//static const double eight          = 8.0;
+//static const double nine           = 9.0;
+//static const double fourteen       = 14.0;
+//static const double fiveteen       = 15.0;
+//static const double sixteen        = 16.0;
+//static const double twentyone      = 21.0;
+//static const double twentyfour     = 24.0;
+//static const double twentyeight    = 28.0;
+//static const double twentynine     = 29.0;
+//static const double fourtyeight    = 48.0;
+//static const double fifty          = 50.0;
+//static const double fiftysix       = 56.0;
 
 } // namespace UbMath
 
diff --git a/src/basics/basics/utilities/UbNupsTimer.h b/src/basics/utilities/UbNupsTimer.h
similarity index 100%
rename from src/basics/basics/utilities/UbNupsTimer.h
rename to src/basics/utilities/UbNupsTimer.h
diff --git a/src/basics/basics/utilities/UbObservable.h b/src/basics/utilities/UbObservable.h
similarity index 100%
rename from src/basics/basics/utilities/UbObservable.h
rename to src/basics/utilities/UbObservable.h
diff --git a/src/basics/basics/utilities/UbObserver.h b/src/basics/utilities/UbObserver.h
similarity index 100%
rename from src/basics/basics/utilities/UbObserver.h
rename to src/basics/utilities/UbObserver.h
diff --git a/src/basics/basics/utilities/UbRandom.h b/src/basics/utilities/UbRandom.h
similarity index 100%
rename from src/basics/basics/utilities/UbRandom.h
rename to src/basics/utilities/UbRandom.h
diff --git a/src/basics/basics/utilities/UbScheduler.h b/src/basics/utilities/UbScheduler.h
similarity index 100%
rename from src/basics/basics/utilities/UbScheduler.h
rename to src/basics/utilities/UbScheduler.h
diff --git a/src/basics/basics/utilities/UbStringInputASCII.cpp b/src/basics/utilities/UbStringInputASCII.cpp
similarity index 100%
rename from src/basics/basics/utilities/UbStringInputASCII.cpp
rename to src/basics/utilities/UbStringInputASCII.cpp
diff --git a/src/basics/basics/utilities/UbStringInputASCII.h b/src/basics/utilities/UbStringInputASCII.h
similarity index 100%
rename from src/basics/basics/utilities/UbStringInputASCII.h
rename to src/basics/utilities/UbStringInputASCII.h
diff --git a/src/basics/basics/utilities/UbSystem.h b/src/basics/utilities/UbSystem.h
similarity index 100%
rename from src/basics/basics/utilities/UbSystem.h
rename to src/basics/utilities/UbSystem.h
diff --git a/src/basics/basics/utilities/UbTiming.h b/src/basics/utilities/UbTiming.h
similarity index 100%
rename from src/basics/basics/utilities/UbTiming.h
rename to src/basics/utilities/UbTiming.h
diff --git a/src/basics/basics/utilities/UbTuple.h b/src/basics/utilities/UbTuple.h
similarity index 100%
rename from src/basics/basics/utilities/UbTuple.h
rename to src/basics/utilities/UbTuple.h
diff --git a/src/basics/basics/utilities/Vector3D.cpp b/src/basics/utilities/Vector3D.cpp
similarity index 100%
rename from src/basics/basics/utilities/Vector3D.cpp
rename to src/basics/utilities/Vector3D.cpp
diff --git a/src/basics/basics/utilities/Vector3D.h b/src/basics/utilities/Vector3D.h
similarity index 100%
rename from src/basics/basics/utilities/Vector3D.h
rename to src/basics/utilities/Vector3D.h
diff --git a/src/basics/basics/utilities/Vector3DTest.cpp b/src/basics/utilities/Vector3DTest.cpp
similarity index 100%
rename from src/basics/basics/utilities/Vector3DTest.cpp
rename to src/basics/utilities/Vector3DTest.cpp
diff --git a/src/basics/basics/writer/WbWriter.h b/src/basics/writer/WbWriter.h
similarity index 95%
rename from src/basics/basics/writer/WbWriter.h
rename to src/basics/writer/WbWriter.h
index 26d43464c03311a2cbc14cd4fc9fe717d4b01531..55dceb7cb4a64dc90f0677796cab52135b726f56 100644
--- a/src/basics/basics/writer/WbWriter.h
+++ b/src/basics/writer/WbWriter.h
@@ -88,7 +88,12 @@ public:
     {
         throw UbException(UB_EXARGS, "not implemented for " + (std::string) typeid(*this).name());
     }
-
+    // Lines with per-line data: this base implementation only throws UbException ("not implemented for <dynamic type name>").
+    virtual std::string writeLinesWithLineData(const std::string & /*filename*/, std::vector<UbTupleFloat3> & /*nodes*/, std::vector<UbTupleInt2> & /*lines*/,
+                                               std::vector<std::string> & /*datanames*/, std::vector<std::vector<float>> & /*celldata*/)
+    {
+        throw UbException(UB_EXARGS, "not implemented for " + (std::string) typeid(*this).name());
+    }
     //////////////////////////////////////////////////////////////////////////
     // triangles
     // cell numbering:
diff --git a/src/basics/basics/writer/WbWriterAvsASCII.cpp b/src/basics/writer/WbWriterAvsASCII.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterAvsASCII.cpp
rename to src/basics/writer/WbWriterAvsASCII.cpp
diff --git a/src/basics/basics/writer/WbWriterAvsASCII.h b/src/basics/writer/WbWriterAvsASCII.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterAvsASCII.h
rename to src/basics/writer/WbWriterAvsASCII.h
diff --git a/src/basics/basics/writer/WbWriterAvsBinary.cpp b/src/basics/writer/WbWriterAvsBinary.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterAvsBinary.cpp
rename to src/basics/writer/WbWriterAvsBinary.cpp
diff --git a/src/basics/basics/writer/WbWriterAvsBinary.h b/src/basics/writer/WbWriterAvsBinary.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterAvsBinary.h
rename to src/basics/writer/WbWriterAvsBinary.h
diff --git a/src/basics/basics/writer/WbWriterBOBJ.cpp b/src/basics/writer/WbWriterBOBJ.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterBOBJ.cpp
rename to src/basics/writer/WbWriterBOBJ.cpp
diff --git a/src/basics/basics/writer/WbWriterBOBJ.h b/src/basics/writer/WbWriterBOBJ.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterBOBJ.h
rename to src/basics/writer/WbWriterBOBJ.h
diff --git a/src/basics/basics/writer/WbWriterSunflow.cpp b/src/basics/writer/WbWriterSunflow.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterSunflow.cpp
rename to src/basics/writer/WbWriterSunflow.cpp
diff --git a/src/basics/basics/writer/WbWriterSunflow.h b/src/basics/writer/WbWriterSunflow.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterSunflow.h
rename to src/basics/writer/WbWriterSunflow.h
diff --git a/src/basics/basics/writer/WbWriterTecPlotASCII.cpp b/src/basics/writer/WbWriterTecPlotASCII.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterTecPlotASCII.cpp
rename to src/basics/writer/WbWriterTecPlotASCII.cpp
diff --git a/src/basics/basics/writer/WbWriterTecPlotASCII.h b/src/basics/writer/WbWriterTecPlotASCII.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterTecPlotASCII.h
rename to src/basics/writer/WbWriterTecPlotASCII.h
diff --git a/src/basics/basics/writer/WbWriterVtkASCII.cpp b/src/basics/writer/WbWriterVtkASCII.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterVtkASCII.cpp
rename to src/basics/writer/WbWriterVtkASCII.cpp
diff --git a/src/basics/basics/writer/WbWriterVtkASCII.h b/src/basics/writer/WbWriterVtkASCII.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterVtkASCII.h
rename to src/basics/writer/WbWriterVtkASCII.h
diff --git a/src/basics/basics/writer/WbWriterVtkBinary.cpp b/src/basics/writer/WbWriterVtkBinary.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterVtkBinary.cpp
rename to src/basics/writer/WbWriterVtkBinary.cpp
diff --git a/src/basics/basics/writer/WbWriterVtkBinary.h b/src/basics/writer/WbWriterVtkBinary.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterVtkBinary.h
rename to src/basics/writer/WbWriterVtkBinary.h
diff --git a/src/basics/basics/writer/WbWriterVtkXmlASCII.cpp b/src/basics/writer/WbWriterVtkXmlASCII.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterVtkXmlASCII.cpp
rename to src/basics/writer/WbWriterVtkXmlASCII.cpp
diff --git a/src/basics/basics/writer/WbWriterVtkXmlASCII.h b/src/basics/writer/WbWriterVtkXmlASCII.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterVtkXmlASCII.h
rename to src/basics/writer/WbWriterVtkXmlASCII.h
diff --git a/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp b/src/basics/writer/WbWriterVtkXmlBinary.cpp
similarity index 89%
rename from src/basics/basics/writer/WbWriterVtkXmlBinary.cpp
rename to src/basics/writer/WbWriterVtkXmlBinary.cpp
index 6731fa56026ca284ad671cb6ce59000a609bbb8c..55c3541983ea4248512508146792832a34a1c563 100644
--- a/src/basics/basics/writer/WbWriterVtkXmlBinary.cpp
+++ b/src/basics/writer/WbWriterVtkXmlBinary.cpp
@@ -34,6 +34,8 @@
 #include <basics/writer/WbWriterVtkXmlASCII.h>
 #include <basics/writer/WbWriterVtkXmlBinary.h>
 #include <cstring>
+#include <fstream>
+#include <string>
 
 using namespace std;
 
@@ -154,12 +156,13 @@ string WbWriterVtkXmlBinary::writeParallelFile(const string &filename, vector<st
 
     return vtkfilename;
 }
+
 /*===============================================================================*/
-string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFloat3> &nodes,
-                                        vector<UbTupleInt2> &lines)
+
+// helper functions
+
+ofstream createFileStream(std::string vtkfilename)
 {
-    string vtkfilename = filename + getFileExtension();
-    UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - start");
 
     ofstream out(vtkfilename.c_str(), ios::out | ios::binary);
     if (!out) {
@@ -172,89 +175,199 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl
         if (!out)
             throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename);
     }
+    return out;
+}
 
-    int nofNodes = (int)nodes.size();
-    int nofCells = (int)lines.size();
-
-    int bytesPerByteVal      = 4; //==sizeof(int)
-    int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 2 /*nodes per line */ * nofCells * sizeof(int);
-    int bytesCellOffsets     = 1 /*offset per line */ * nofCells * sizeof(int);
-    int bytesCellTypes       = 1 /*type of line */ * nofCells * sizeof(unsigned char);
-
-    int offset = 0;
-    // VTK FILE
+void writeVtkHeader(ofstream &out, int numberOfNodes, int numberOfCells)
+{
     out << "<?xml version=\"1.0\"?>\n";
     out << "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\" >"
         << "\n";
     out << "   <UnstructuredGrid>"
         << "\n";
-    out << "      <Piece NumberOfPoints=\"" << nofNodes << "\" NumberOfCells=\"" << nofCells << "\">\n";
+    out << "      <Piece NumberOfPoints=\"" << numberOfNodes << "\" NumberOfCells=\"" << numberOfCells << "\">\n";
+}
 
-    // POINTS SECTION
+int writePointHeader(ofstream &out, int offset, int bytesPerByteVal, int bytesPoints)
+{
     out << "         <Points>\n";
     out << "            <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"appended\" offset=\"" << offset
         << "\"  />\n";
     out << "         </Points>\n";
     offset += (bytesPerByteVal + bytesPoints);
+    return offset;
+}
 
-    // CELLS SECTION
+int writeCellHeader(ofstream &out, int offset, int bytesPerByteVal, int bytesCellConnectivity, int bytesCellOffsets,
+                    int bytesCellTypes)
+{
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
     out << "            <DataArray type=\"UInt8\" Name=\"types\" format=\"appended\" offset=\"" << offset << "\" />\n ";
     offset += (bytesPerByteVal + bytesCellTypes);
     out << "         </Cells>\n";
+    return offset;
+}
 
+// Emits the <CellData> header with one appended Float32 DataArray per name; returns the offset behind the last declared array.
+int writeDataHeader(ofstream &out, vector<string> &datanames, int offset, int bytesPerByteVal, int bytesScalarData)
+{
+    out << "         <CellData>\n";
+    for (const string &arrayName : datanames) {
+        out << "            <DataArray type=\"Float32\" Name=\"" << arrayName << "\" format=\"appended\" offset=\"" << offset << "\" /> \n";
+        offset += bytesPerByteVal + bytesScalarData;
+    }
+    out << "         </CellData>\n";
+    return offset;
+}
+
+void writeAppendDataHeader(ofstream &out)
+{
     out << "      </Piece>\n";
     out << "   </UnstructuredGrid>\n";
-
-    // AppendedData SECTION
     out << "   <AppendedData encoding=\"raw\">\n";
     out << "_";
+}
 
-    // POINTS SECTION
+void writePoints(ofstream &out, int bytesPerByteVal, int bytesPoints, vector<UbTupleFloat3> &nodes)
+{
     out.write((char *)&bytesPoints, bytesPerByteVal);
-    for (int n = 0; n < nofNodes; n++) {
+    for (int n = 0; n < (int)nodes.size(); n++) {
         out.write((char *)&val<1>(nodes[n]), sizeof(float));
         out.write((char *)&val<2>(nodes[n]), sizeof(float));
         out.write((char *)&val<3>(nodes[n]), sizeof(float));
     }
+}
 
-    // CELLS SECTION
-    // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
-    for (int c = 0; c < nofCells; c++) {
-        out.write((char *)&val<1>(lines[c]), sizeof(int));
-        out.write((char *)&val<2>(lines[c]), sizeof(int));
+// Appends the connectivity array: the byte count (bytesPerByteVal bytes wide) followed by both node indices of every cell.
+void writeCellConnectivity(ofstream &out, int bytesPerByteVal, int bytesCellConnectivity, vector<UbTupleInt2> &cells)
+{
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
+    for (UbTupleInt2 &cell : cells) {
+        out.write((char *)&val<1>(cell), sizeof(int)).write((char *)&val<2>(cell), sizeof(int));
+    }
+}
 
-    // cellOffsets
+void writeCellOffsets(ofstream &out, int bytesPerByteVal, int bytesCellOffsets, int numberOfCells)
+{
     out.write((char *)&bytesCellOffsets, bytesPerByteVal);
     int itmp;
-    for (int c = 1; c <= nofCells; c++) {
+    for (int c = 1; c <= numberOfCells; c++) {
         itmp = 2 * c;
         out.write((char *)&itmp, sizeof(int));
     }
+}
 
-    // cellTypes
+void writeCellTypes(ofstream &out, int bytesPerByteVal, int bytesCellTypes, int numberOfCells)
+{
     out.write((char *)&bytesCellTypes, bytesPerByteVal);
     unsigned char vtkCellType = 3;
-    for (int c = 0; c < nofCells; c++) {
+    for (int c = 0; c < numberOfCells; c++) {
         out.write((char *)&vtkCellType, sizeof(unsigned char));
     }
+}
+
+// Appends one raw Float32 block per entry of datanames: the length prefix bytesScalarData (bytesPerByteVal bytes wide), then the values of celldata[s].
+void writeCellData(ofstream &out, int bytesPerByteVal, int bytesScalarData, vector<string> &datanames, vector<vector<float>> &celldata)
+{
+    for (size_t s = 0; s < datanames.size(); ++s) {
+        out.write((char *)&bytesScalarData, bytesPerByteVal);
+        // at() is bounds-checked: fewer data rows than names throws std::out_of_range instead of reading past the end
+        for (float value : celldata.at(s)) {
+            // celldata already stores floats, so the loop copy can be written without a conversion
+            out.write((char *)&value, sizeof(float));
+        }
+    }
+}
+
+void writeEndOfFile(ofstream &out)
+{
     out << "\n</AppendedData>\n";
     out << "</VTKFile>";
     out << endl;
     out.close();
+}
+
+/*===============================================================================*/
+// Writes nodes and line cells as a VTK XML UnstructuredGrid with appended raw data; returns the written file name (filename + extension).
+string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFloat3> &nodes, vector<UbTupleInt2> &lines)
+{
+    string vtkfilename = filename + getFileExtension();
+    UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - start");
+
+    ofstream out = createFileStream(vtkfilename);
+
+    int nofNodes = (int)nodes.size();
+    int nofCells = (int)lines.size();
+
+    int bytesPerByteVal = 4; //==sizeof(int)
+    int bytesPoints = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
+    int bytesCellConnectivity = 2 /*nodes per line */ * nofCells * sizeof(int);
+    int bytesCellOffsets = 1 /*offset per line */ * nofCells * sizeof(int);
+    int bytesCellTypes = 1 /*type of line */ * nofCells * sizeof(unsigned char);
+
+    int offset = 0;
+    // Declare the arrays with their offsets into the appended data; the offset returned by writeCellHeader is not needed afterwards
+    writeVtkHeader(out, nofNodes, nofCells);
+    offset = writePointHeader(out, offset, bytesPerByteVal, bytesPoints);
+    writeCellHeader(out, offset, bytesPerByteVal, bytesCellConnectivity, bytesCellOffsets, bytesCellTypes);
+    writeAppendDataHeader(out);
+    // Raw payload, written in the same order in which the arrays were declared above
+    writePoints(out, bytesPerByteVal, bytesPoints, nodes);
+    writeCellConnectivity(out, bytesPerByteVal, bytesCellConnectivity, lines);
+    writeCellOffsets(out, bytesPerByteVal, bytesCellOffsets, nofCells);
+    writeCellTypes(out, bytesPerByteVal, bytesCellTypes, nofCells);
+    writeEndOfFile(out);
     UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLines to " << vtkfilename << " - end");
 
     return vtkfilename;
 }
+
+/*===============================================================================*/
+// Writes lines plus one Float32 cell-data array per entry of datanames (appended raw data); returns the written file name.
+string WbWriterVtkXmlBinary::writeLinesWithLineData(const string &filename, vector<UbTupleFloat3> &nodes, vector<UbTupleInt2> &lines,
+                                                    vector<string> &datanames, vector<vector<float>> &celldata)
+{
+    string vtkfilename = filename + getFileExtension();
+    UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLinesWithLineData to " << vtkfilename << " - start");
+
+    // Reject inconsistent input before the file is created: a missing row would be read out of bounds, a row of another length contradicts the announced length prefix.
+    for (size_t s = 0; s < datanames.size(); ++s) {
+        if (s >= celldata.size() || celldata[s].size() != lines.size())
+            throw UbException(UB_EXARGS, "cell data for " + datanames[s] + " is missing or does not hold one value per line");
+    }
+
+    ofstream out = createFileStream(vtkfilename);
+    int nofNodes = (int)nodes.size();
+    int nofCells = (int)lines.size();
+    int bytesPerByteVal = 4; //==sizeof(int)
+    int bytesPoints = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
+    int bytesCellConnectivity = 2 /*nodes per line */ * nofCells * sizeof(int);
+    int bytesCellOffsets = 1 /*offset per line */ * nofCells * sizeof(int);
+    int bytesCellTypes = 1 /*type of line */ * nofCells * sizeof(unsigned char);
+    int bytesScalarData = 1 /*scalar        */ * nofCells * sizeof(float);
+
+    writeVtkHeader(out, nofNodes, nofCells);
+    int offset = writePointHeader(out, 0, bytesPerByteVal, bytesPoints);
+    offset = writeCellHeader(out, offset, bytesPerByteVal, bytesCellConnectivity, bytesCellOffsets, bytesCellTypes);
+    writeDataHeader(out, datanames, offset, bytesPerByteVal, bytesScalarData);
+    writeAppendDataHeader(out);
+
+    writePoints(out, bytesPerByteVal, bytesPoints, nodes);
+    writeCellConnectivity(out, bytesPerByteVal, bytesCellConnectivity, lines);
+    writeCellOffsets(out, bytesPerByteVal, bytesCellOffsets, nofCells);
+    writeCellTypes(out, bytesPerByteVal, bytesCellTypes, nofCells);
+    writeCellData(out, bytesPerByteVal, bytesScalarData, datanames, celldata);
+    writeEndOfFile(out);
+    UBLOG(logDEBUG1, "WbWriterVtkXmlBinary::writeLinesWithLineData to " << vtkfilename << " - end");
+    return vtkfilename;
+}
+
 /*===============================================================================*/
 // std::string WbWriterVtkXmlBinary::writeLinesWithNodeData(const string& filename,vector<UbTupleFloat3 >& nodes,
 // vector<UbTupleInt2 >& lines, std::vector< std::string >& datanames, std::vector< std::vector< double > >& nodedata)
@@ -276,7 +389,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl
 //
 //   int bytesPerByteVal      = 4; //==sizeof(int)
 //   int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-//   int bytesCellConnectivty = 2 /*nodes per line  */ * nofCells * sizeof(int  );
+//   int bytesCellConnectivity = 2 /*nodes per line  */ * nofCells * sizeof(int  );
 //   int bytesCellOffsets     = 1 /*offset per line */ * nofCells * sizeof(int  );
 //   int bytesCellTypes       = 1 /*type of line    */ * nofCells * sizeof(unsigned char);
 //   int bytesScalarData      = 1 /*scalar          */ * nofNodes * sizeof(float);
@@ -296,7 +409,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl
 //   //CELLS SECTION
 //   out<<"         <Cells>\n";
 //   out<<"            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\""<< offset <<"\"
-//   />\n"; offset += (bytesPerByteVal + bytesCellConnectivty); out<<"            <DataArray type=\"Int32\"
+//   />\n"; offset += (bytesPerByteVal + bytesCellConnectivity); out<<"            <DataArray type=\"Int32\"
 //   Name=\"offsets\" format=\"appended\" offset=\""<< offset <<"\" />\n"; offset += (bytesPerByteVal +
 //   bytesCellOffsets); out<<"            <DataArray type=\"UInt8\" Name=\"types\" format=\"appended\" offset=\""<<
 //   offset <<"\" />\n "; offset += (bytesPerByteVal + bytesCellTypes); out<<"         </Cells>\n";
@@ -328,7 +441,7 @@ string WbWriterVtkXmlBinary::writeLines(const string &filename, vector<UbTupleFl
 //
 //   //CELLS SECTION
 //   //cellConnectivity
-//   out.write( (char*)&bytesCellConnectivty, bytesPerByteVal );
+//   out.write( (char*)&bytesCellConnectivity, bytesPerByteVal );
 //   for(int c=0; c<nofCells; c++)
 //   {
 //      out.write( (char*)&val<1>(lines[c]), sizeof(int) );
@@ -397,7 +510,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3 - coord    */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 3 /*nodes per triangle  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 3 /*nodes per triangle  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per triangle */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of triangle    */ * nofCells * sizeof(unsigned char);
 
@@ -421,7 +534,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -446,7 +559,7 @@ string WbWriterVtkXmlBinary::writeTriangles(const string &filename, vector<UbTup
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(triangles[c]), sizeof(int));
         out.write((char *)&val<2>(triangles[c]), sizeof(int));
@@ -502,7 +615,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename,
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 3 /*nodes per tri   */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 3 /*nodes per tri   */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per tri  */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of tri     */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar          */ * nofNodes * sizeof(float);
@@ -527,7 +640,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename,
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -561,7 +674,7 @@ string WbWriterVtkXmlBinary::writeTrianglesWithNodeData(const string &filename,
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -625,7 +738,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 4 /*nodes per quad  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 4 /*nodes per quad  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per quad */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of quad    */ * nofCells * sizeof(unsigned char);
 
@@ -649,7 +762,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -674,7 +787,7 @@ string WbWriterVtkXmlBinary::writeQuads(const string &filename, vector<UbTupleFl
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -730,7 +843,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 4 /*nodes per quad  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 4 /*nodes per quad  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per quad */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of quad    */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar          */ * nofNodes * sizeof(float);
@@ -755,7 +868,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -789,7 +902,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeData(const string &filename, vect
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -855,7 +968,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 4 /*nodes per quad  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 4 /*nodes per quad  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per quad */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of quad    */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar          */ * nofCells * sizeof(float);
@@ -880,7 +993,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -914,7 +1027,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithCellData(const string &filename, vect
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -984,7 +1097,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 4 /*nodes per quad  */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 4 /*nodes per quad  */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per quad */ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of quad    */ * nofCells * sizeof(unsigned char);
     int bytesScalarDataPoint = 1 /*scalar          */ * nofNodes * sizeof(float);
@@ -1010,7 +1123,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1052,7 +1165,7 @@ string WbWriterVtkXmlBinary::writeQuadsWithNodeAndCellData(const string &filenam
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -1128,7 +1241,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3      */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per oct*/ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of oct   */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar        */ * nofCells * sizeof(float);
@@ -1153,7 +1266,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1187,7 +1300,7 @@ string WbWriterVtkXmlBinary::writeOctsWithCellData(const string &filename, vecto
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -1257,7 +1370,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3      */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per oct*/ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of oct   */ * nofCells * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar        */ * nofNodes * sizeof(double);
@@ -1282,7 +1395,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1316,7 +1429,7 @@ string WbWriterVtkXmlBinary::writeOctsWithNodeData(const string &filename, vecto
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -1386,7 +1499,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3      */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 8 /*nodes per oct */ * nofCells * sizeof(int);
+    int bytesCellConnectivity = 8 /*nodes per oct */ * nofCells * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per oct*/ * nofCells * sizeof(int);
     int bytesCellTypes       = 1 /*type of oct   */ * nofCells * sizeof(unsigned char);
     // int bytesScalarData      = 1 /*scalar        */ * nofNodes * sizeof(float);
@@ -1411,7 +1524,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1436,7 +1549,7 @@ string WbWriterVtkXmlBinary::writeOcts(const string &filename, vector<UbTupleFlo
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofCells; c++) {
         out.write((char *)&val<1>(cells[c]), sizeof(int));
         out.write((char *)&val<2>(cells[c]), sizeof(int));
@@ -1491,7 +1604,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3        */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 1 /*nodes per cell  */ * nofNodes * sizeof(int);
+    int bytesCellConnectivity = 1 /*nodes per cell  */ * nofNodes * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per cell */ * nofNodes * sizeof(int);
     int bytesCellTypes       = 1 /*type of line    */ * nofNodes * sizeof(unsigned char);
 
@@ -1515,7 +1628,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1540,7 +1653,7 @@ std::string WbWriterVtkXmlBinary::writeNodes(const std::string &filename, std::v
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofNodes; c++)
         out.write((char *)&c, sizeof(int));
 
@@ -1586,7 +1699,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file
 
     int bytesPerByteVal      = 4; //==sizeof(int)
     int bytesPoints          = 3 /*x1/x2/x3       */ * nofNodes * sizeof(float);
-    int bytesCellConnectivty = 1 /*nodes per cell */ * nofNodes * sizeof(int);
+    int bytesCellConnectivity = 1 /*nodes per cell */ * nofNodes * sizeof(int);
     int bytesCellOffsets     = 1 /*offset per cell*/ * nofNodes * sizeof(int);
     int bytesCellTypes       = 1 /*type of oct    */ * nofNodes * sizeof(unsigned char);
     int bytesScalarData      = 1 /*scalar         */ * nofNodes * sizeof(double);
@@ -1611,7 +1724,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file
     out << "         <Cells>\n";
     out << "            <DataArray type=\"Int32\" Name=\"connectivity\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
-    offset += (bytesPerByteVal + bytesCellConnectivty);
+    offset += (bytesPerByteVal + bytesCellConnectivity);
     out << "            <DataArray type=\"Int32\" Name=\"offsets\" format=\"appended\" offset=\"" << offset
         << "\" />\n";
     offset += (bytesPerByteVal + bytesCellOffsets);
@@ -1645,7 +1758,7 @@ std::string WbWriterVtkXmlBinary::writeNodesWithNodeData(const std::string &file
 
     // CELLS SECTION
     // cellConnectivity
-    out.write((char *)&bytesCellConnectivty, bytesPerByteVal);
+    out.write((char *)&bytesCellConnectivity, bytesPerByteVal);
     for (int c = 0; c < nofNodes; c++)
         out.write((char *)&c, sizeof(int));
 
diff --git a/src/basics/basics/writer/WbWriterVtkXmlBinary.h b/src/basics/writer/WbWriterVtkXmlBinary.h
similarity index 96%
rename from src/basics/basics/writer/WbWriterVtkXmlBinary.h
rename to src/basics/writer/WbWriterVtkXmlBinary.h
index 421148d90497e3628ed274439c0b2fd7636b7fd2..0f2c31eda81ad0c1975c9715ac1b7fb37a06339b 100644
--- a/src/basics/basics/writer/WbWriterVtkXmlBinary.h
+++ b/src/basics/writer/WbWriterVtkXmlBinary.h
@@ -93,6 +93,9 @@ public:
     // nodedata);
     // FIXME: hides function in base class
 
+    std::string writeLinesWithLineData(const std::string &filename, std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt2> &lines,
+                                       std::vector<std::string> &datanames, std::vector<std::vector<float>> &celldata) override;
+
     //////////////////////////////////////////////////////////////////////////
     // triangles
     //                    2
diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp b/src/basics/writer/WbWriterVtkXmlImageBinary.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp
rename to src/basics/writer/WbWriterVtkXmlImageBinary.cpp
diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.h b/src/basics/writer/WbWriterVtkXmlImageBinary.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterVtkXmlImageBinary.h
rename to src/basics/writer/WbWriterVtkXmlImageBinary.h
diff --git a/src/basics/basics/writer/WbWriterX3D.cpp b/src/basics/writer/WbWriterX3D.cpp
similarity index 100%
rename from src/basics/basics/writer/WbWriterX3D.cpp
rename to src/basics/writer/WbWriterX3D.cpp
diff --git a/src/basics/basics/writer/WbWriterX3D.h b/src/basics/writer/WbWriterX3D.h
similarity index 100%
rename from src/basics/basics/writer/WbWriterX3D.h
rename to src/basics/writer/WbWriterX3D.h
diff --git a/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.cpp b/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.cpp
deleted file mode 100644
index 6f32a053afb4f6f45ae74e32b7f8665ab4fd58db..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-#include "CreateDemObjectsCoProcessor.h"
-#include <mpi/Communicator.h>
-#include "DemCoProcessor.h"
-#include "EquilibriumReconstructor.h"
-#include "ExtrapolationReconstructor.h"
-#include "GbSphere3D.h"
-#include "Grid3D.h"
-#include "LBMReconstructor.h"
-#include "MovableObjectInteractor.h"
-#include "NoSlipBCAlgorithm.h"
-#include "PePhysicsEngineMaterialAdapter.h"
-#include "PhysicsEngineMaterialAdapter.h"
-#include "SetBcBlocksBlockVisitor.h"
-#include "UbScheduler.h"
-#include "VelocityBCAdapter.h"
-#include "VelocityBCAlgorithm.h"
-#include "VelocityBcReconstructor.h"
-#include "VelocityWithDensityBCAlgorithm.h"
-#include "muParser.h"
-
-CreateDemObjectsCoProcessor::CreateDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
-                                                         std::shared_ptr<vf::mpi::Communicator> comm,
-                                                         SPtr<DemCoProcessor> demCoProcessor,
-                                                         SPtr<PhysicsEngineMaterialAdapter> demObjectMaterial,
-                                                         double tolerance)
-    : CoProcessor(grid, s), comm(comm), demCoProcessor(demCoProcessor), demObjectMaterial(demObjectMaterial),
-      tolerance(tolerance)
-{
-    mu::Parser fct;
-    fct.SetExpr("U");
-    fct.DefineConst("U", 0.0);
-    velocityBcParticleAdapter =
-        SPtr<BCAdapter>(new VelocityBCAdapter(true, false, false, fct, 0, BCFunction::INFCONST));
-    velocityBcParticleAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityWithDensityBCAlgorithm()));
-
-    // const std::shared_ptr<Reconstructor> velocityReconstructor(new VelocityBcReconstructor());
-    std::shared_ptr<Reconstructor> equilibriumReconstructor(new EquilibriumReconstructor());
-    // const std::shared_ptr<Reconstructor> lbmReconstructor(new LBMReconstructor(false));
-    extrapolationReconstructor = SPtr<Reconstructor>(new ExtrapolationReconstructor(equilibriumReconstructor));
-    demCounter                 = 0;
-}
-//////////////////////////////////////////////////////////////////////////
-void CreateDemObjectsCoProcessor::process(double step)
-{
-    if (scheduler->isDue(step)) {
-        int istep = static_cast<int>(step);
-
-#ifdef TIMING
-        if (comm->isRoot())
-            UBLOG(logINFO, "CreateDemObjectsCoProcessor::process start step: " << istep);
-        timer.resetAndStart();
-#endif
-
-        createGeoObjects();
-
-#ifdef TIMING
-        //      if (comm->isRoot()) UBLOG(logINFO, "createGeoObjects() time = "<<timer.stop()<<" s");
-        //      if (comm->isRoot()) UBLOG(logINFO, "number of objects = "<<(int)(geoObjectPrototypeVector.size()));
-        //      if (comm->isRoot()) UBLOG(logINFO, "total number of objects = "<<demCounter);
-        if (comm->isRoot())
-            UBLOG(logINFO, "CreateDemObjectsCoProcessor::process stop step: " << istep);
-#endif
-
-        // demCoProcessor->distributeIDs();
-
-        //#ifdef TIMING
-        //      if (comm->isRoot()) UBLOG(logINFO, "demCoProcessor->distributeIDs() time = "<<timer.stop()<<" s");
-        //#endif
-    }
-}
-//////////////////////////////////////////////////////////////////////////
-void CreateDemObjectsCoProcessor::addGeoObject(SPtr<GbObject3D> geoObjectPrototype, Vector3D initalVelocity)
-{
-    geoObjectPrototypeVector.push_back(geoObjectPrototype);
-    this->initalVelocity.push_back(initalVelocity);
-}
-
-void CreateDemObjectsCoProcessor::clearGeoObjects()
-{
-    geoObjectPrototypeVector.clear();
-    initalVelocity.clear();
-}
-
-void CreateDemObjectsCoProcessor::createGeoObjects()
-{
-    int size = (int)(geoObjectPrototypeVector.size());
-
-    std::vector<std::shared_ptr<Block3D>> blockVector;
-
-    for (int i = 0; i < size; i++) {
-        SPtr<GbSphere3D> sphere = std::dynamic_pointer_cast<GbSphere3D>(geoObjectPrototypeVector[i]);
-        if (demCoProcessor->isSpheresIntersection(sphere->getX1Centroid(), sphere->getX2Centroid(),
-                                                  sphere->getX3Centroid(),
-                                                  sphere->getRadius() * 2.0 * (1.0 - tolerance))) {
-            continue;
-        }
-
-        SPtr<GbObject3D> geoObject((GbObject3D *)(geoObjectPrototypeVector[i]->clone()));
-        SPtr<MovableObjectInteractor> geoObjectInt = SPtr<MovableObjectInteractor>(new MovableObjectInteractor(
-            geoObject, grid, velocityBcParticleAdapter, Interactor3D::SOLID, extrapolationReconstructor, State::UNPIN));
-        demCoProcessor->addInteractor(geoObjectInt, demObjectMaterial, initalVelocity[i]);
-        demCounter++;
-    }
-
-#ifdef TIMING
-    if (comm->isRoot())
-        UBLOG(logINFO, "createGeoObjects() time = " << timer.stop() << " s");
-    if (comm->isRoot())
-        UBLOG(logINFO, "number of objects = " << (int)(geoObjectPrototypeVector.size()));
-    if (comm->isRoot())
-        UBLOG(logINFO, "total number of objects = " << demCounter);
-        // if (comm->isRoot()) UBLOG(logINFO, "CreateDemObjectsCoProcessor::process stop step: " << istep);
-#endif
-
-    demCoProcessor->distributeIDs();
-
-#ifdef TIMING
-    if (comm->isRoot())
-        UBLOG(logINFO, "demCoProcessor->distributeIDs() time = " << timer.stop() << " s");
-#endif
-}
diff --git a/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.h b/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.h
deleted file mode 100644
index 7da317e67bd932f7d594c68d63ebc117b50c1e85..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef CreateSphereCoProcessor_h__
-#define CreateSphereCoProcessor_h__
-
-#include "CoProcessor.h"
-#include "Vector3D.h"
-#include <array>
-#include <vector>
-
-//#define TIMING
-
-#ifdef TIMING
-#include "UbTiming.h"
-#endif
-
-class Grid3D;
-class UbScheduler;
-namespace vf::mpi {class Communicator;}
-class DemCoProcessor;
-class GbObject3D;
-class BCAdapter;
-class Reconstructor;
-class PhysicsEngineMaterialAdapter;
-
-class CreateDemObjectsCoProcessor : public CoProcessor
-{
-public:
-    CreateDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm,
-                                SPtr<DemCoProcessor> demCoProcessor,
-                                SPtr<PhysicsEngineMaterialAdapter> geoObjectMaterial, double tolerance = 0);
-    void process(double step) override;
-    void addGeoObject(SPtr<GbObject3D> geoObjectPrototype, Vector3D initalVelocity);
-    void clearGeoObjects();
-    void createGeoObjects();
-    double getToleranz() const { return tolerance; }
-    void setToleranz(double val) { tolerance = val; }
-
-protected:
-private:
-    std::shared_ptr<vf::mpi::Communicator> comm;
-    SPtr<DemCoProcessor> demCoProcessor;
-    std::vector<SPtr<GbObject3D>> geoObjectPrototypeVector;
-    SPtr<PhysicsEngineMaterialAdapter> demObjectMaterial;
-    std::vector<Vector3D> initalVelocity;
-    SPtr<BCAdapter> velocityBcParticleAdapter;
-    SPtr<Reconstructor> extrapolationReconstructor;
-    int demCounter;
-    double tolerance;
-#ifdef TIMING
-    UbTimer timer;
-#endif
-};
-#endif // CreateSphereCoProcessor_h__
diff --git a/src/cpu/DemCoupling/DemCoProcessor.cpp b/src/cpu/DemCoupling/DemCoProcessor.cpp
deleted file mode 100644
index 642a942d7d96b73af898690a5737f53d2d88b1a5..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/DemCoProcessor.cpp
+++ /dev/null
@@ -1,515 +0,0 @@
-#include "DemCoProcessor.h"
-
-#include "BCProcessor.h"
-#include <mpi/Communicator.h>
-#include "DataSet3D.h"
-#include "DistributionArray3D.h"
-#include "ForceCalculator.h"
-#include "GbSphere3D.h"
-#include "Grid3D.h"
-#include "ILBMKernel.h"
-#include "MovableObjectInteractor.h"
-#include "SetBcBlocksBlockVisitor.h"
-#include "UbScheduler.h"
-
-#include "PePhysicsEngineGeometryAdapter.h"
-#include "PePhysicsEngineSolverAdapter.h"
-#include "PhysicsEngineGeometryAdapter.h"
-#include "PhysicsEngineMaterialAdapter.h"
-#include "PhysicsEngineSolverAdapter.h"
-
-#include "BCArray3D.h"
-#include "Block3D.h"
-#include "BoundaryConditions.h"
-#include "BoundaryConditionsBlockVisitor.h"
-#include "MPICommunicator.h"
-
-#include "UbLogger.h"
-
-#include <array>
-#include <functional>
-
-DemCoProcessor::DemCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm,
-                               std::shared_ptr<ForceCalculator> forceCalculator,
-                               std::shared_ptr<PhysicsEngineSolverAdapter> physicsEngineSolver,
-                               double intermediatePeSteps)
-    : CoProcessor(grid, s), comm(comm), forceCalculator(forceCalculator),
-      physicsEngineSolver(std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(physicsEngineSolver)),
-      intermediateDemSteps(intermediatePeSteps)
-{
-#ifdef TIMING
-    timer.resetAndStart();
-#endif
-
-    std::shared_ptr<walberla::blockforest::BlockForest> forest =
-        std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(physicsEngineSolver)->getBlockForest();
-    std::shared_ptr<walberla::domain_decomposition::BlockDataID> storageId =
-        std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(physicsEngineSolver)->getStorageId();
-
-    for (auto blockIt = forest->begin(); blockIt != forest->end(); ++blockIt) {
-        walberla::pe::Storage *storage                     = blockIt->getData<walberla::pe::Storage>(*storageId.get());
-        walberla::pe::BodyStorage *bodyStorage             = &(*storage)[0];
-        walberla::pe::BodyStorage *bodyStorageShadowCopies = &(*storage)[1];
-
-        bodyStorage->registerAddCallback("DemCoProcessor", std::bind1st(std::mem_fun(&DemCoProcessor::addPeGeo), this));
-        bodyStorage->registerRemoveCallback("DemCoProcessor",
-                                            std::bind1st(std::mem_fun(&DemCoProcessor::removePeGeo), this));
-
-        bodyStorageShadowCopies->registerAddCallback("DemCoProcessor",
-                                                     std::bind1st(std::mem_fun(&DemCoProcessor::addPeShadowGeo), this));
-        bodyStorageShadowCopies->registerRemoveCallback(
-            "DemCoProcessor", std::bind1st(std::mem_fun(&DemCoProcessor::removePeShadowGeo), this));
-    }
-}
-
-DemCoProcessor::~DemCoProcessor()
-{
-    std::shared_ptr<walberla::blockforest::BlockForest> forest =
-        std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(physicsEngineSolver)->getBlockForest();
-    std::shared_ptr<walberla::domain_decomposition::BlockDataID> storageId =
-        std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(physicsEngineSolver)->getStorageId();
-
-    for (auto &currentBlock : *forest) {
-        walberla::pe::Storage *storage           = currentBlock.getData<walberla::pe::Storage>(*storageId.get());
-        walberla::pe::BodyStorage &localStorage  = (*storage)[0];
-        walberla::pe::BodyStorage &shadowStorage = (*storage)[1];
-
-        localStorage.clearAddCallbacks();
-        localStorage.clearRemoveCallbacks();
-
-        shadowStorage.clearAddCallbacks();
-        shadowStorage.clearRemoveCallbacks();
-    }
-}
-
-void DemCoProcessor::addInteractor(std::shared_ptr<MovableObjectInteractor> interactor,
-                                   std::shared_ptr<PhysicsEngineMaterialAdapter> physicsEngineMaterial,
-                                   Vector3D initalVelocity)
-{
-    interactors.push_back(interactor);
-    const int id = static_cast<int>(interactors.size() - 1);
-    interactor->setID(id);
-    const auto peGeometryAdapter = this->createPhysicsEngineGeometryAdapter(interactor, physicsEngineMaterial);
-    if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(peGeometryAdapter)->isActive()) {
-        peGeometryAdapter->setLinearVelolocity(initalVelocity);
-        geoIdMap.insert(
-            std::make_pair(std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(peGeometryAdapter)->getSystemID(),
-                           std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(peGeometryAdapter)));
-    }
-    SetBcBlocksBlockVisitor setBcVisitor(interactor);
-    grid->accept(setBcVisitor);
-
-    // std::vector< std::shared_ptr<Block3D> > blockVector;
-    // UbTupleInt3 blockNX=grid->getBlockNX();
-    // SPtr<GbObject3D> geoObject(interactor->getGbObject3D());
-    // double ext = 0.0;
-    // std::array<double, 6> AABB ={
-    // geoObject->getX1Minimum(),geoObject->getX2Minimum(),geoObject->getX3Minimum(),geoObject->getX1Maximum(),geoObject->getX2Maximum(),geoObject->getX3Maximum()
-    // }; grid->getBlocksByCuboid(AABB[0]-(double)val<1>(blockNX)*ext, AABB[1]-(double)val<2>(blockNX)*ext,
-    // AABB[2]-(double)val<3>(blockNX)*ext, AABB[3]+(double)val<1>(blockNX)*ext, AABB[4]+(double)val<2>(blockNX)*ext,
-    // AABB[5]+(double)val<3>(blockNX)*ext, blockVector); for (std::shared_ptr<Block3D> block : blockVector)
-    //{
-    //   if (block->getKernel())
-    //   {
-    //      interactor->setBCBlock(block);
-    //      //UBLOG(logINFO, "DemCoProcessor::addInteractor() rank = "<<comm->getProcessID());
-    //   }
-    //}
-
-    interactor->initInteractor();
-
-    physicsEngineGeometrieAdapters.push_back(peGeometryAdapter);
-}
-
-std::shared_ptr<PhysicsEngineGeometryAdapter> DemCoProcessor::createPhysicsEngineGeometryAdapter(
-    std::shared_ptr<MovableObjectInteractor> interactor,
-    std::shared_ptr<PhysicsEngineMaterialAdapter> physicsEngineMaterial) const
-{
-    const int id              = static_cast<int>(interactors.size() - 1);
-    SPtr<GbSphere3D> vfSphere = std::static_pointer_cast<GbSphere3D>(interactor->getGbObject3D());
-    const Vector3D position(vfSphere->getX1Centroid(), vfSphere->getX2Centroid(), vfSphere->getX3Centroid());
-    auto peGeometryAdapter = this->physicsEngineSolver->createPhysicsEngineGeometryAdapter(
-        id, position, vfSphere->getRadius(), physicsEngineMaterial);
-    interactor->setPhysicsEngineGeometry(peGeometryAdapter);
-    return peGeometryAdapter;
-}
-
-void DemCoProcessor::process(double actualTimeStep)
-{
-#ifdef TIMING
-    timer.resetAndStart();
-#endif
-
-    this->applyForcesOnGeometries();
-
-#ifdef TIMING
-    if (comm->isRoot())
-        UBLOG(logINFO, "DemCoProcessor::process start step: " << actualTimeStep);
-    if (comm->isRoot())
-        UBLOG(logINFO, "DemCoProcessor::applyForcesOnGeometries() time = " << timer.stop() << " s");
-#endif
-
-    if (scheduler->isDue(actualTimeStep)) {
-        // UBLOG(logINFO, "DemCoProcessor::update - START - timestep = " << actualTimeStep);
-        const double demTimeStepsPerIteration = scheduler->getMinStep();
-
-        if (demTimeStepsPerIteration != 1)
-            this->scaleForcesAndTorques(1.0 / demTimeStepsPerIteration);
-
-#ifdef TIMING
-        if (comm->isRoot())
-            UBLOG(logINFO, "DemCoProcessor::scaleForcesAndTorques() time = " << timer.stop() << " s");
-        if (comm->isRoot())
-            UBLOG(logINFO, "DemCoProcessor::calculateDemTimeStep():");
-#endif
-
-        if (this->intermediateDemSteps == 1)
-            this->calculateDemTimeStep(demTimeStepsPerIteration);
-
-        //#ifdef TIMING
-        //      if (comm->isRoot()) UBLOG(logINFO, "DemCoProcessor::calculateDemTimeStep() time = "<<timer.stop()<<"
-        //      s");
-        //#endif
-        // if ((int)actualTimeStep % 100 == 0)
-        //{
-        //    if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometries[0])->isActive())
-        //    {
-        //        //UBLOG(logINFO, "v: (x,y,z) " << physicsEngineGeometries[0]->getLinearVelocity() << " actualTimeStep
-        //        = " << UbSystem::toString(actualTimeStep));
-        //    }
-        //}
-
-        // during the intermediate time steps of the collision response, the currently acting forces
-        // (interaction forces, gravitational force, ...) have to remain constant.
-        // Since they are reset after the call to collision response, they have to be stored explicitly before.
-        // Then they are set again after each intermediate step.
-
-        this->moveVfGeoObjects();
-
-#ifdef TIMING
-        if (comm->isRoot())
-            UBLOG(logINFO, "DemCoProcessor::moveVfGeoObject() time = " << timer.stop() << " s");
-#endif
-
-        grid->accept(*boundaryConditionsBlockVisitor.get());
-
-#ifdef TIMING
-        if (comm->isRoot())
-            UBLOG(logINFO, "grid->accept(*boundaryConditionsBlockVisitor.get()) time = " << timer.stop() << " s");
-#endif
-
-        // UBLOG(logINFO, "DemCoProcessor::update - END - timestep = " << actualTimeStep);
-    }
-
-#ifdef TIMING
-    if (comm->isRoot())
-        UBLOG(logINFO, "DemCoProcessor::process stop step: " << actualTimeStep);
-#endif
-}
-//////////////////////////////////////////////////////////////////////////
-std::shared_ptr<PhysicsEngineSolverAdapter> DemCoProcessor::getPhysicsEngineSolver() { return physicsEngineSolver; }
-
-void DemCoProcessor::applyForcesOnGeometries()
-{
-    for (int i = 0; i < physicsEngineGeometrieAdapters.size(); i++) {
-        if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])->isActive()) {
-            this->setForcesToObject(grid, interactors[i], physicsEngineGeometrieAdapters[i]);
-
-            // physicsEngineGeometries[i]->setLinearVelolocity(Vector3D(-0.001, 0.0, 0.0));
-            // physicsEngineGeometries[i]->setAngularVelocity(Vector3D(0.01, 0.01, 0.01));
-            // UBLOG(logINFO, "v: (x,y,z) " << physicsEngineGeometries[i]->getLinearVelocity());
-        }
-    }
-}
-
-void DemCoProcessor::setForcesToObject(SPtr<Grid3D> grid, SPtr<MovableObjectInteractor> interactor,
-                                       std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry)
-{
-    for (BcNodeIndicesMap::value_type t : interactor->getBcNodeIndicesMap()) {
-        SPtr<Block3D> block                     = t.first;
-        SPtr<ILBMKernel> kernel                 = block->getKernel();
-        SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
-        SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-        distributions->swap();
-
-        std::set<std::vector<int>> &transNodeIndicesSet = t.second;
-        for (std::vector<int> node : transNodeIndicesSet) {
-            int x1 = node[0];
-            int x2 = node[1];
-            int x3 = node[2];
-
-            if (kernel->isInsideOfDomain(x1, x2, x3) && bcArray->isFluid(x1, x2, x3)) {
-                // TODO: calculate assumed boundary position
-
-                const Vector3D worldCoordinates = grid->getNodeCoordinates(block, x1, x2, x3);
-                const auto boundaryVelocity     = physicsEngineGeometry->getVelocityAtPosition(worldCoordinates);
-
-                SPtr<BoundaryConditions> bc = bcArray->getBC(x1, x2, x3);
-                const Vector3D force = forceCalculator->getForces(x1, x2, x3, distributions, bc, boundaryVelocity);
-                physicsEngineGeometry->addForceAtPosition(force, worldCoordinates);
-            }
-        }
-        distributions->swap();
-    }
-}
-
-void DemCoProcessor::scaleForcesAndTorques(double scalingFactor)
-{
-    for (int i = 0; i < physicsEngineGeometrieAdapters.size(); i++) {
-        if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])->isActive()) {
-            const Vector3D force  = physicsEngineGeometrieAdapters[i]->getForce() * scalingFactor;
-            const Vector3D torque = physicsEngineGeometrieAdapters[i]->getTorque() * scalingFactor;
-
-            physicsEngineGeometrieAdapters[i]->resetForceAndTorque();
-
-            physicsEngineGeometrieAdapters[i]->setForce(force);
-            physicsEngineGeometrieAdapters[i]->setTorque(torque);
-
-            // UBLOG(logINFO, "F: (x,y,z) " << force);
-            // UBLOG(logINFO, "T: (x,y,z) " << torque);
-        }
-    }
-}
-
-void DemCoProcessor::calculateDemTimeStep(double step)
-{
-    physicsEngineSolver->runTimestep(step);
-
-#ifdef TIMING
-    if (comm->isRoot())
-        UBLOG(logINFO, "  physicsEngineSolver->runTimestep() time = " << timer.stop() << " s");
-#endif
-}
-
-void DemCoProcessor::moveVfGeoObjects()
-{
-    for (int i = 0; i < interactors.size(); i++) {
-        if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])->isActive()) {
-            if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])
-                    ->getSemiactive()) {
-                walberla::pe::RigidBody *peGeoObject = getPeGeoObject(
-                    std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])
-                        ->getSystemID());
-                if (peGeoObject != nullptr) {
-                    std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])
-                        ->setGeometry(peGeoObject);
-                    interactors[i]->moveGbObjectTo(physicsEngineGeometrieAdapters[i]->getPosition());
-                    std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])
-                        ->setSemiactive(false);
-                } else {
-                    std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])
-                        ->setInactive();
-                }
-            } else {
-                interactors[i]->moveGbObjectTo(physicsEngineGeometrieAdapters[i]->getPosition());
-            }
-        }
-    }
-}
-
-bool DemCoProcessor::isDemObjectInAABB(std::array<double, 6> AABB)
-{
-    bool result = false;
-    for (int i = 0; i < interactors.size(); i++) {
-        if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])->isActive()) {
-            SPtr<GbObject3D> geoObject = interactors[i]->getGbObject3D();
-            std::array<double, 2> minMax1;
-            std::array<double, 2> minMax2;
-            std::array<double, 2> minMax3;
-            minMax1[0] = geoObject->getX1Minimum();
-            minMax2[0] = geoObject->getX2Minimum();
-            minMax3[0] = geoObject->getX3Minimum();
-            minMax1[1] = geoObject->getX1Maximum();
-            minMax2[1] = geoObject->getX2Maximum();
-            minMax3[1] = geoObject->getX3Maximum();
-
-            for (int x3 = 0; x3 < 2; x3++)
-                for (int x2 = 0; x2 < 2; x2++)
-                    for (int x1 = 0; x1 < 2; x1++) {
-                        result =
-                            result || (minMax1[x1] >= AABB[0] && minMax2[x2] >= AABB[1] && minMax3[x3] >= AABB[2] &&
-                                       minMax1[x1] <= AABB[3] && minMax2[x2] <= AABB[4] && minMax3[x3] <= AABB[5]);
-                    }
-        }
-    }
-
-    std::vector<int> values;
-    values.push_back((int)result);
-    std::vector<int> rvalues = comm->gather(values);
-
-    if (comm->isRoot()) {
-        for (int i = 0; i < (int)rvalues.size(); i++) {
-            result = result || (bool)rvalues[i];
-        }
-    }
-    int iresult = (int)result;
-    comm->broadcast(iresult);
-    result = (bool)iresult;
-
-    return result;
-}
-
-int DemCoProcessor::addSurfaceTriangleSet(std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt3> &triangles)
-{
-    for (int i = 0; i < interactors.size(); i++) {
-        if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])->isActive()) {
-            interactors[i]->getGbObject3D()->addSurfaceTriangleSet(nodes, triangles);
-        }
-    }
-    return (int)interactors.size();
-}
-
-void DemCoProcessor::getObjectsPropertiesVector(std::vector<double> &p)
-{
-    for (int i = 0; i < interactors.size(); i++) {
-        if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])->isActive()) {
-            p.push_back(i);
-            p.push_back(interactors[i]->getGbObject3D()->getX1Centroid());
-            p.push_back(interactors[i]->getGbObject3D()->getX2Centroid());
-            p.push_back(interactors[i]->getGbObject3D()->getX3Centroid());
-            Vector3D v = physicsEngineGeometrieAdapters[i]->getLinearVelocity();
-            p.push_back(v[0]);
-            p.push_back(v[1]);
-            p.push_back(v[2]);
-        }
-    }
-}
-
-void DemCoProcessor::addPeGeo(walberla::pe::RigidBody *peGeo)
-{
-    auto geometry = getPeGeoAdapter(peGeo->getSystemID());
-    if (geometry != nullptr) {
-        geometry->setActive();
-        geometry->setGeometry(peGeo);
-        return;
-    } else
-        return;
-}
-
-void DemCoProcessor::removePeGeo(walberla::pe::RigidBody *peGeo)
-{
-    auto geometry = getPeGeoAdapter(peGeo->getSystemID());
-    if (geometry != nullptr) {
-        geometry->setSemiactive(true);
-    } else
-        throw UbException(UB_EXARGS, "PeGeo SystemId=" + UbSystem::toString(peGeo->getSystemID()) +
-                                         " is not matching geometry ID");
-}
-
-void DemCoProcessor::addPeShadowGeo(walberla::pe::RigidBody *peGeo)
-{
-    auto geometry = getPeGeoAdapter(peGeo->getSystemID());
-    if (geometry != nullptr) {
-        geometry->setActive();
-        geometry->setGeometry(peGeo);
-        return;
-    } else
-        throw UbException(UB_EXARGS,
-                          "PeGeo ID=" + UbSystem::toString(peGeo->getSystemID()) + " is not matching geometry ID");
-}
-
-void DemCoProcessor::removePeShadowGeo(walberla::pe::RigidBody *peGeo)
-{
-    auto geometry = getPeGeoAdapter(peGeo->getSystemID());
-
-    if (geometry != nullptr) {
-        geometry->setSemiactive(true);
-    } else
-        throw UbException(UB_EXARGS,
-                          "PeGeo ID=" + UbSystem::toString(peGeo->getSystemID()) + " is not matching geometry ID");
-}
-
-bool DemCoProcessor::isSpheresIntersection(double centerX1, double centerX2, double centerX3, double d)
-{
-    bool result = false;
-    for (int i = 0; i < interactors.size(); i++) {
-        if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])->isActive()) {
-            SPtr<GbObject3D> sphere = interactors[i]->getGbObject3D();
-            result                  = result ||
-                     (sqrt(pow(sphere->getX1Centroid() - centerX1, 2.0) + pow(sphere->getX2Centroid() - centerX2, 2.0) +
-                           pow(sphere->getX3Centroid() - centerX3, 2.0)) <= d);
-        }
-    }
-    std::vector<int> values;
-    values.push_back((int)result);
-    std::vector<int> rvalues = comm->gather(values);
-
-    if (comm->isRoot()) {
-        for (int i = 0; i < (int)rvalues.size(); i++) {
-            result = result || (bool)rvalues[i];
-        }
-    }
-    int iresult = (int)result;
-    comm->broadcast(iresult);
-    result = (bool)iresult;
-
-    return result;
-}
-
-void DemCoProcessor::distributeIDs()
-{
-    std::vector<unsigned long long> peIDsSend;
-    std::vector<int> vfIDsSend;
-
-    for (int i = 0; i < interactors.size(); i++) {
-        if (std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])->isActive()) {
-            peIDsSend.push_back(
-                std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])
-                    ->getSystemID());
-            vfIDsSend.push_back(interactors[i]->getID());
-        }
-    }
-
-    std::vector<unsigned long long> peIDsRecv;
-    std::vector<int> vfIDsRecv;
-
-    comm->allGather(peIDsSend, peIDsRecv);
-    comm->allGather(vfIDsSend, vfIDsRecv);
-
-    std::map<int, unsigned long long> idMap;
-
-    for (int i = 0; i < peIDsRecv.size(); i++) {
-        idMap.insert(std::make_pair(vfIDsRecv[i], peIDsRecv[i]));
-    }
-
-    for (int i = 0; i < interactors.size(); i++) {
-        std::map<int, unsigned long long>::const_iterator it;
-        if ((it = idMap.find(interactors[i]->getID())) == idMap.end()) {
-            throw UbException(UB_EXARGS, "Interactor ID = " + UbSystem::toString(interactors[i]->getID()) +
-                                             " is invalid! The DEM object may be not in PE domain!");
-        }
-
-        std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])
-            ->setSystemID(it->second);
-
-        geoIdMap.insert(std::make_pair(
-            it->second, std::dynamic_pointer_cast<PePhysicsEngineGeometryAdapter>(physicsEngineGeometrieAdapters[i])));
-    }
-}
-//////////////////////////////////////////////////////////////////////////
-void DemCoProcessor::setBlockVisitor(std::shared_ptr<BoundaryConditionsBlockVisitor> boundaryConditionsBlockVisitor)
-{
-    this->boundaryConditionsBlockVisitor = boundaryConditionsBlockVisitor;
-}
-//////////////////////////////////////////////////////////////////////////
-walberla::pe::RigidBody *DemCoProcessor::getPeGeoObject(walberla::id_t id)
-{
-    std::shared_ptr<walberla::blockforest::BlockForest> forest =
-        std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(physicsEngineSolver)->getBlockForest();
-    std::shared_ptr<walberla::domain_decomposition::BlockDataID> storageId =
-        std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(physicsEngineSolver)->getStorageId();
-    std::shared_ptr<walberla::pe::BodyStorage> globalBodyStorage =
-        std::dynamic_pointer_cast<PePhysicsEngineSolverAdapter>(physicsEngineSolver)->getGlobalBodyStorage();
-
-    return walberla::pe::getBody(*globalBodyStorage, *forest, *storageId, id,
-                                 walberla::pe::StorageSelect::LOCAL | walberla::pe::StorageSelect::SHADOW);
-}
-////////////////////////////////////////////////////////////////////////////
-std::shared_ptr<PePhysicsEngineGeometryAdapter> DemCoProcessor::getPeGeoAdapter(unsigned long long systemId)
-{
-    std::map<unsigned long long, std::shared_ptr<PePhysicsEngineGeometryAdapter>>::const_iterator it;
-    if ((it = geoIdMap.find(systemId)) == geoIdMap.end()) {
-        return nullptr;
-    } else
-        return it->second;
-}
diff --git a/src/cpu/DemCoupling/DemCoProcessor.h b/src/cpu/DemCoupling/DemCoProcessor.h
deleted file mode 100644
index d2946f1e93fcaedc69d44a83a68dc2079910e48f..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/DemCoProcessor.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef DEM_CO_PROCESSOR_H
-#define DEM_CO_PROCESSOR_H
-
-#include <map>
-#include <memory>
-#include <vector>
-
-#include "Vector3D.h"
-
-#include "CoProcessor.h"
-#include "UbTuple.h"
-
-#include <pe/basic.h>
-
-//#define TIMING
-
-#ifdef TIMING
-#include "UbTiming.h"
-#endif
-
-class PhysicsEngineGeometryAdapter;
-class PhysicsEngineSolverAdapter;
-class PePhysicsEngineSolverAdapter;
-class PhysicsEngineMaterialAdapter;
-class PePhysicsEngineGeometryAdapter;
-
-class UbScheduler;
-class Grid3D;
-class ForceCalculator;
-namespace vf::mpi {class Communicator;}
-class MovableObjectInteractor;
-class BoundaryConditionsBlockVisitor;
-
-class DemCoProcessor : public CoProcessor
-{
-public:
-    DemCoProcessor(std::shared_ptr<Grid3D> grid, std::shared_ptr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm,
-                   std::shared_ptr<ForceCalculator> forceCalculator,
-                   std::shared_ptr<PhysicsEngineSolverAdapter> physicsEngineSolver, double intermediatePeSteps = 1.0);
-    virtual ~DemCoProcessor();
-
-    void addInteractor(std::shared_ptr<MovableObjectInteractor> interactor,
-                       std::shared_ptr<PhysicsEngineMaterialAdapter> physicsEngineMaterial,
-                       Vector3D initalVelocity = Vector3D(0.0, 0.0, 0.0));
-    void process(double step) override;
-    std::shared_ptr<PhysicsEngineSolverAdapter> getPhysicsEngineSolver();
-    void distributeIDs();
-    void setBlockVisitor(std::shared_ptr<BoundaryConditionsBlockVisitor> blockVisitor);
-    bool isDemObjectInAABB(std::array<double, 6> AABB);
-    int addSurfaceTriangleSet(std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt3> &triangles);
-    void getObjectsPropertiesVector(std::vector<double> &p);
-    void addPeGeo(walberla::pe::RigidBody *peGeo);
-    void removePeGeo(walberla::pe::RigidBody *peGeo);
-    void addPeShadowGeo(walberla::pe::RigidBody *peGeo);
-    void removePeShadowGeo(walberla::pe::RigidBody *peGeo);
-    bool isSpheresIntersection(double centerX1, double centerX2, double centerX3, double d);
-
-private:
-    std::shared_ptr<PhysicsEngineGeometryAdapter>
-    createPhysicsEngineGeometryAdapter(std::shared_ptr<MovableObjectInteractor> interactor,
-                                       std::shared_ptr<PhysicsEngineMaterialAdapter> physicsEngineMaterial) const;
-    void applyForcesOnGeometries();
-    void setForcesToObject(SPtr<Grid3D> grid, std::shared_ptr<MovableObjectInteractor> interactor,
-                           std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry);
-    void scaleForcesAndTorques(double scalingFactor);
-    void calculateDemTimeStep(double step);
-    void moveVfGeoObjects();
-    walberla::pe::RigidBody *getPeGeoObject(walberla::id_t id);
-    std::shared_ptr<PePhysicsEngineGeometryAdapter> getPeGeoAdapter(unsigned long long systemId);
-
-private:
-    std::shared_ptr<vf::mpi::Communicator> comm;
-    std::vector<std::shared_ptr<MovableObjectInteractor>> interactors;
-    std::shared_ptr<ForceCalculator> forceCalculator;
-    std::shared_ptr<PePhysicsEngineSolverAdapter> physicsEngineSolver;
-    std::vector<std::shared_ptr<PhysicsEngineGeometryAdapter>> physicsEngineGeometrieAdapters;
-    double intermediateDemSteps;
-    SPtr<BoundaryConditionsBlockVisitor> boundaryConditionsBlockVisitor;
-    // walberla::pe::BodyStorage* bodyStorage;    //!< Reference to the central body storage.
-    // walberla::pe::BodyStorage* bodyStorageShadowCopies;    //!< Reference to the body storage containing body shadow
-    // copies.
-
-    std::map<unsigned long long, std::shared_ptr<PePhysicsEngineGeometryAdapter>> geoIdMap;
-
-#ifdef TIMING
-    UbTimer timer;
-#endif
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/DemCoupling.cmake b/src/cpu/DemCoupling/DemCoupling.cmake
deleted file mode 100644
index 927c08b6dadae76d2ed023253503f8e7bd804601..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/DemCoupling.cmake
+++ /dev/null
@@ -1,31 +0,0 @@
-INCLUDE(${SOURCE_ROOT}/DemCoupling/CMakePackage.txt)
-INCLUDE(${SOURCE_ROOT}/DemCoupling/physicsEngineAdapter/CMakePackage.txt)
-INCLUDE(${SOURCE_ROOT}/DemCoupling/physicsEngineAdapter/dummy/CMakePackage.txt)
-INCLUDE(${SOURCE_ROOT}/DemCoupling/physicsEngineAdapter/pe/CMakePackage.txt)
-INCLUDE(${SOURCE_ROOT}/DemCoupling/reconstructor/CMakePackage.txt)
-
-INCLUDE(${SOURCE_ROOT}/DemCoupling/IncludsList.cmake)
-
-SET(LINK_LIBRARY optimized ${PE_RELEASE_LIBRARY} debug ${PE_DEBUG_LIBRARY})
-SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} ${LINK_LIBRARY})
-
-SET(LINK_LIBRARY optimized ${BLOCKFOREST_RELEASE_LIBRARY} debug ${BLOCKFOREST_DEBUG_LIBRARY})
-SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} ${LINK_LIBRARY})
-
-SET(LINK_LIBRARY optimized ${DOMAIN_DECOMPOSITION_RELEASE_LIBRARY} debug ${DOMAIN_DECOMPOSITION_DEBUG_LIBRARY})
-SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} ${LINK_LIBRARY})
-
-SET(LINK_LIBRARY optimized ${GEOMETRY_RELEASE_LIBRARY} debug ${GEOMETRY_DEBUG_LIBRARY})
-SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} ${LINK_LIBRARY})
-
-SET(LINK_LIBRARY optimized ${CORE_RELEASE_LIBRARY} debug ${CORE_DEBUG_LIBRARY})
-SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} ${LINK_LIBRARY})
-
-IF(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
-   SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} "stdc++fs")
-ENDIF()
-
-IF(${USE_METIS})
-   SET(LINK_LIBRARY optimized ${METIS_RELEASE_LIBRARY} debug ${METIS_DEBUG_LIBRARY})
-   SET(CAB_ADDITIONAL_LINK_LIBRARIES ${CAB_ADDITIONAL_LINK_LIBRARIES} ${LINK_LIBRARY})
-ENDIF()
\ No newline at end of file
diff --git a/src/cpu/DemCoupling/IncludsList.cmake b/src/cpu/DemCoupling/IncludsList.cmake
deleted file mode 100644
index 7ebf198e6082131956d5c1e146031394f39e37d5..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/IncludsList.cmake
+++ /dev/null
@@ -1,8 +0,0 @@
-INCLUDE_DIRECTORIES(${SOURCE_ROOT}/DemCoupling)
-INCLUDE_DIRECTORIES(${SOURCE_ROOT}/DemCoupling/physicsEngineAdapter)
-INCLUDE_DIRECTORIES(${SOURCE_ROOT}/DemCoupling/physicsEngineAdapter/dummy)
-INCLUDE_DIRECTORIES(${SOURCE_ROOT}/DemCoupling/physicsEngineAdapter/pe)
-INCLUDE_DIRECTORIES(${SOURCE_ROOT}/DemCoupling/reconstructor)
-
-INCLUDE_DIRECTORIES(${PE_ROOT}/src)
-INCLUDE_DIRECTORIES(${PE_BINARY_DIR}/src)
\ No newline at end of file
diff --git a/src/cpu/DemCoupling/MovableObjectInteractor.cpp b/src/cpu/DemCoupling/MovableObjectInteractor.cpp
deleted file mode 100644
index 17185c8bb1cdfedfca1d76fa799cc0810a3fb43d..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/MovableObjectInteractor.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-#include "MovableObjectInteractor.h"
-
-#include "GbObject3D.h"
-#include "UbLogger.h"
-#include "Vector3D.h"
-
-#include "BCAdapter.h"
-#include "BCArray3D.h"
-#include "BCProcessor.h"
-#include "Block3D.h"
-#include "CoordinateTransformation3D.h"
-#include "Grid3D.h"
-#include "ILBMKernel.h"
-
-#include "BoundaryConditionsBlockVisitor.h"
-#include "SetBcBlocksBlockVisitor.h"
-
-#include "PhysicsEngineGeometryAdapter.h"
-#include "Reconstructor.h"
-
-#include <array>
-
-//#define TIMING
-
-#ifdef TIMING
-#include "UbTiming.h"
-#endif
-
-MovableObjectInteractor::MovableObjectInteractor(std::shared_ptr<GbObject3D> geoObject3D, std::shared_ptr<Grid3D> grid,
-                                                 std::shared_ptr<BCAdapter> bcAdapter, int type,
-                                                 std::shared_ptr<Reconstructor> reconstructor, State state)
-    : D3Q27Interactor(geoObject3D, grid, bcAdapter, type), reconstructor(reconstructor), state(state)
-{
-    // grid->getBlocks(0, grid->getRank(), true, blockVector);
-}
-
-MovableObjectInteractor::~MovableObjectInteractor() {}
-
-void MovableObjectInteractor::setPhysicsEngineGeometry(
-    std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry)
-{
-    this->physicsEngineGeometry = physicsEngineGeometry;
-    physicsEngineGeometry->changeState(this->state);
-}
-
-void MovableObjectInteractor::moveGbObjectTo(const Vector3D &position)
-{
-    // UBLOG(logINFO, "new position: (x,y,z) " << val<1>(position) << ", " << val<2>(position) << ", " <<
-    // val<3>(position));
-
-    this->getGbObject3D()->setCenterCoordinates(UbTupleDouble3(position[0], position[1], position[2]));
-    this->rearrangeGrid();
-}
-
-void MovableObjectInteractor::rearrangeGrid()
-{
-#ifdef TIMING
-    UbTimer timer;
-    timer.resetAndStart();
-#endif
-
-#ifdef TIMING
-    UBLOG(logINFO, "MovableObjectInteractor::rearrangeGrid():start");
-#endif
-
-    this->reconstructDistributionOnSolidNodes();
-
-#ifdef TIMING
-    UBLOG(logINFO, "reconstructDistributionOnSolidNodes() time = " << timer.stop() << " s");
-#endif
-
-    this->setSolidNodesToFluid();
-
-#ifdef TIMING
-    UBLOG(logINFO, "setSolidNodesToFluid() time = " << timer.stop() << " s");
-#endif
-
-    this->setBcNodesToFluid();
-
-#ifdef TIMING
-    UBLOG(logINFO, "setBcNodesToFluid() time = " << timer.stop() << " s");
-#endif
-
-    this->removeSolidBlocks();
-
-#ifdef TIMING
-    UBLOG(logINFO, "removeSolidBlocks() time = " << timer.stop() << " s");
-#endif
-
-    this->removeBcBlocks();
-
-#ifdef TIMING
-    UBLOG(logINFO, "removeBcBlocks() time = " << timer.stop() << " s");
-#endif
-
-    this->setBcBlocks();
-
-#ifdef TIMING
-    UBLOG(logINFO, "setBcBlocks() time = " << timer.stop() << " s");
-#endif
-
-    this->initInteractor();
-
-#ifdef TIMING
-    UBLOG(logINFO, "initInteractor() time = " << timer.stop() << " s");
-#endif
-
-    this->updateVelocityBc();
-
-#ifdef TIMING
-    UBLOG(logINFO, "updateVelocityBc() time = " << timer.stop() << " s");
-#endif
-}
-
-void MovableObjectInteractor::updateNodeLists()
-{
-    // for (BcNodeIndicesMap::value_type t : bcNodeIndicesMap)
-    //{
-    //   SPtr<Block3D> block = t.first;
-    //   std::set< UbTupleInt3 >& bcNodeIndices = t.second;
-
-    //   SPtr<ILBMKernel> kernel = block->getKernel();
-
-    //   for (UbTupleInt3 node : bcNodeIndices)
-    //   {
-
-    //   }
-    //}
-}
-
-void MovableObjectInteractor::reconstructDistributionOnSolidNodes()
-{
-    for (SolidNodeIndicesMap::value_type t : solidNodeIndicesMap) {
-        SPtr<Block3D> block                     = t.first;
-        std::set<UbTupleInt3> &solidNodeIndices = t.second;
-
-        SPtr<ILBMKernel> kernel = block->getKernel();
-
-        for (UbTupleInt3 node : solidNodeIndices) {
-            const int x1 = val<1>(node);
-            const int x2 = val<2>(node);
-            const int x3 = val<3>(node);
-
-            const Vector3D worldCoordinates = this->grid.lock()->getNodeCoordinates(block, x1, x2, x3);
-
-            if (kernel->isInsideOfDomain(x1, x2, x3))
-                reconstructor->reconstructNode(x1, x2, x3, worldCoordinates, physicsEngineGeometry, kernel);
-        }
-    }
-}
-
-void MovableObjectInteractor::setSolidNodesToFluid()
-{
-    for (SolidNodeIndicesMap::value_type t : solidNodeIndicesMap) {
-        SPtr<Block3D> block                     = t.first;
-        std::set<UbTupleInt3> &solidNodeIndices = t.second;
-
-        SPtr<ILBMKernel> kernel = block->getKernel();
-        SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
-
-        for (UbTupleInt3 node : solidNodeIndices)
-            bcArray->setFluid(val<1>(node), val<2>(node), val<3>(node));
-    }
-}
-
-void MovableObjectInteractor::setBcNodesToFluid()
-{
-    for (BcNodeIndicesMap::value_type t : bcNodeIndicesMap) {
-        SPtr<Block3D> block                       = t.first;
-        std::set<std::vector<int>> &bcNodeIndices = t.second;
-
-        SPtr<ILBMKernel> kernel = block->getKernel();
-        SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
-
-        for (std::vector<int> node : bcNodeIndices)
-            bcArray->setFluid(node[0], node[1], node[2]);
-    }
-}
-
-void MovableObjectInteractor::setBcBlocks()
-{
-    SetBcBlocksBlockVisitor v(shared_from_this());
-    this->grid.lock()->accept(v);
-
-    //////////////////////////////////////////////////////////////////////////
-    // SPtr<GbObject3D> geoObject = this->getGbObject3D();
-    // std::array<double, 6> AABB ={
-    // geoObject->getX1Minimum(),geoObject->getX2Minimum(),geoObject->getX3Minimum(),geoObject->getX1Maximum(),geoObject->getX2Maximum(),geoObject->getX3Maximum()
-    // }; blockVector.clear(); UbTupleInt3 blockNX=grid.lock()->getBlockNX(); double ext = 0.0;
-    // grid.lock()->getBlocksByCuboid(AABB[0]-(double)val<1>(blockNX)*ext, AABB[1]-(double)val<2>(blockNX)*ext,
-    // AABB[2]-(double)val<3>(blockNX)*ext, AABB[3]+(double)val<1>(blockNX)*ext, AABB[4]+(double)val<2>(blockNX)*ext,
-    // AABB[5]+(double)val<3>(blockNX)*ext, blockVector);
-
-    // for(std::shared_ptr<Block3D> block : this->blockVector)
-    //{
-    //   if (block->getKernel())
-    //   {
-    //      setBCBlock(block);
-    //   }
-    //}
-    //////////////////////////////////////////////////////////////////////////
-    // SPtr<GbObject3D> geoObject = this->getGbObject3D();
-    // std::array <double, 2> minMax1;
-    // std::array <double, 2> minMax2;
-    // std::array <double, 2> minMax3;
-    // minMax1[0] = geoObject->getX1Minimum();
-    // minMax2[0] = geoObject->getX2Minimum();
-    // minMax3[0] = geoObject->getX3Minimum();
-    // minMax1[1] = geoObject->getX1Maximum();
-    // minMax2[1] = geoObject->getX2Maximum();
-    // minMax3[1] = geoObject->getX3Maximum();
-
-    // SPtr<CoordinateTransformation3D> trafo = grid.lock()->getCoordinateTransformator();
-
-    // for (int x3 = 0; x3 < 2; x3++)
-    //   for (int x2 = 0; x2 < 2; x2++)
-    //      for (int x1 = 0; x1 < 2; x1++)
-    //      {
-    //         int ix1 = (int)trafo->transformForwardToX1Coordinate(minMax1[x1], minMax2[x2], minMax3[x3]);
-    //         int ix2 = (int)trafo->transformForwardToX2Coordinate(minMax1[x1], minMax2[x2], minMax3[x3]);
-    //         int ix3 = (int)trafo->transformForwardToX3Coordinate(minMax1[x1], minMax2[x2], minMax3[x3]);
-    //         blockVector.push_back(grid.lock()->getBlock(ix1, ix2, ix3, 0));
-    //      }
-    // for(std::shared_ptr<Block3D> block : this->blockVector)
-    //{
-    //   if (block->getKernel())
-    //   {
-    //      setBCBlock(block);
-    //   }
-    //}
-}
-
-void MovableObjectInteractor::updateVelocityBc()
-{
-    for (BcNodeIndicesMap::value_type t : this->getBcNodeIndicesMap()) {
-        SPtr<Block3D> block                       = t.first;
-        std::set<std::vector<int>> &bcNodeIndices = t.second;
-
-        SPtr<BCArray3D> bcArray = block->getKernel()->getBCProcessor()->getBCArray();
-
-        for (std::vector<int> node : bcNodeIndices)
-            setGeometryVelocityToBoundaryCondition(node, block, bcArray);
-    }
-}
-
-void MovableObjectInteractor::setGeometryVelocityToBoundaryCondition(std::vector<int> node, SPtr<Block3D> block,
-                                                                     SPtr<BCArray3D> bcArray) const
-{
-    const SPtr<BoundaryConditions> bc = bcArray->getBC(node[0], node[1], node[2]);
-    const Vector3D worldCoordinates   = this->grid.lock()->getNodeCoordinates(block, node[0], node[1], node[2]);
-    const Vector3D velocity           = this->physicsEngineGeometry->getVelocityAtPosition(worldCoordinates);
-
-    bc->setBoundaryVelocity(velocity);
-}
diff --git a/src/cpu/DemCoupling/MovableObjectInteractor.h b/src/cpu/DemCoupling/MovableObjectInteractor.h
deleted file mode 100644
index e0e4343a066d5fd69ddb3cf68d2106337a51d031..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/MovableObjectInteractor.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef D3Q27_MOVABLE_OBJECT_INTERACTOR_H
-#define D3Q27_MOVABLE_OBJECT_INTERACTOR_H
-
-#include <memory>
-#include <vector>
-
-#include "D3Q27Interactor.h"
-
-#include "PhysicsEngineGeometryAdapter.h"
-#include "Vector3D.h"
-
-class Grid3D;
-class Block3D;
-class BCArray3D;
-class BCAdapter;
-class GbObject3D;
-
-class PhysicsEngineGeometryAdapter;
-class Reconstructor;
-
-class MovableObjectInteractor : public D3Q27Interactor
-{
-public:
-    typedef std::map<SPtr<Block3D>, std::set<std::array<int, 3>>> InBcNodeIndicesMap;
-    typedef std::map<SPtr<Block3D>, std::set<std::array<int, 3>>> OutBcNodeIndicesMap;
-
-public:
-    MovableObjectInteractor(std::shared_ptr<GbObject3D> geoObject3D, std::shared_ptr<Grid3D> grid,
-                            std::shared_ptr<BCAdapter> bcAdapter, int type,
-                            std::shared_ptr<Reconstructor> reconstructor, State isPinned);
-    virtual ~MovableObjectInteractor();
-
-    void setPhysicsEngineGeometry(std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry);
-
-    void moveGbObjectTo(const Vector3D &position);
-
-private:
-    void rearrangeGrid();
-    void updateNodeLists();
-    void setSolidNodesToFluid();
-    void setBcNodesToFluid();
-    void reconstructDistributionOnSolidNodes();
-    void setBcBlocks();
-
-    void updateVelocityBc();
-    void setGeometryVelocityToBoundaryCondition(std::vector<int> node, std::shared_ptr<Block3D> block,
-                                                std::shared_ptr<BCArray3D> bcArray) const;
-
-    std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry;
-
-    std::shared_ptr<Reconstructor> reconstructor;
-    State state;
-    std::vector<std::shared_ptr<Block3D>> blockVector;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/PePartitioningGridVisitor.cpp b/src/cpu/DemCoupling/PePartitioningGridVisitor.cpp
deleted file mode 100644
index 429eaeb8be0d3a601b64199e5e86279f7d05ce8f..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/PePartitioningGridVisitor.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-#if defined VF_METIS && defined VF_MPI
-
-#include "PePartitioningGridVisitor.h"
-#include "Block3D.h"
-#include <mpi/Communicator.h>
-#include "CoordinateTransformation3D.h"
-#include "Grid3D.h"
-#include "UbLogger.h"
-#include <math.h>
-#include <shared_mutex>
-
-#include "DemCoProcessor.h"
-
-using namespace std;
-
-PePartitioningGridVisitor::PePartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, std::shared_ptr<DemCoProcessor> dem)
-    : Grid3DVisitor(), comm(comm), dem(dem)
-{
-    forest = dynamicPointerCast<PePhysicsEngineSolverAdapter>(dem->getPhysicsEngineSolver())->getForest();
-}
-//////////////////////////////////////////////////////////////////////////
-PePartitioningGridVisitor::~PePartitioningGridVisitor() {}
-//////////////////////////////////////////////////////////////////////////
-void PePartitioningGridVisitor::visit(SPtr<Grid3D> grid)
-{
-    UBLOG(logDEBUG1, "PePartitioningGridVisitor::visit() - start");
-
-    collectData(grid);
-    distributePartitionData(grid);
-
-    UBLOG(logDEBUG1, "PePartitioningGridVisitor::visit() - end");
-}
-//////////////////////////////////////////////////////////////////////////
-void PePartitioningGridVisitor::collectData(SPtr<Grid3D> grid)
-{
-    // int minInitLevel = grid->getCoarsestInitializedLevel();
-    // int maxInitLevel = grid->getFinestInitializedLevel();
-
-    walberla::uint_t peRank;
-
-    for (auto blockIt = forest->begin(); blockIt != forest->end(); ++blockIt) {
-        forest->getProcessRank(peRank, blockIt->getId());
-        vector<SPtr<Block3D>> blocks;
-        walberla::AABB aabb = blockIt->getAABB();
-
-        // getBlocksByCuboid((double)aabb.xMin(), (double)aabb.yMin(), (double)aabb.zMin(), (double)aabb.xMax(),
-        // (double)aabb.yMax(), (double)aabb.zMax(), blocks, grid); for (SPtr<Block3D> block : blocks)
-        //{
-        //   ids.push_back(block->getGlobalID());
-        //   ranks.push_back((int)peRank);
-        //}
-        SPtr<Block3D> block = getBlockByMinUniform((double)aabb.xMin(), (double)aabb.yMin(), (double)aabb.zMin(), grid);
-        if (block) {
-            ids.push_back(block->getGlobalID());
-            ranks.push_back((int)peRank);
-        }
-    }
-}
-//////////////////////////////////////////////////////////////////////////
-// void PePartitioningGridVisitor::getBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double
-// maxX2, double maxX3, std::vector<SPtr<Block3D>>& blocks, SPtr<Grid3D> grid)
-//{
-//   int coarsestLevel = grid->getCoarsestInitializedLevel();
-//   int finestLevel   = grid->getFinestInitializedLevel();
-//
-//   SPtr<CoordinateTransformation3D> trafo = grid->getCoordinateTransformator();
-//
-//   //////////////////////////////////////////////////////////////////////////
-//   //MINIMALE BLOCK-INDIZES BESTIMMEN
-//   //
-//   //min:
-//   double dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3)*(1<<finestLevel);
-//   double dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3)*(1<<finestLevel);
-//   double dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3)*(1<<finestLevel);
-//
-//   //Achtung, wenn minX1 genau auf grenze zwischen zwei bloecken -> der "kleinere" muss genommen werden,
-//   //da beim Transformieren der "groessere" Index rauskommt
-//   int iMinX1 = (int)dMinX1; //if (UbMath::zero(dMinX1-iMinX1)) iMinX1-=1;
-//   int iMinX2 = (int)dMinX2; //if (UbMath::zero(dMinX2-iMinX2)) iMinX2-=1;
-//   int iMinX3 = (int)dMinX3; //if (UbMath::zero(dMinX3-iMinX3)) iMinX3-=1;
-//
-//   //max (hier kann die Zusatzabfrage vernachlaessigt werden):
-//   int iMaxX1 = (int)(trafo->transformForwardToX1Coordinate(maxX1, maxX2, maxX3)*(1<<finestLevel));
-//   int iMaxX2 = (int)(trafo->transformForwardToX2Coordinate(maxX1, maxX2, maxX3)*(1<<finestLevel));
-//   int iMaxX3 = (int)(trafo->transformForwardToX3Coordinate(maxX1, maxX2, maxX3)*(1<<finestLevel));
-//
-//   SPtr<Block3D> block;
-//
-//   //set, um doppelte bloecke zu vermeiden, die u.U. bei periodic auftreten koennen
-//   std::set<SPtr<Block3D>> blockset;
-//   for (int level=coarsestLevel; level<=finestLevel; level++)
-//   {
-//      //damit bei negativen werten auch der "kleinere" genommen wird -> floor!
-//      int minx1 = (int)std::floor((double)iMinX1/(1<<(finestLevel-level)));
-//      int minx2 = (int)std::floor((double)iMinX2/(1<<(finestLevel-level)));
-//      int minx3 = (int)std::floor((double)iMinX3/(1<<(finestLevel-level)));
-//
-//      int maxx1 = iMaxX1/(1<<(finestLevel-level));
-//      int maxx2 = iMaxX2/(1<<(finestLevel-level));
-//      int maxx3 = iMaxX3/(1<<(finestLevel-level));
-//
-//      for (int ix1=minx1; ix1<maxx1; ix1++)
-//         for (int ix2=minx2; ix2<maxx2; ix2++)
-//            for (int ix3=minx3; ix3<maxx3; ix3++)
-//               if ((block=grid->getBlock(ix1, ix2, ix3, level)))
-//               {
-//                  blockset.insert(block);
-//               }
-//   }
-//
-//   blocks.resize(blockset.size());
-//   std::copy(blockset.begin(), blockset.end(), blocks.begin());
-//}
-
-SPtr<Block3D> PePartitioningGridVisitor::getBlockByMinUniform(double minX1, double minX2, double minX3,
-                                                              SPtr<Grid3D> grid)
-{
-    SPtr<CoordinateTransformation3D> trafo = grid->getCoordinateTransformator();
-
-    int ix1 = (int)trafo->transformForwardToX1Coordinate(minX1, minX2, minX3);
-    int ix2 = (int)trafo->transformForwardToX2Coordinate(minX1, minX2, minX3);
-    int ix3 = (int)trafo->transformForwardToX3Coordinate(minX1, minX2, minX3);
-
-    return grid->getBlock(ix1, ix2, ix3, 0);
-}
-
-//////////////////////////////////////////////////////////////////////////
-void PePartitioningGridVisitor::distributePartitionData(SPtr<Grid3D> grid)
-{
-    std::vector<int> totalIDs;
-    std::vector<int> totalRanks;
-
-    assert(ids.size() != 0);
-    assert(ranks.size() != 0);
-
-    comm->allGather(ids, totalIDs);
-    comm->allGather(ranks, totalRanks);
-
-    assert(totalIDs.size() == totalRanks.size());
-    for (int i = 0; i < totalIDs.size(); i++) {
-        SPtr<Block3D> block = grid->getBlock(totalIDs[i]);
-        if (block)
-            block->setRank(totalRanks[i]);
-    }
-}
-//////////////////////////////////////////////////////////////////////////
-
-#endif // VF_METIS
diff --git a/src/cpu/DemCoupling/PePartitioningGridVisitor.h b/src/cpu/DemCoupling/PePartitioningGridVisitor.h
deleted file mode 100644
index cad80c0f4d986c45560c6111e8943226df136d24..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/PePartitioningGridVisitor.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef PePartitioningGridVisitor_h
-#define PePartitioningGridVisitor_h
-
-#if defined VF_MPI
-
-#include <PointerDefinitions.h>
-#include <vector>
-
-#include "Grid3DVisitor.h"
-
-#include "PePhysicsEngineSolverAdapter.h"
-
-#include <array>
-
-////////////////////////////////////////////////////////////////////////
-//! \brief The class implements domain decomposition with PE library
-//! \author Konstantin Kutscher
-//////////////////////////////////////////////////////////////////////////
-namespace vf::mpi {class Communicator;}
-class Grid3D;
-class Block3D;
-class DemCoProcessor;
-// class walberla::blockforest::BlockForest;
-
-class PePartitioningGridVisitor : public Grid3DVisitor
-{
-public:
-    //! This describe different types of decomposition
-    enum GraphType { LevelIntersected, LevelBased };
-
-public:
-    //! Constructor
-    //! \param comm - communicator
-
-    PePartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, std::shared_ptr<DemCoProcessor> dem);
-    virtual ~PePartitioningGridVisitor();
-    void visit(SPtr<Grid3D> grid) override;
-
-protected:
-    void collectData(SPtr<Grid3D> grid);
-    void distributePartitionData(SPtr<Grid3D> grid);
-    // void getBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-    // std::vector<SPtr<Block3D>>& blocks, SPtr<Grid3D> grid);
-    SPtr<Block3D> getBlockByMinUniform(double minX1, double minX2, double minX3, SPtr<Grid3D> grid);
-
-private:
-    std::shared_ptr<vf::mpi::Communicator> comm;
-    std::shared_ptr<DemCoProcessor> dem;
-
-    std::vector<int> ids;
-    std::vector<int> ranks;
-
-    std::shared_ptr<walberla::blockforest::BlockForest> forest;
-};
-
-#endif // VF_MPI
-#endif
diff --git a/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.cpp b/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.cpp
deleted file mode 100644
index ff6cbe7e5a3e394bac18016507a57308d0f1ecbf..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-#include "RestartDemObjectsCoProcessor.h"
-
-#include <mpi/Communicator.h>
-#include "CreateDemObjectsCoProcessor.h"
-#include "DemCoProcessor.h"
-#include "GbSphere3D.h"
-#include "Grid3D.h"
-#include "UbFileInputBinary.h"
-#include "UbFileOutputBinary.h"
-#include "UbScheduler.h"
-#include "UbSystem.h"
-#include "Vector3D.h"
-
-RestartDemObjectsCoProcessor::RestartDemObjectsCoProcessor() {}
-
-RestartDemObjectsCoProcessor::RestartDemObjectsCoProcessor(
-    SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<DemCoProcessor> demCoProcessor,
-    SPtr<CreateDemObjectsCoProcessor> createDemObjectsCoProcessor, double radius, std::shared_ptr<vf::mpi::Communicator> comm)
-    : CoProcessor(grid, s), path(path), demCoProcessor(demCoProcessor),
-      createDemObjectsCoProcessor(createDemObjectsCoProcessor), radius(radius), comm(comm)
-{
-}
-
-void RestartDemObjectsCoProcessor::process(double step)
-{
-    if (scheduler->isDue(step)) {
-        int istep = static_cast<int>(step);
-
-        if (comm->isRoot())
-            UBLOG(logINFO, "RestartDemObjectsCoProcessor::write step: " << istep);
-
-        write(istep);
-    }
-}
-
-void RestartDemObjectsCoProcessor::restart(double step)
-{
-    if (comm->isRoot())
-        UBLOG(logINFO, "RestartDemObjectsCoProcessor::read step: " << (int)step);
-
-    read((int)step);
-}
-
-void RestartDemObjectsCoProcessor::write(int step)
-{
-    if (comm->isRoot())
-        UBLOG(logINFO, "RestartDemObjectsCoProcessor::write start ");
-    std::vector<double> p;
-
-    demCoProcessor->getObjectsPropertiesVector(p);
-
-    // TODO implement getherv
-    std::vector<double> rvalues;
-    comm->allGather(p, rvalues);
-
-    if (comm->isRoot()) {
-        std::map<int, std::vector<double>> infMap;
-        int size = (int)rvalues.size();
-        for (int i = 0; i < size; i += 7) {
-            std::vector<double> infVector(6);
-            for (int j = 0; j < 6; j++) {
-                infVector[j] = rvalues[i + 1 + j];
-            }
-            infMap.insert(std::make_pair((int)rvalues[i], infVector));
-        }
-        std::vector<double> wvalues;
-        typedef std::map<int, std::vector<double>>::iterator it_type;
-        for (it_type iterator = infMap.begin(); iterator != infMap.end(); iterator++) {
-            // iterator->first = key
-            // iterator->second = value
-            std::vector<double>::iterator it = wvalues.end();
-            it                               = wvalues.insert(it, iterator->second.begin(), iterator->second.end());
-        }
-        std::string subfolder = "dem_cp_" + UbSystem::toString(step);
-        std::string filePath  = path + "/dem_cp/" + subfolder + "/dem_cp.bin";
-        UbFileOutputBinary fo(filePath);
-        fo.writeInteger((int)wvalues.size());
-        fo.writeVector<double>(wvalues);
-        UBLOG(logINFO, "RestartDemObjectsCoProcessor::write number of objects = " << wvalues.size() / 6);
-    }
-    if (comm->isRoot())
-        UBLOG(logINFO, "RestartDemObjectsCoProcessor::write stop ");
-}
-
-void RestartDemObjectsCoProcessor::read(int step)
-{
-    if (comm->isRoot())
-        UBLOG(logINFO, "RestartDemObjectsCoProcessor::read start ");
-    std::vector<double> p;
-
-    if (comm->isRoot()) {
-        std::string subfolder = "dem_cp_" + UbSystem::toString(step);
-        std::string filePath  = path + "/dem_cp/" + subfolder + "/dem_cp.bin";
-        UbFileInputBinary fi(filePath);
-        int size = fi.readInteger();
-        p.resize(size);
-        fi.readVector<double>(p);
-    }
-    comm->broadcast(p);
-
-    if (comm->isRoot())
-        UBLOG(logINFO, "RestartDemObjectsCoProcessor::read number of objects = " << p.size() / 6);
-
-    createDemObjectsCoProcessor->clearGeoObjects();
-
-    int size = (int)p.size();
-
-    for (int i = 0; i < size; i += 6) {
-        SPtr<GbObject3D> sphere(new GbSphere3D(p[i], p[i + 1], p[i + 2], radius));
-        createDemObjectsCoProcessor->addGeoObject(sphere, Vector3D(p[i + 3], p[i + 4], p[i + 5]));
-    }
-
-    createDemObjectsCoProcessor->createGeoObjects();
-
-    createDemObjectsCoProcessor->clearGeoObjects();
-
-    if (comm->isRoot())
-        UBLOG(logINFO, "RestartDemObjectsCoProcessor::read stop ");
-}
diff --git a/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.h b/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.h
deleted file mode 100644
index 5123a2d6e51ece8e96d6623d573141a8c272026f..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *  Author: K. Kutscher
- *  mail: kutscher@irmb.tu-bs.de
- */
-#ifndef RestartDemObjectsCoProcessor_H
-#define RestartDemObjectsCoProcessor_H
-
-#include <PointerDefinitions.h>
-#include <string>
-#include <vector>
-
-#include "CoProcessor.h"
-
-namespace vf::mpi {class Communicator;}
-class Grid3D;
-class UbScheduler;
-class DemCoProcessor;
-class CreateDemObjectsCoProcessor;
-
-class RestartDemObjectsCoProcessor : public CoProcessor
-{
-public:
-    RestartDemObjectsCoProcessor();
-    RestartDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                 SPtr<DemCoProcessor> demCoProcessor,
-                                 SPtr<CreateDemObjectsCoProcessor> createDemObjectsCoProcessor, double radius,
-                                 std::shared_ptr<vf::mpi::Communicator> comm);
-    ~RestartDemObjectsCoProcessor() {}
-    void process(double step) override;
-    void restart(double step);
-    void write(int step);
-    void read(int step);
-
-private:
-    std::string path;
-    double radius;
-    std::shared_ptr<vf::mpi::Communicator> comm;
-    SPtr<DemCoProcessor> demCoProcessor;
-    SPtr<CreateDemObjectsCoProcessor> createDemObjectsCoProcessor;
-};
-#endif
diff --git a/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.cpp b/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.cpp
deleted file mode 100644
index 3e22c90cf266fa8593b0036d160d79080a3ad31c..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-#include "WriteDemObjectsCoProcessor.h"
-
-#include "basics/writer/WbWriterVtkXmlASCII.h"
-#include "basics/writer/WbWriterVtkXmlBinary.h"
-
-#include <mpi/Communicator.h>
-#include "DemCoProcessor.h"
-#include "Grid3D.h"
-#include "UbScheduler.h"
-#include "UbSystem.h"
-
-WriteDemObjectsCoProcessor::WriteDemObjectsCoProcessor() {}
-//////////////////////////////////////////////////////////////////////////
-WriteDemObjectsCoProcessor::WriteDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                       WbWriter *const writer, SPtr<DemCoProcessor> demCoProcessor,
-                                                       std::shared_ptr<vf::mpi::Communicator> comm)
-    : CoProcessor(grid, s), path(path), writer(writer), demCoProcessor(demCoProcessor), comm(comm)
-{
-}
-//////////////////////////////////////////////////////////////////////////
-void WriteDemObjectsCoProcessor::process(double step)
-{
-    if (scheduler->isDue(step)) {
-        std::vector<UbTupleFloat3> nodes;
-        std::vector<UbTupleInt3> triangles;
-
-        int numObjcts = demCoProcessor->addSurfaceTriangleSet(nodes, triangles);
-
-        int istep = static_cast<int>(step);
-
-        std::string pfilePath, partPath, subfolder, cfilePath;
-
-        subfolder = "dem" + UbSystem::toString(istep);
-        pfilePath = path + "/dem/" + subfolder;
-        cfilePath = path + "/dem/dem_collection";
-        partPath  = pfilePath + "/dem" + UbSystem::toString(comm->getProcessID()) + "_" + UbSystem::toString(istep);
-
-        std::string partName = writer->writeTriangles(partPath, nodes, triangles);
-        size_t found         = partName.find_last_of("/");
-        std::string piece    = partName.substr(found + 1);
-        piece                = subfolder + "/" + piece;
-
-        std::vector<std::string> datanames;
-        std::vector<std::string> cellDataNames;
-        std::vector<std::string> pieces = comm->gather(piece);
-        if (comm->isRoot()) {
-            std::string pname =
-                WbWriterVtkXmlASCII::getInstance()->writeParallelFile(pfilePath, pieces, datanames, cellDataNames);
-            found = pname.find_last_of("/");
-            piece = pname.substr(found + 1);
-
-            std::vector<std::string> filenames;
-            filenames.push_back(piece);
-            if (step == CoProcessor::scheduler->getMinBegin()) {
-                WbWriterVtkXmlASCII::getInstance()->writeCollection(cfilePath, filenames, istep, false);
-            } else {
-                WbWriterVtkXmlASCII::getInstance()->addFilesToCollection(cfilePath, filenames, istep, false);
-            }
-            UBLOG(logINFO, "WriteDemObjectsCoProcessor number of objects: " << numObjcts);
-            UBLOG(logINFO, "WriteDemObjectsCoProcessor step: " << istep);
-        }
-    }
-}
diff --git a/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.h b/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.h
deleted file mode 100644
index 7fb3b045ccd439d772ef565c2013af32c75a7a2d..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- *  Author: K. Kutscher
- *  mail: kutscher@irmb.tu-bs.de
- */
-#ifndef WriteDemObjectsCoProcessor_H
-#define WriteDemObjectsCoProcessor_H
-
-#include <PointerDefinitions.h>
-#include <string>
-#include <vector>
-
-#include "CoProcessor.h"
-
-namespace vf::mpi {class Communicator;}
-class Grid3D;
-class UbScheduler;
-class DemCoProcessor;
-class WbWriter;
-
-class WriteDemObjectsCoProcessor : public CoProcessor
-{
-public:
-    WriteDemObjectsCoProcessor();
-    WriteDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, WbWriter *const writer,
-                               SPtr<DemCoProcessor> demCoProcessor, std::shared_ptr<vf::mpi::Communicator> comm);
-    ~WriteDemObjectsCoProcessor() {}
-    void process(double step) override;
-
-private:
-    std::string path;
-    WbWriter *writer;
-    std::shared_ptr<vf::mpi::Communicator> comm;
-    SPtr<DemCoProcessor> demCoProcessor;
-};
-#endif
diff --git a/src/cpu/DemCoupling/WritePeBlocksCoProcessor.cpp b/src/cpu/DemCoupling/WritePeBlocksCoProcessor.cpp
deleted file mode 100644
index 401ea91bc7225eea7f871cbc2e92be44d1a5c9d7..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/WritePeBlocksCoProcessor.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-#include "WritePeBlocksCoProcessor.h"
-
-#include "basics/writer/WbWriterVtkXmlASCII.h"
-
-#include "Block3D.h"
-#include <mpi/Communicator.h>
-#include "D3Q27System.h"
-#include "Grid3D.h"
-#include "UbScheduler.h"
-
-WritePeBlocksCoProcessor::WritePeBlocksCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                   WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm,
-                                                   SPtr<walberla::blockforest::BlockForest> forest)
-    : CoProcessor(grid, s), path(path), writer(writer), comm(comm), forest(forest)
-{
-}
-
-WritePeBlocksCoProcessor::~WritePeBlocksCoProcessor() {}
-
-void WritePeBlocksCoProcessor::process(double step)
-{
-    if (scheduler->isDue(step))
-        collectData(step);
-}
-
-void WritePeBlocksCoProcessor::collectData(double step)
-{
-    if (comm->getProcessID() == comm->getRoot()) {
-        int istep = int(step);
-        std::vector<std::string> filenames;
-        std::vector<UbTupleFloat3> nodes;
-        std::vector<UbTupleInt8> cells;
-        std::vector<std::string> celldatanames;
-
-        celldatanames.push_back("ID");
-        celldatanames.push_back("rank");
-
-        walberla::uint_t rank = 0;
-
-        std::vector<std::vector<double>> celldata(celldatanames.size());
-
-        int nr = 0;
-
-        for (auto blockIt = forest->begin(); blockIt != forest->end(); ++blockIt) {
-            walberla::AABB aabb = blockIt->getAABB();
-
-            nodes.push_back(makeUbTuple((float)aabb.xMin(), (float)aabb.yMin(), (float)aabb.zMin()));
-            nodes.push_back(makeUbTuple((float)aabb.xMax(), (float)aabb.yMin(), (float)aabb.zMin()));
-            nodes.push_back(makeUbTuple((float)aabb.xMax(), (float)aabb.yMax(), (float)aabb.zMin()));
-            nodes.push_back(makeUbTuple((float)aabb.xMin(), (float)aabb.yMax(), (float)aabb.zMin()));
-            nodes.push_back(makeUbTuple((float)aabb.xMin(), (float)aabb.yMin(), (float)aabb.zMax()));
-            nodes.push_back(makeUbTuple((float)aabb.xMax(), (float)aabb.yMin(), (float)aabb.zMax()));
-            nodes.push_back(makeUbTuple((float)aabb.xMax(), (float)aabb.yMax(), (float)aabb.zMax()));
-            nodes.push_back(makeUbTuple((float)aabb.xMin(), (float)aabb.yMax(), (float)aabb.zMax()));
-            cells.push_back(makeUbTuple(nr, nr + 1, nr + 2, nr + 3, nr + 4, nr + 5, nr + 6, nr + 7));
-            nr += 8;
-
-            // data
-            celldata[0].push_back((double)blockIt->getId().getID());
-            forest->getProcessRank(rank, blockIt->getId());
-            celldata[1].push_back((double)rank);
-        }
-
-        filenames.push_back(writer->writeOctsWithCellData(
-            path + "/peBlocks/peBlocks_" + UbSystem::toString(grid->getRank()) + "_" + UbSystem::toString(istep), nodes,
-            cells, celldatanames, celldata));
-
-        if (istep == CoProcessor::scheduler->getMinBegin()) {
-            WbWriterVtkXmlASCII::getInstance()->writeCollection(path + "/peBlocks/peBlocks_collection", filenames,
-                                                                istep, false);
-        } else {
-            WbWriterVtkXmlASCII::getInstance()->addFilesToCollection(path + "/peBlocks/peBlocks_collection", filenames,
-                                                                     istep, false);
-        }
-
-        UBLOG(logINFO, "WritePeBlocksCoProcessor step: " << istep);
-    }
-}
\ No newline at end of file
diff --git a/src/cpu/DemCoupling/WritePeBlocksCoProcessor.h b/src/cpu/DemCoupling/WritePeBlocksCoProcessor.h
deleted file mode 100644
index ae27d50b3f0bba867db7ad8cce79f2e5d8fd5681..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/WritePeBlocksCoProcessor.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *  WritePeBlocksCoProcessor.h
- *
- *  Created on: 07.09.2018
- *  Author: K. Kutscher
- */
-
-#ifndef WritePeBlocksCoProcessor_H_
-#define WritePeBlocksCoProcessor_H_
-
-#include <PointerDefinitions.h>
-#include <string>
-
-#include "CoProcessor.h"
-
-#include <pe/basic.h>
-
-namespace vf::mpi {class Communicator;}
-class Grid3D;
-class UbScheduler;
-class WbWriter;
-
-class WritePeBlocksCoProcessor : public CoProcessor
-{
-public:
-    WritePeBlocksCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, WbWriter *const writer,
-                             std::shared_ptr<vf::mpi::Communicator> comm, SPtr<walberla::blockforest::BlockForest> forest);
-    virtual ~WritePeBlocksCoProcessor();
-
-    void process(double step) override;
-
-protected:
-    void collectData(double step);
-
-    std::string path;
-    WbWriter *writer;
-    std::shared_ptr<vf::mpi::Communicator> comm;
-    SPtr<walberla::blockforest::BlockForest> forest;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineGeometryAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineGeometryAdapter.h
deleted file mode 100644
index 490c1f979eaba285f1c39f834396acf9646584d9..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineGeometryAdapter.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef PHYSICS_ENGINE_GEOMETRY_ADAPTER_H
-#define PHYSICS_ENGINE_GEOMETRY_ADAPTER_H
-
-#include "Vector3D.h"
-
-enum class State { PIN, UNPIN };
-
-class PhysicsEngineGeometryAdapter
-{
-public:
-    virtual ~PhysicsEngineGeometryAdapter() {}
-
-    virtual void addForce(const Vector3D &force)   = 0;
-    virtual void addTorque(const Vector3D &torque) = 0;
-
-    virtual void setForce(const Vector3D &force)   = 0;
-    virtual void setTorque(const Vector3D &torque) = 0;
-
-    virtual void addForceAtPosition(const Vector3D &force, const Vector3D &position) = 0;
-    virtual void setLinearVelolocity(const Vector3D &velocity)                       = 0;
-    virtual void setAngularVelocity(const Vector3D &velocity)                        = 0;
-
-    virtual void resetForceAndTorque() = 0;
-
-    virtual Vector3D getPosition() const                                   = 0;
-    virtual Vector3D getVelocityAtPosition(const Vector3D &position) const = 0;
-    virtual Vector3D getLinearVelocity() const                             = 0;
-    virtual Vector3D getAngularVelocity() const                            = 0;
-
-    virtual Vector3D getForce() const  = 0;
-    virtual Vector3D getTorque() const = 0;
-
-    virtual void changeState(State state) = 0;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineMaterialAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineMaterialAdapter.h
deleted file mode 100644
index 30504bee98580f25f23e37030d5ec8180fce3ebc..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineMaterialAdapter.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef PHYSICS_ENGINE_MATERIAL_ADAPTER_H
-#define PHYSICS_ENGINE_MATERIAL_ADAPTER_H
-
-#include <string>
-
-class PhysicsEngineMaterialAdapter
-{
-public:
-    PhysicsEngineMaterialAdapter(std::string name, double density, double restitution, double staticFriction,
-                                 double dynamicFriction, double poissonRatio, double youngModul,
-                                 double stiffnessInNormalDirection, double dampingoefficientNormalDirection,
-                                 double dampingTangentialDirection)
-        : name(name), density(density), restitution(restitution), staticFriction(staticFriction),
-          dynamicFriction(dynamicFriction), poissonRatio(poissonRatio), youngModul(youngModul),
-          stiffnessInNormalDirection(stiffnessInNormalDirection),
-          dampingoefficientNormalDirection(dampingoefficientNormalDirection),
-          dampingTangentialDirection(dampingTangentialDirection)
-    {
-    }
-    virtual ~PhysicsEngineMaterialAdapter() {}
-
-protected:
-    std::string name;
-    double density;
-    double restitution;
-    double staticFriction;  // Note: pe doubles the input coefficient of friction for material-material contacts.
-    double dynamicFriction; //  Similar to static friction for low speed friction.
-    double poissonRatio;
-    double youngModul;
-    double stiffnessInNormalDirection;
-    double dampingoefficientNormalDirection;
-    double dampingTangentialDirection;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineSolverAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineSolverAdapter.h
deleted file mode 100644
index 8e03bf1d6e651f993012acc1e8297e309e993cc8..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/PhysicsEngineSolverAdapter.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef PHYSICS_ENGINE_SOLVER_ADAPTER_H
-#define PHYSICS_ENGINE_SOLVER_ADAPTER_H
-
-#include "Vector3D.h"
-
-class PhysicsEngineGeometryAdapter;
-class PhysicsEngineMaterialAdapter;
-
-class PhysicsEngineSolverAdapter
-{
-public:
-    virtual ~PhysicsEngineSolverAdapter() {}
-
-    virtual std::shared_ptr<PhysicsEngineGeometryAdapter>
-    createPhysicsEngineGeometryAdapter(int id, const Vector3D &position, double radius,
-                                       std::shared_ptr<PhysicsEngineMaterialAdapter> material) const = 0;
-    virtual void runTimestep(double step)                                                            = 0;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineGeometryAdapter.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineGeometryAdapter.cpp
deleted file mode 100644
index b18a57532880491a2419ff9d78bc0c64aac108f8..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineGeometryAdapter.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "DummyPhysicsEngineGeometryAdapter.h"
-
-void DummyPhysicsEngineGeometryAdapter::addForce(const Vector3D &force) {}
-
-void DummyPhysicsEngineGeometryAdapter::addTorque(const Vector3D &torque) {}
-
-void DummyPhysicsEngineGeometryAdapter::setForce(const Vector3D &force) {}
-
-void DummyPhysicsEngineGeometryAdapter::setTorque(const Vector3D &torque) {}
-
-void DummyPhysicsEngineGeometryAdapter::addForceAtPosition(const Vector3D &force, const Vector3D &position) {}
-
-void DummyPhysicsEngineGeometryAdapter::setLinearVelolocity(const Vector3D &velocity) { this->velocity = velocity; }
-
-void DummyPhysicsEngineGeometryAdapter::setAngularVelocity(const Vector3D &velocity) {}
-
-void DummyPhysicsEngineGeometryAdapter::resetForceAndTorque() {}
-
-Vector3D DummyPhysicsEngineGeometryAdapter::getVelocityAtPosition(const Vector3D &position) const { return velocity; }
-
-Vector3D DummyPhysicsEngineGeometryAdapter::getLinearVelocity() const { return Vector3D(); }
-
-Vector3D DummyPhysicsEngineGeometryAdapter::getAngularVelocity() const { return Vector3D(); }
-
-Vector3D DummyPhysicsEngineGeometryAdapter::getPosition() const { return Vector3D(); }
-
-Vector3D DummyPhysicsEngineGeometryAdapter::getForce() const { return Vector3D(); }
-
-Vector3D DummyPhysicsEngineGeometryAdapter::getTorque() const { return Vector3D(); }
-
-void DummyPhysicsEngineGeometryAdapter::changeState(State state) {}
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineGeometryAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineGeometryAdapter.h
deleted file mode 100644
index 70620d3c0681a2d2dc702c4a74f5c8d5a94141ea..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineGeometryAdapter.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef DUMMY_PHYSICS_ENGINE_GEOMETRY_ADAPTER_H
-#define DUMMY_PHYSICS_ENGINE_GEOMETRY_ADAPTER_H
-
-#include "UbTuple.h"
-
-#include "PhysicsEngineGeometryAdapter.h"
-
-class DummyPhysicsEngineGeometryAdapter : public PhysicsEngineGeometryAdapter
-{
-public:
-    DummyPhysicsEngineGeometryAdapter() {}
-    virtual ~DummyPhysicsEngineGeometryAdapter() {}
-
-    void addForce(const Vector3D &force) override;
-    void addTorque(const Vector3D &torque) override;
-
-    void setForce(const Vector3D &force) override;
-    void setTorque(const Vector3D &torque) override;
-
-    void addForceAtPosition(const Vector3D &force, const Vector3D &position) override;
-    void setLinearVelolocity(const Vector3D &velocity) override;
-    void setAngularVelocity(const Vector3D &velocity) override;
-
-    void resetForceAndTorque() override;
-
-    Vector3D getVelocityAtPosition(const Vector3D &position) const override;
-    Vector3D getLinearVelocity() const override;
-    Vector3D getAngularVelocity() const override;
-    Vector3D getPosition() const override;
-    Vector3D getForce() const override;
-    Vector3D getTorque() const override;
-
-    void changeState(State state) override;
-
-private:
-    Vector3D velocity;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineMaterialAdapter.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineMaterialAdapter.cpp
deleted file mode 100644
index 7890f966872d5433efa5b40b6358ca0b5a25a40d..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineMaterialAdapter.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "DummyPhysicsEngineMaterialAdapter.h"
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineMaterialAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineMaterialAdapter.h
deleted file mode 100644
index e84e0f1089a017ed9135383995c96e55cdf0cee6..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineMaterialAdapter.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef DUMMY_PHYSICS_ENGINE_MATERIAL_ADAPTER
-#define DUMMY_PHYSICS_ENGINE_MATERIAL_ADAPTER
-
-#include "PhysicsEngineMaterialAdapter.h"
-
-class DummyPhysicsEngineMaterialAdapter : public PhysicsEngineMaterialAdapter
-{
-public:
-    DummyPhysicsEngineMaterialAdapter(std::string name, double density, double restitution, double staticFriction,
-                                      double dynamicFriction, double poissonRatio, double youngModul,
-                                      double stiffnessInNormalDirection, double dampingoefficientNormalDirection,
-                                      double dampingTangentialDirection)
-        : PhysicsEngineMaterialAdapter(name, density, restitution, staticFriction, dynamicFriction, poissonRatio,
-                                       youngModul, stiffnessInNormalDirection, dampingoefficientNormalDirection,
-                                       dampingTangentialDirection)
-    {
-    }
-    virtual ~DummyPhysicsEngineMaterialAdapter() {}
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineSolverAdapter.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineSolverAdapter.cpp
deleted file mode 100644
index 321b523ead35a6a8d16d7b48b1fbb7cb66b0fdc8..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineSolverAdapter.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#include "DummyPhysicsEngineSolverAdapter.h"
-
-#include "DummyPhysicsEngineGeometryAdapter.h"
-
-std::shared_ptr<PhysicsEngineGeometryAdapter> DummyPhysicsEngineSolverAdapter::createPhysicsEngineGeometryAdapter(
-    int id, const Vector3D &position, double radius, std::shared_ptr<PhysicsEngineMaterialAdapter> material) const
-{
-    return std::static_pointer_cast<PhysicsEngineGeometryAdapter>(
-        std::make_shared<DummyPhysicsEngineGeometryAdapter>());
-}
-
-void DummyPhysicsEngineSolverAdapter::runTimestep(double step) {}
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineSolverAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineSolverAdapter.h
deleted file mode 100644
index 38e9e7f055b415266cfd35c055951707ba4cda44..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/dummy/DummyPhysicsEngineSolverAdapter.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef DUMMY_PHYSICS_ENGINE_SOLVER_ADAPTER_H
-#define DUMMY_PHYSICS_ENGINE_SOLVER_ADAPTER_H
-
-#include <memory>
-
-#include "UbTuple.h"
-
-#include "PhysicsEngineSolverAdapter.h"
-
-class DummyPhysicsEngineSolverAdapter : public PhysicsEngineSolverAdapter
-{
-public:
-    DummyPhysicsEngineSolverAdapter(){};
-    virtual ~DummyPhysicsEngineSolverAdapter() {}
-
-    std::shared_ptr<PhysicsEngineGeometryAdapter>
-    createPhysicsEngineGeometryAdapter(int id, const Vector3D &position, double radius,
-                                       std::shared_ptr<PhysicsEngineMaterialAdapter> material) const override;
-    void runTimestep(double step) override;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeAdapter.h
deleted file mode 100644
index 0373281d60b6618555de3d48190816d79b9ade5b..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeAdapter.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef PE_ADAPTER_H
-#define PE_ADAPTER_H
-
-#include "Vector3D.h"
-#include <pe/basic.h>
-
-class PeConverter
-{
-public:
-    static Vector3D convert(walberla::pe::Vec3 vec3) { return Vector3D(vec3[0], vec3[1], vec3[2]); }
-
-    static walberla::pe::Vec3 convert(const Vector3D &vec3) { return walberla::pe::Vec3(vec3[0], vec3[1], vec3[2]); }
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeAdapterTest.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeAdapterTest.cpp
deleted file mode 100644
index a92127c3fd0e2b10ad2d73ee88e7fbd00b8ace85..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeAdapterTest.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//#include "gmock/gmock.h"
-//
-//#include "PeAdapter.h"
-//#include <pe/basic.h>
-//
-//#include "UbTuple.h"
-//
-//
-// TEST(PeAdapterTest, convert_WalberlaVec3_to_Vector3D)
-//{
-//    walberla::pe::Vec3 walberlaVec(1.0, -2.0, 3.4);
-//    Vector3D ubTuple = PeConverter::convert(walberlaVec);
-//
-//    EXPECT_THAT(ubTuple[0], testing::DoubleEq(walberlaVec[0]));
-//    EXPECT_THAT(ubTuple[1], testing::DoubleEq(walberlaVec[1]));
-//    EXPECT_THAT(ubTuple[2], testing::DoubleEq(walberlaVec[2]));
-//}
-//
-// TEST(PeAdapterTest, convert_Vector3D_to_WalberlaVec3)
-//{
-//    Vector3D ubTuple(1.0, -2.0, 3.4);
-//    walberla::pe::Vec3 walberlaVec = PeConverter::convert(ubTuple);
-//
-//    EXPECT_THAT(ubTuple[0], testing::DoubleEq(walberlaVec[0]));
-//    EXPECT_THAT(ubTuple[1], testing::DoubleEq(walberlaVec[1]));
-//    EXPECT_THAT(ubTuple[2], testing::DoubleEq(walberlaVec[2]));
-//}
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeLoadBalancerAdapter.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeLoadBalancerAdapter.cpp
deleted file mode 100644
index 6c597698d608a287cc3a8bd5db84fbf061539234..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeLoadBalancerAdapter.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-#include "PeLoadBalancerAdapter.h"
-#include "Block3D.h"
-#include "CoordinateTransformation3D.h"
-#include "Grid3D.h"
-#include "UbLogger.h"
-
-#include "core/debug/CheckFunctions.h"
-
-PeLoadBalancerAdapter::PeLoadBalancerAdapter(SPtr<Grid3D> grid, unsigned numberOfProcesses, int rank)
-    : grid(grid), numberOfProcesses(numberOfProcesses), rank(rank)
-{
-}
-
-walberla::uint_t PeLoadBalancerAdapter::operator()(walberla::SetupBlockForest &forest,
-                                                   const walberla::uint_t numberOfProcesses,
-                                                   const walberla::memory_t perProcessMemoryLimit)
-{
-    std::vector<walberla::SetupBlock *> peBlocks;
-    forest.getBlocks(peBlocks);
-
-    for (auto peBlock = peBlocks.begin(); peBlock != peBlocks.end(); ++peBlock) {
-        walberla::AABB aabb = (*peBlock)->getAABB();
-        SPtr<Block3D> block = getBlockByMinUniform(aabb.xMin() + 0.5 * (aabb.xMax() - aabb.xMin()),
-                                                   aabb.yMin() + 0.5 * (aabb.yMax() - aabb.yMin()),
-                                                   aabb.zMin() + 0.5 * (aabb.zMax() - aabb.zMin()), grid);
-        if (block) {
-            (*peBlock)->assignTargetProcess((walberla::uint_t)block->getRank());
-        } else {
-            // TODO: the rank of pe blocks is not consistent with VF blocks
-            (*peBlock)->assignTargetProcess(0);
-            // UBLOG(logINFO, "PeLoadBalancerAdapter::operator() peBlockId="<<(*peBlock)->getId());
-        }
-    }
-
-    return numberOfProcesses;
-}
-
-SPtr<Block3D> PeLoadBalancerAdapter::getBlockByMinUniform(double minX1, double minX2, double minX3, SPtr<Grid3D> grid)
-{
-    SPtr<CoordinateTransformation3D> trafo = grid->getCoordinateTransformator();
-
-    int ix1 = (int)trafo->transformForwardToX1Coordinate(minX1, minX2, minX3);
-    int ix2 = (int)trafo->transformForwardToX2Coordinate(minX1, minX2, minX3);
-    int ix3 = (int)trafo->transformForwardToX3Coordinate(minX1, minX2, minX3);
-
-    return grid->getBlock(ix1, ix2, ix3, 0);
-}
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeLoadBalancerAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeLoadBalancerAdapter.h
deleted file mode 100644
index 9e1c64dd330cd9b1aa06857d4d441b736c8a39a1..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PeLoadBalancerAdapter.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef PeLoadBalancerAdapter_h__
-#define PeLoadBalancerAdapter_h__
-
-#include "PointerDefinitions.h"
-#include "blockforest/SetupBlockForest.h"
-
-class Grid3D;
-class Block3D;
-
-class PeLoadBalancerAdapter
-{
-public:
-    PeLoadBalancerAdapter(SPtr<Grid3D> grid, unsigned numberOfProcesses, int rank);
-    walberla::uint_t operator()(walberla::SetupBlockForest &forest, const walberla::uint_t numberOfProcesses,
-                                const walberla::memory_t perProcessMemoryLimit);
-    unsigned getNumberOfProcesses() const { return numberOfProcesses; }
-    int getRank() const { return rank; }
-
-protected:
-    SPtr<Block3D> getBlockByMinUniform(double minX1, double minX2, double minX3, SPtr<Grid3D> grid);
-
-private:
-    SPtr<Grid3D> grid;
-    unsigned numberOfProcesses;
-    int rank;
-};
-
-#endif // PeLoadBalancerAdapter_h__
\ No newline at end of file
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineGeometryAdapter.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineGeometryAdapter.cpp
deleted file mode 100644
index 9800d75b18a78b7eaf0b46c4193b93a55f3ff91b..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineGeometryAdapter.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-#include "PePhysicsEngineGeometryAdapter.h"
-
-#include <pe/basic.h>
-
-#include "PeAdapter.h"
-
-// PePhysicsEngineGeometryAdapter::PePhysicsEngineGeometryAdapter(walberla::pe::RigidBody* peGeoObject) :
-// peGeoObject(peGeoObject)
-//{
-//    this->id = peGeoObject->getID();
-//    this->active = true;
-//}
-
-PePhysicsEngineGeometryAdapter::PePhysicsEngineGeometryAdapter()
-{
-    this->id         = -999;
-    this->systemID   = -999;
-    this->active     = false;
-    this->semiactive = false;
-    shadowCounter    = 0;
-    counter          = 0;
-}
-
-void PePhysicsEngineGeometryAdapter::addForce(const Vector3D &force)
-{
-    peGeoObject->addForce(PeConverter::convert(force));
-}
-
-void PePhysicsEngineGeometryAdapter::addTorque(const Vector3D &torque)
-{
-    peGeoObject->addTorque(PeConverter::convert(torque));
-}
-
-void PePhysicsEngineGeometryAdapter::setForce(const Vector3D &force)
-{
-    peGeoObject->setForce(PeConverter::convert(force));
-}
-
-void PePhysicsEngineGeometryAdapter::setTorque(const Vector3D &torque)
-{
-    peGeoObject->setTorque(PeConverter::convert(torque));
-}
-
-void PePhysicsEngineGeometryAdapter::addForceAtPosition(const Vector3D &force, const Vector3D &position)
-{
-    peGeoObject->addForceAtPos(PeConverter::convert(force), PeConverter::convert(position));
-}
-
-void PePhysicsEngineGeometryAdapter::setLinearVelolocity(const Vector3D &velocity)
-{
-    peGeoObject->setLinearVel(PeConverter::convert(velocity));
-}
-
-void PePhysicsEngineGeometryAdapter::setAngularVelocity(const Vector3D &velocity)
-{
-    peGeoObject->setAngularVel(PeConverter::convert(velocity));
-}
-
-void PePhysicsEngineGeometryAdapter::resetForceAndTorque() { peGeoObject->resetForceAndTorque(); }
-
-Vector3D PePhysicsEngineGeometryAdapter::getVelocityAtPosition(const Vector3D &position) const
-{
-    return PeConverter::convert(peGeoObject->velFromWF(PeConverter::convert(position)));
-}
-
-Vector3D PePhysicsEngineGeometryAdapter::getLinearVelocity() const
-{
-    return PeConverter::convert(peGeoObject->getLinearVel());
-}
-
-Vector3D PePhysicsEngineGeometryAdapter::getAngularVelocity() const
-{
-    return PeConverter::convert(peGeoObject->getAngularVel());
-}
-
-Vector3D PePhysicsEngineGeometryAdapter::getPosition() const
-{
-    return PeConverter::convert(peGeoObject->getPosition());
-}
-
-Vector3D PePhysicsEngineGeometryAdapter::getForce() const { return PeConverter::convert(peGeoObject->getForce()); }
-
-Vector3D PePhysicsEngineGeometryAdapter::getTorque() const { return PeConverter::convert(peGeoObject->getTorque()); }
-
-void PePhysicsEngineGeometryAdapter::changeState(State state)
-{
-    if (state == State::PIN)
-        peGeoObject->setMassAndInertiaToInfinity();
-}
-
-int PePhysicsEngineGeometryAdapter::getId() const { return id; }
-
-void PePhysicsEngineGeometryAdapter::setId(int id) { this->id = id; }
-
-void PePhysicsEngineGeometryAdapter::setGeometry(walberla::pe::RigidBody *peGeoObject)
-{
-    this->peGeoObject = peGeoObject;
-}
-
-//////////////////////////////////////////////////////////////////////////
-void PePhysicsEngineGeometryAdapter::setActive() { active = true; }
-//////////////////////////////////////////////////////////////////////////
-void PePhysicsEngineGeometryAdapter::setInactive() { active = false; }
-//////////////////////////////////////////////////////////////////////////
-bool PePhysicsEngineGeometryAdapter::isActive() { return active; }
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineGeometryAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineGeometryAdapter.h
deleted file mode 100644
index a8eaa7d33ede840f4d76ae90ffb44bef30139f99..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineGeometryAdapter.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef PE_PHYSICS_ENGINE_GEOMETRY_ADAPTER_H
-#define PE_PHYSICS_ENGINE_GEOMETRY_ADAPTER_H
-
-#include "PhysicsEngineGeometryAdapter.h"
-#include <core/DataTypes.h>
-
-namespace walberla
-{
-namespace pe
-{
-class RigidBody;
-}
-} // namespace walberla
-
-class PePhysicsEngineGeometryAdapter : public PhysicsEngineGeometryAdapter
-{
-public:
-    PePhysicsEngineGeometryAdapter();
-    // PePhysicsEngineGeometryAdapter(walberla::pe::RigidBody* peGeoObject);
-    virtual ~PePhysicsEngineGeometryAdapter() {}
-
-    void addForce(const Vector3D &force) override;
-    void addTorque(const Vector3D &torque) override;
-
-    void setForce(const Vector3D &force) override;
-    void setTorque(const Vector3D &torque) override;
-
-    void addForceAtPosition(const Vector3D &force, const Vector3D &position) override;
-    void setLinearVelolocity(const Vector3D &velocity) override;
-    void setAngularVelocity(const Vector3D &velocity) override;
-
-    void resetForceAndTorque() override;
-
-    Vector3D getVelocityAtPosition(const Vector3D &position) const override;
-    Vector3D getLinearVelocity() const override;
-    Vector3D getAngularVelocity() const override;
-    Vector3D getPosition() const override;
-    Vector3D getForce() const override;
-    Vector3D getTorque() const override;
-
-    void changeState(State state) override;
-
-    int getId() const;
-    void setId(int id);
-    void setGeometry(walberla::pe::RigidBody *peGeoObject);
-
-    void setActive();
-    void setInactive();
-    bool isActive();
-    // void increaseShadowCounter();
-    // void decreaseShad
-    int shadowCounter;
-    int counter;
-
-    unsigned long long getSystemID() const { return systemID; }
-    void setSystemID(unsigned long long val) { systemID = val; }
-    bool getSemiactive() const { return semiactive; }
-    void setSemiactive(bool val) { semiactive = val; }
-
-private:
-    walberla::pe::RigidBody *peGeoObject;
-    // unsigned long long id;
-    int id;
-    // walberla::id_t systemId;
-    unsigned long long systemID;
-    bool active;
-    bool semiactive;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineMaterialAdapter.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineMaterialAdapter.cpp
deleted file mode 100644
index 6a36fa1a3b6415e255e0083ec621f1bcea03e3de..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineMaterialAdapter.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-#include "PePhysicsEngineMaterialAdapter.h"
-
-walberla::pe::MaterialID PePhysicsEngineMaterialAdapter::getPeMaterial() const
-{
-    if (walberla::pe::Material::find(name) != -1)
-        return walberla::pe::Material::find(name);
-
-    return walberla::pe::createMaterial(name, density, restitution, staticFriction, dynamicFriction, poissonRatio,
-                                        youngModul, stiffnessInNormalDirection, dampingoefficientNormalDirection,
-                                        dampingTangentialDirection);
-}
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineMaterialAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineMaterialAdapter.h
deleted file mode 100644
index 6ebfa8d1d9ca67760bffb9e06cafec256b19cf4f..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineMaterialAdapter.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef PE_PHYSICS_ENGINE_MATERIAL_ADAPTER
-#define PE_PHYSICS_ENGINE_MATERIAL_ADAPTER
-
-#include "../PhysicsEngineMaterialAdapter.h"
-#include <pe/basic.h>
-
-class PePhysicsEngineMaterialAdapter : public PhysicsEngineMaterialAdapter
-{
-public:
-    PePhysicsEngineMaterialAdapter(std::string name, double density, double restitution, double staticFriction,
-                                   double dynamicFriction, double poissonRatio, double youngModul,
-                                   double stiffnessInNormalDirection, double dampingoefficientNormalDirection,
-                                   double dampingTangentialDirection)
-        : PhysicsEngineMaterialAdapter(name, density, restitution, staticFriction, dynamicFriction, poissonRatio,
-                                       youngModul, stiffnessInNormalDirection, dampingoefficientNormalDirection,
-                                       dampingTangentialDirection)
-    {
-    }
-    virtual ~PePhysicsEngineMaterialAdapter() {}
-
-    virtual walberla::pe::MaterialID getPeMaterial() const;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.cpp
deleted file mode 100644
index 14cef406392fbfbd9862a71b0c054df85a8608ec..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-#include "PePhysicsEngineSolverAdapter.h"
-
-#include <exception>
-
-#include "pe/rigidbody/BoxFactory.h"
-#include "pe/rigidbody/PlaneFactory.h"
-#include "pe/rigidbody/SphereFactory.h"
-#include <pe/basic.h>
-#include <pe/rigidbody/UnionFactory.h>
-//#include "geometry/GeometricalFunctions.h"
-#include <mpi/Communicator.h>
-#include "PeAdapter.h"
-#include "PeLoadBalancerAdapter.h"
-#include "PePhysicsEngineGeometryAdapter.h"
-#include "PePhysicsEngineMaterialAdapter.h"
-#include "UbException.h"
-#include "UbLogger.h"
-#include "UbSystem.h"
-#include <boost/tuple/tuple.hpp>
-#include <memory>
-
-using namespace walberla;
-using namespace walberla::pe;
-
-typedef boost::tuple<walberla::pe::Box, walberla::pe::Sphere, walberla::pe::Plane> BodyTypeTuple;
-
-PePhysicsEngineSolverAdapter::PePhysicsEngineSolverAdapter(std::shared_ptr<PeParameter> peParameter,
-                                                           std::shared_ptr<PeLoadBalancerAdapter> loadBalancer)
-    : peParameter(peParameter), loadBalancer(loadBalancer)
-{
-    this->initalizePeEnvironment();
-}
-
-void PePhysicsEngineSolverAdapter::initalizePeEnvironment()
-{
-    this->initialPeBodyStorage();
-    this->initialPeBlockForest();
-    this->initalBlockData();
-    this->initalPeIntegrator();
-    this->executePeBodyTypeTuple();
-    this->initialPeChannel();
-}
-
-std::shared_ptr<PhysicsEngineGeometryAdapter> PePhysicsEngineSolverAdapter::createPhysicsEngineGeometryAdapter(
-    int id, const Vector3D &position, double radius, std::shared_ptr<PhysicsEngineMaterialAdapter> material) const
-{
-    const std::shared_ptr<PePhysicsEngineMaterialAdapter> peMaterial =
-        std::dynamic_pointer_cast<PePhysicsEngineMaterialAdapter>(material);
-    std::shared_ptr<PePhysicsEngineGeometryAdapter> peGeometryAdapter(new PePhysicsEngineGeometryAdapter());
-
-    // UBLOG(logINFO, "PePhysicsEngineSolverAdapter::createSphere():start");
-    walberla::pe::GeomID peGeometry = createSphere(*globalBodyStorage, *forest, *storageId, id,
-                                                   PeConverter::convert(position), radius, peMaterial->getPeMaterial());
-    // UBLOG(logINFO, "PePhysicsEngineSolverAdapter::createSphere():end");
-
-    if (peGeometry) {
-        peGeometryAdapter->setId(id);
-        peGeometryAdapter->setSystemID(peGeometry->getSystemID());
-        peGeometryAdapter->setActive();
-        peGeometryAdapter->setGeometry(peGeometry);
-        return peGeometryAdapter;
-    } else {
-        peGeometryAdapter->setId(id);
-        peGeometryAdapter->setInactive();
-        return peGeometryAdapter;
-    }
-
-    walberla::pe::syncNextNeighbors<BodyTypeTuple>(*forest, *storageId);
-}
-
-void PePhysicsEngineSolverAdapter::runTimestep(double step)
-{
-    cr->timestep(walberla::real_c(step));
-    walberla::pe::syncNextNeighbors<BodyTypeTuple>(*forest, *storageId);
-}
-
-void PePhysicsEngineSolverAdapter::initialPeBodyStorage()
-{
-    globalBodyStorage = std::make_shared<walberla::pe::BodyStorage>();
-}
-
-void PePhysicsEngineSolverAdapter::initialPeBlockForest()
-{
-
-    // walberla::SetupBlockForest sforest =
-    // walberla::blockforest::createUniformBlockGrid(walberla::AABB(peParameter->simulationDomain[0],
-    // peParameter->simulationDomain[1], peParameter->simulationDomain[2],
-    //   peParameter->simulationDomain[3], peParameter->simulationDomain[4], peParameter->simulationDomain[5]), //
-    //   simulationDomain walberla::uint_t(val<1>(peParameter->numberOfBlocks)),
-    //   walberla::uint_t(val<2>(peParameter->numberOfBlocks)),
-    //   walberla::uint_t(val<3>(peParameter->numberOfBlocks)),walberla::uint_t(10),walberla::uint_t(10),walberla::uint_t(10),
-    //   5.0,false);
-    walberla::SetupBlockForest sforest;
-    // sforest.addWorkloadMemorySUIDAssignmentFunction( uniformWorkloadAndMemoryAssignment );
-    sforest.init(walberla::AABB(peParameter->simulationDomain[0], peParameter->simulationDomain[1],
-                                peParameter->simulationDomain[2], peParameter->simulationDomain[3],
-                                peParameter->simulationDomain[4], peParameter->simulationDomain[5]), // simulationDomain
-                 walberla::uint_t(val<1>(peParameter->numberOfBlocks)),
-                 walberla::uint_t(val<2>(peParameter->numberOfBlocks)),
-                 walberla::uint_t(val<3>(peParameter->numberOfBlocks)), // blocks in each direction
-                 val<1>(peParameter->isPeriodic), val<2>(peParameter->isPeriodic), val<3>(peParameter->isPeriodic));
-    sforest.balanceLoad(*loadBalancer.get(), loadBalancer->getNumberOfProcesses());
-    forest = std::shared_ptr<walberla::blockforest::BlockForest>(
-        new walberla::blockforest::BlockForest(walberla::uint_c(loadBalancer->getRank()), sforest));
-
-    auto mpiManager = walberla::MPIManager::instance();
-    mpiManager->useWorldComm();
-    if (!forest)
-        throw std::runtime_error("No PE BlockForest created ... ");
-}
-
-void PePhysicsEngineSolverAdapter::initalBlockData()
-{
-    storageId = std::make_shared<walberla::domain_decomposition::BlockDataID>(
-        forest->addBlockData(walberla::pe::createStorageDataHandling<BodyTypeTuple>(), "Storage"));
-}
-
-void PePhysicsEngineSolverAdapter::initalPeIntegrator()
-{
-    auto ccdID =
-        forest->addBlockData(walberla::pe::ccd::createHashGridsDataHandling(globalBodyStorage, *storageId), "CCD");
-    auto fcdID = forest->addBlockData(
-        walberla::pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, walberla::pe::fcd::AnalyticCollideFunctor>(),
-        "FCD");
-
-    cr = std::make_shared<walberla::pe::cr::HardContactSemiImplicitTimesteppingSolvers>(globalBodyStorage, forest,
-                                                                                        *storageId, ccdID, fcdID);
-    cr->setMaxIterations(peParameter->maxPeIterations);
-    cr->setRelaxationModel(
-        walberla::pe::cr::HardContactSemiImplicitTimesteppingSolvers::ApproximateInelasticCoulombContactByDecoupling);
-    cr->setRelaxationParameter(walberla::real_t(peParameter->relaxationParameter));
-    cr->setGlobalLinearAcceleration(PeConverter::convert(peParameter->globalLinearAcceleration));
-}
-
-void PePhysicsEngineSolverAdapter::executePeBodyTypeTuple() { walberla::pe::SetBodyTypeIDs<BodyTypeTuple>::execute(); }
-
-void PePhysicsEngineSolverAdapter::initialPeChannel() const
-{
-    const walberla::pe::MaterialID material = peParameter->planes->getPeMaterial();
-
-    auto simulationDomain = forest->getDomain();
-
-    // createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(1, 0, 0), simulationDomain.minCorner(), material);
-    // createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(-1, 0, 0), simulationDomain.maxCorner(), material);
-    // createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(0, 1, 0), simulationDomain.minCorner(), material);
-    // createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(0, -1, 0), simulationDomain.maxCorner(), material);
-    // createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(0, 0, 1), simulationDomain.minCorner(), material);
-    // createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(0, 0, -1), simulationDomain.maxCorner(), material);
-
-    Vector3D minOffset = peParameter->minOffset;
-    Vector3D maxOffset = peParameter->maxOffset;
-
-    walberla::pe::Vec3 minX1_Offset(minOffset.X1(), 0, 0);
-    walberla::pe::Vec3 maxX1_Offset(maxOffset.X1(), 0, 0);
-    walberla::pe::Vec3 minX2_Offset(0, minOffset.X2(), 0);
-    walberla::pe::Vec3 maxX2_Offset(0, maxOffset.X2(), 0);
-    walberla::pe::Vec3 minX3_Offset(0, 0, minOffset.X3());
-    walberla::pe::Vec3 maxX3_Offset(0, 0, maxOffset.X3());
-
-    walberla::pe::Vec3 minCorner = simulationDomain.minCorner();
-    walberla::pe::Vec3 maxCorner = simulationDomain.maxCorner();
-
-    createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(1, 0, 0), minCorner + minX1_Offset, material);
-    createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(-1, 0, 0), maxCorner + maxX1_Offset, material);
-    createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(0, 1, 0), minCorner + minX2_Offset, material);
-    createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(0, -1, 0), maxCorner + maxX2_Offset, material);
-    createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(0, 0, 1), minCorner + minX3_Offset, material);
-    createPlane(*globalBodyStorage, 0, walberla::pe::Vec3(0, 0, -1), maxCorner + maxX3_Offset, material);
-}
-
-std::shared_ptr<walberla::blockforest::BlockForest> PePhysicsEngineSolverAdapter::getForest() { return forest; }
-
-void PePhysicsEngineSolverAdapter::saveToFile(const std::string &path)
-{
-    forest->saveToFile(path + "SerializeDeserialize.sbf");
-    forest->saveBlockData("SerializeDeserialize.dump", *storageId.get());
-}
-
-void PePhysicsEngineSolverAdapter::loadFromFile(const std::string &path)
-{
-    // forest = std::make_shared< walberla::blockforest::BlockForest >( walberla::uint_c(
-    // walberla::MPIManager::instance()->rank() ), path+"SerializeDeserialize.sbf", true, false );
-    std::string file = path + "SerializeDeserialize.sbf";
-    forest           = std::shared_ptr<walberla::blockforest::BlockForest>(new walberla::blockforest::BlockForest(
-        walberla::uint_c(walberla::MPIManager::instance()->rank()), file.c_str(), true, false));
-    storageId        = std::make_shared<walberla::domain_decomposition::BlockDataID>(forest->loadBlockData(
-        path + "SerializeDeserialize.dump", walberla::pe::createStorageDataHandling<BodyTypeTuple>(), "Storage"));
-
-    this->initalPeIntegrator();
-
-    auto ccdID =
-        forest->addBlockData(walberla::pe::ccd::createHashGridsDataHandling(globalBodyStorage, *storageId), "CCD");
-    auto fcdID = forest->addBlockData(
-        walberla::pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, walberla::pe::fcd::AnalyticCollideFunctor>(),
-        "FCD");
-
-    cr = std::make_shared<walberla::pe::cr::HardContactSemiImplicitTimesteppingSolvers>(globalBodyStorage, forest,
-                                                                                        *storageId, ccdID, fcdID);
-    cr->setMaxIterations(peParameter->maxPeIterations);
-    cr->setRelaxationModel(
-        walberla::pe::cr::HardContactSemiImplicitTimesteppingSolvers::ApproximateInelasticCoulombContactByDecoupling);
-    cr->setRelaxationParameter(walberla::real_t(peParameter->relaxationParameter));
-    cr->setGlobalLinearAcceleration(PeConverter::convert(peParameter->globalLinearAcceleration));
-
-    this->executePeBodyTypeTuple();
-    this->initialPeChannel();
-
-    for (auto blockIt = forest->begin(); blockIt != forest->end(); ++blockIt) {
-        walberla::pe::ccd::ICCD *ccd = blockIt->getData<walberla::pe::ccd::ICCD>(ccdID);
-        ccd->reloadBodies();
-    }
-}
-
-std::shared_ptr<walberla::blockforest::BlockForest> PePhysicsEngineSolverAdapter::getBlockForest() { return forest; }
-
-std::shared_ptr<walberla::domain_decomposition::BlockDataID> PePhysicsEngineSolverAdapter::getStorageId()
-{
-    return storageId;
-}
-
-std::shared_ptr<walberla::pe::BodyStorage> PePhysicsEngineSolverAdapter::getGlobalBodyStorage()
-{
-    return globalBodyStorage;
-}
-
-void PePhysicsEngineSolverAdapter::createObstacle(const Vector3D &center, const Vector3D &lengths)
-{
-    const walberla::pe::MaterialID material = peParameter->planes->getPeMaterial();
-    bool global                             = true;
-    bool communicating                      = false;
-    bool infiniteMass                       = true;
-
-    walberla::pe::createBox(*globalBodyStorage, *forest, *storageId, 0, PeConverter::convert(center),
-                            PeConverter::convert(lengths), material, global, communicating, infiniteMass);
-}
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.h b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.h
deleted file mode 100644
index 5b2ef94b8a59b5073aebc7eb999db6e9eb860e3d..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef PE_PHYSICS_ENGINE_SOLVER_ADAPTER_H
-#define PE_PHYSICS_ENGINE_SOLVER_ADAPTER_H
-
-#include <memory>
-#include <shared_mutex>
-
-#include "UbTuple.h"
-#include <pe/basic.h>
-
-#include "PePhysicsEngineSolverAdapter.h"
-#include "PhysicsEngineSolverAdapter.h"
-
-class PePhysicsEngineMaterialAdapter;
-class PhysicsEngineGeometryAdapter;
-class PePhysicsEngineGeometryAdapter;
-class PeLoadBalancerAdapter;
-
-namespace walberla
-{
-namespace domain_decomposition
-{
-class BlockDataID;
-}
-namespace blockforest
-{
-class BlockForest;
-}
-namespace pe
-{
-class BodyStorage;
-class RigidBody;
-namespace cr
-{
-class HardContactSemiImplicitTimesteppingSolvers;
-}
-} // namespace pe
-} // namespace walberla
-
-struct PeParameter {
-    PeParameter(double relaxationParameter, int maxPeIterations, Vector3D globalLinearAcceleration,
-                std::shared_ptr<PePhysicsEngineMaterialAdapter> planes, std::array<double, 6> simulationDomain,
-                UbTupleInt3 numberOfBlocks, UbTupleBool3 isPeriodic, Vector3D minOffset, Vector3D maxOffset)
-        : relaxationParameter(relaxationParameter), maxPeIterations(maxPeIterations),
-          globalLinearAcceleration(globalLinearAcceleration), simulationDomain(simulationDomain),
-          numberOfBlocks(numberOfBlocks), isPeriodic(isPeriodic), planes(planes), minOffset(minOffset),
-          maxOffset(maxOffset)
-    {
-    }
-
-    double relaxationParameter;
-    int maxPeIterations;
-    Vector3D globalLinearAcceleration;
-
-    std::array<double, 6> simulationDomain;
-    UbTupleInt3 numberOfBlocks;
-    UbTupleBool3 isPeriodic;
-
-    std::shared_ptr<PePhysicsEngineMaterialAdapter> planes;
-
-    Vector3D minOffset;
-    Vector3D maxOffset;
-};
-
-class PePhysicsEngineSolverAdapter : public PhysicsEngineSolverAdapter
-{
-public:
-    PePhysicsEngineSolverAdapter(std::shared_ptr<PeParameter> peParameter,
-                                 std::shared_ptr<PeLoadBalancerAdapter> loadBalancer);
-    virtual ~PePhysicsEngineSolverAdapter() {}
-
-    std::shared_ptr<PhysicsEngineGeometryAdapter>
-    createPhysicsEngineGeometryAdapter(int id, const Vector3D &position, double radius,
-                                       std::shared_ptr<PhysicsEngineMaterialAdapter> material) const override;
-    void runTimestep(double step) override;
-    std::shared_ptr<walberla::blockforest::BlockForest> getForest();
-    void saveToFile(const std::string &path);
-    void loadFromFile(const std::string &path);
-    std::shared_ptr<walberla::blockforest::BlockForest> getBlockForest();
-    std::shared_ptr<walberla::domain_decomposition::BlockDataID> getStorageId();
-    std::shared_ptr<walberla::pe::BodyStorage> getGlobalBodyStorage();
-    void createObstacle(const Vector3D &center, const Vector3D &lengths);
-
-private:
-    void initalizePeEnvironment();
-    void initialPeBodyStorage();
-    void initialPeBlockForest();
-    void initalBlockData();
-
-    void initalPeIntegrator();
-    static void executePeBodyTypeTuple();
-    void initialPeChannel() const;
-
-private:
-    std::shared_ptr<PeParameter> peParameter;
-    std::shared_ptr<PeLoadBalancerAdapter> loadBalancer;
-
-    std::shared_ptr<walberla::pe::BodyStorage> globalBodyStorage;
-    std::shared_ptr<walberla::blockforest::BlockForest> forest;
-    std::shared_ptr<walberla::domain_decomposition::BlockDataID> storageId;
-    std::shared_ptr<walberla::pe::cr::HardContactSemiImplicitTimesteppingSolvers> cr;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/reconstructor/EquilibriumReconstructor.cpp b/src/cpu/DemCoupling/reconstructor/EquilibriumReconstructor.cpp
deleted file mode 100644
index c5486db0e23d6df29a04e724471bac74c01ceec8..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/EquilibriumReconstructor.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "EquilibriumReconstructor.h"
-
-#include "BCArray3D.h"
-#include "BCProcessor.h"
-#include "D3Q27System.h"
-#include "DataSet3D.h"
-#include "ILBMKernel.h"
-
-#include "PhysicsEngineGeometryAdapter.h"
-
-void EquilibriumReconstructor::reconstructNode(const int &x1, const int &x2, const int &x3,
-                                               const Vector3D &worldCoordinates,
-                                               std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                                               std::shared_ptr<ILBMKernel> kernel) const
-{
-    const double averageDensity = this->getLocalAverageDensity(x1, x2, x3, kernel);
-    LBMReal feq[27];
-    const Vector3D boundaryVelocity = physicsEngineGeometry->getVelocityAtPosition(worldCoordinates);
-
-    if (kernel->getCompressible())
-        D3Q27System::calcCompFeq(feq, averageDensity, boundaryVelocity[0], boundaryVelocity[1], boundaryVelocity[2]);
-    else
-        D3Q27System::calcIncompFeq(feq, averageDensity, boundaryVelocity[0], boundaryVelocity[1], boundaryVelocity[2]);
-
-    SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    // distributions->setDistribution(feq, x1, x2, x3);
-    distributions->setDistributionInv(feq, x1, x2, x3);
-}
-
-double EquilibriumReconstructor::getLocalAverageDensity(const int &x1, const int &x2, const int &x3,
-                                                        std::shared_ptr<ILBMKernel> kernel) const
-{
-    int nAverage          = 0;
-    double averageDensity = 0.0;
-
-    SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
-
-    LBMReal f[D3Q27System::ENDF + 1];
-    SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-
-    int neighborX1, neighborX2, neighborX3;
-    for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++) {
-        neighborX1 = x1 + D3Q27System::DX1[fDir];
-        neighborX2 = x2 + D3Q27System::DX2[fDir];
-        neighborX3 = x3 + D3Q27System::DX3[fDir];
-
-        if (bcArray->isFluid(neighborX1, neighborX2, neighborX3)) {
-            distributions->getDistribution(f, neighborX1, neighborX2, neighborX3);
-            averageDensity += D3Q27System::getDensity(f);
-            ++nAverage;
-        }
-    }
-    return (nAverage > 0) ? averageDensity / nAverage : 0.0;
-}
diff --git a/src/cpu/DemCoupling/reconstructor/EquilibriumReconstructor.h b/src/cpu/DemCoupling/reconstructor/EquilibriumReconstructor.h
deleted file mode 100644
index 4a5c0071922645f66baeb6dcf8577f7003ca9078..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/EquilibriumReconstructor.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef EQUILIBRIUM_RECONSTRUCTOR_H
-#define EQUILIBRIUM_RECONSTRUCTOR_H
-
-#include "UbTuple.h"
-
-#include "Reconstructor.h"
-
-class ILBMKernel;
-class PhysicsEngineGeometryAdapter;
-
-class EquilibriumReconstructor : public Reconstructor
-{
-public:
-    virtual ~EquilibriumReconstructor() {}
-
-    void reconstructNode(const int &x1, const int &x2, const int &x3, const Vector3D &worldCoordinates,
-                         std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                         std::shared_ptr<ILBMKernel> kernel) const override;
-
-private:
-    double getLocalAverageDensity(const int &x1, const int &x2, const int &x3,
-                                  std::shared_ptr<ILBMKernel> kernel) const;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/reconstructor/ExtrapolationReconstructor.cpp b/src/cpu/DemCoupling/reconstructor/ExtrapolationReconstructor.cpp
deleted file mode 100644
index 8bd915b6c130fd8a1b70cf9e54cbfdfc60b6efcc..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/ExtrapolationReconstructor.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-#include "ExtrapolationReconstructor.h"
-
-#include "BCArray3D.h"
-#include "BCProcessor.h"
-#include "D3Q27System.h"
-#include "DataSet3D.h"
-#include "ILBMKernel.h"
-
-#include "DistributionArray3D.h"
-#include "PhysicsEngineGeometryAdapter.h"
-
-void ExtrapolationReconstructor::setAlternativeReconstructor(std::shared_ptr<Reconstructor> alternativeReconstructor)
-{
-    this->alternativeReconstructor = alternativeReconstructor;
-}
-
-ExtrapolationReconstructor::ExtrapolationReconstructor(std::shared_ptr<Reconstructor> alternativeReconstructor)
-    : alternativeReconstructor(alternativeReconstructor)
-{
-}
-
-void ExtrapolationReconstructor::reconstructNode(const int &x1, const int &x2, const int &x3,
-                                                 const Vector3D &worldCoordinates,
-                                                 std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                                                 std::shared_ptr<ILBMKernel> kernel) const
-{
-    const UbTupleInt3 extrapolationDirection = getSphereDirection(worldCoordinates, physicsEngineGeometry);
-    const int numberOfCellsForExtrapolation = getNumberOfExtrapolationCells(x1, x2, x3, extrapolationDirection, kernel);
-
-    // if (numberOfCellsForExtrapolation < 2)
-    alternativeReconstructor->reconstructNode(x1, x2, x3, worldCoordinates, physicsEngineGeometry, kernel);
-    // else
-    //{
-    //    //UBLOG(logINFO, "point (x,y,z) " << val<1>(worldCoordinates) << ", " << val<2>(worldCoordinates) << ", " <<
-    //    val<3>(worldCoordinates));
-    //    //UBLOG(logINFO, "extradir (x,y,z) " << val<1>(extrapolationDirection) << ", " <<
-    //    val<2>(extrapolationDirection) << ", " << val<3>(extrapolationDirection));
-    //    //UBLOG(logINFO, "numberOfCellsForExtrapolation: " << numberOfCellsForExtrapolation );
-
-    //    this->extrapolatePdFs(x1, x2, x3, extrapolationDirection, numberOfCellsForExtrapolation, kernel);
-    //}
-}
-
-UbTupleInt3 ExtrapolationReconstructor::getSphereDirection(
-    const Vector3D &worldCoordinates, std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry) const
-{
-    const Vector3D spherePosition = physicsEngineGeometry->getPosition();
-    const Vector3D bodyNormal     = worldCoordinates - spherePosition;
-    return this->getCorrespondingLatticeDirection(bodyNormal);
-}
-
-UbTupleInt3 ExtrapolationReconstructor::getCorrespondingLatticeDirection(const Vector3D &direction) const
-{
-    int correspondingDirection = 0;
-    double innerProduct        = 0.0;
-    for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++) {
-        // compute inner product <dir,c_i>
-        const double temporaryInnerProduct = direction[0] * D3Q27System::cNorm[0][fDir] +
-                                             direction[1] * D3Q27System::cNorm[1][fDir] +
-                                             direction[2] * D3Q27System::cNorm[2][fDir];
-        if (temporaryInnerProduct > innerProduct) {
-            innerProduct           = temporaryInnerProduct;
-            correspondingDirection = fDir;
-        }
-    }
-
-    return UbTupleInt3(D3Q27System::DX1[correspondingDirection], D3Q27System::DX2[correspondingDirection],
-                       D3Q27System::DX3[correspondingDirection]);
-}
-
-int ExtrapolationReconstructor::getNumberOfExtrapolationCells(const int x1, const int x2, const int x3,
-                                                              const UbTupleInt3 &extrapolationDirection,
-                                                              std::shared_ptr<ILBMKernel> kernel) const
-{
-    if (extrapolationDirection == UbTupleInt3(0, 0, 0))
-        return 0;
-
-    const int desiredCellsInExtrapolationDirection = 3;
-
-    for (int numCells = 1; numCells <= desiredCellsInExtrapolationDirection; ++numCells) {
-        UbTupleInt3 neighbor(x1 + numCells * val<1>(extrapolationDirection),
-                             x2 + numCells * val<2>(extrapolationDirection),
-                             x3 + numCells * val<3>(extrapolationDirection));
-
-        if (!kernel->isInsideOfDomain(val<1>(neighbor), val<2>(neighbor), val<3>(neighbor)))
-            return numCells - 1;
-
-        if (!kernel->getBCProcessor()->getBCArray()->isFluid(val<1>(neighbor), val<2>(neighbor), val<3>(neighbor)))
-            return numCells - 1;
-    }
-    return desiredCellsInExtrapolationDirection;
-}
-
-void ExtrapolationReconstructor::extrapolatePdFs(const int x1, const int x2, const int x3,
-                                                 const UbTupleInt3 &extrapolationDirection,
-                                                 int numberOfCellsForExtrapolation,
-                                                 std::shared_ptr<ILBMKernel> kernel) const
-{
-    SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-
-    const int nx1 = val<1>(extrapolationDirection);
-    const int nx2 = val<2>(extrapolationDirection);
-    const int nx3 = val<3>(extrapolationDirection);
-
-    LBMReal pdf[D3Q27System::ENDF + 1];
-    LBMReal pdfNeighbor1[D3Q27System::ENDF + 1];
-    LBMReal pdfNeighbor2[D3Q27System::ENDF + 1];
-
-    distributions->getDistribution(pdf, x1, x2, x3);
-    distributions->getDistribution(pdfNeighbor1, x1 + nx1, x2 + nx2, x3 + nx3);
-    distributions->getDistribution(pdfNeighbor2, x1 + 2 * nx1, x2 + 2 * nx2, x3 + 2 * nx3);
-
-    if (numberOfCellsForExtrapolation == 3) // quadratic normal extrapolation
-    {
-        LBMReal pdfNeighbor3[D3Q27System::ENDF + 1];
-        distributions->getDistribution(pdfNeighbor3, x1 + 3 * nx1, x2 + 3 * nx2, x3 + 3 * nx3);
-
-        for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++)
-            pdf[fDir] = 3 * pdfNeighbor1[fDir] - 3 * pdfNeighbor2[fDir] + pdfNeighbor3[fDir];
-    } else // numberOfCellsForExtrapolation == 2 // linear normal extrapolation
-    {
-        for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++)
-            pdf[fDir] = 2 * pdfNeighbor1[fDir] - pdfNeighbor2[fDir];
-    }
-
-    distributions->setDistribution(pdf, x1, x2, x3);
-}
diff --git a/src/cpu/DemCoupling/reconstructor/ExtrapolationReconstructor.h b/src/cpu/DemCoupling/reconstructor/ExtrapolationReconstructor.h
deleted file mode 100644
index 2844fdf2490a43e6db31ea9bf32dbcdea67d6f34..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/ExtrapolationReconstructor.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef EXTRAPOLATION_RECONSTRUCTOR_H
-#define EXTRAPOLATION_RECONSTRUCTOR_H
-
-#include <memory>
-
-#include "UbTuple.h"
-
-#include "Reconstructor.h"
-
-class ILBMKernel;
-class PhysicsEngineGeometryAdapter;
-
-class ExtrapolationReconstructor : public Reconstructor
-{
-public:
-    ExtrapolationReconstructor(std::shared_ptr<Reconstructor> alternativeReconstructor);
-    virtual ~ExtrapolationReconstructor() {}
-
-    void reconstructNode(const int &x1, const int &x2, const int &x3, const Vector3D &worldCoordinates,
-                         std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                         std::shared_ptr<ILBMKernel> kernel) const override;
-
-    void setAlternativeReconstructor(std::shared_ptr<Reconstructor> alternativeReconstructor);
-
-private:
-    int getNumberOfExtrapolationCells(const int x1, const int x2, const int x3, const UbTupleInt3 &ubTuple,
-                                      std::shared_ptr<ILBMKernel> kernel) const;
-    UbTupleInt3 getSphereDirection(const Vector3D &worldCoordinates,
-                                   std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry) const;
-    UbTupleInt3 getCorrespondingLatticeDirection(const Vector3D &direction) const;
-    void extrapolatePdFs(const int x1, const int x2, const int x3, const UbTupleInt3 &ubTuple,
-                         int numberOfCellsForExtrapolation, std::shared_ptr<ILBMKernel> kernel) const;
-
-    std::shared_ptr<Reconstructor> alternativeReconstructor;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/reconstructor/LBMReconstructor.cpp b/src/cpu/DemCoupling/reconstructor/LBMReconstructor.cpp
deleted file mode 100644
index c6dfdc6dfbe064918fc50faf3222ed14d2b3a8d3..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/LBMReconstructor.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-#include "LBMReconstructor.h"
-
-#include "BCArray3D.h"
-#include "BCProcessor.h"
-#include "D3Q27System.h"
-#include "DataSet3D.h"
-#include "ILBMKernel.h"
-
-#include "PhysicsEngineGeometryAdapter.h"
-
-using namespace D3Q27System;
-
-LBMReconstructor::LBMReconstructor(bool compressible)
-{
-    if (compressible) {
-        calcMacrosFct = &D3Q27System::calcCompMacroscopicValues;
-    } else {
-        calcMacrosFct = &D3Q27System::calcIncompMacroscopicValues;
-    }
-}
-
-void LBMReconstructor::reconstructNode(const int &x1, const int &x2, const int &x3, const Vector3D &worldCoordinates,
-                                       std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                                       std::shared_ptr<ILBMKernel> kernel) const
-{
-    LBMReal pdf[D3Q27System::ENDF + 1];
-
-    LBMReal rho, vx1, vx2, vx3;
-    calcMacrosFct(pdf, rho, vx1, vx2, vx3);
-
-    LBMReal rho_dif = 1;
-
-    while (rho_dif > 1e-5) {
-        for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++) {
-
-            UbTupleInt3 neighbor(x1 + D3Q27System::DX1[fDir], x2 + D3Q27System::DX2[fDir], x3 + D3Q27System::DX3[fDir]);
-
-            if (!kernel->getBCProcessor()->getBCArray()->isFluid(val<1>(neighbor), val<2>(neighbor),
-                                                                 val<3>(neighbor))) {
-                LBMReal pdfNeighbor[D3Q27System::ENDF + 1];
-                SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-                const int invDir                        = D3Q27System::INVDIR[fDir];
-                distributions->getDistributionForDirection(pdfNeighbor[invDir], val<1>(neighbor), val<2>(neighbor),
-                                                           val<3>(neighbor));
-                distributions->setDistributionInvForDirection(pdf[invDir], x1, x2, x3, invDir);
-            }
-        }
-    }
-
-    LBMReal collFactor = kernel->getCollisionFactor();
-    collide(pdf, collFactor);
-}
-
-void LBMReconstructor::collide(LBMReal *f, LBMReal collFactor)
-{
-
-    LBMReal drho, vx1, vx2, vx3;
-    LBMReal feq[D3Q27System::ENDF + 1];
-
-    drho = ((f[TNE] + f[BSW]) + (f[TSE] + f[BNW])) + ((f[BSE] + f[TNW]) + (f[TSW] + f[BNE])) +
-           (((f[NE] + f[SW]) + (f[SE] + f[NW])) + ((f[TE] + f[BW]) + (f[BE] + f[TW])) +
-            ((f[BN] + f[TS]) + (f[TN] + f[BS]))) +
-           ((f[E] + f[W]) + (f[N] + f[S]) + (f[T] + f[B])) + f[ZERO];
-
-    vx1 = ((((f[TNE] - f[BSW]) + (f[TSE] - f[BNW])) + ((f[BSE] - f[TNW]) + (f[BNE] - f[TSW]))) +
-           (((f[BE] - f[TW]) + (f[TE] - f[BW])) + ((f[SE] - f[NW]) + (f[NE] - f[SW]))) + (f[E] - f[W]));
-
-    vx2 = ((((f[TNE] - f[BSW]) + (f[BNW] - f[TSE])) + ((f[TNW] - f[BSE]) + (f[BNE] - f[TSW]))) +
-           (((f[BN] - f[TS]) + (f[TN] - f[BS])) + ((f[NW] - f[SE]) + (f[NE] - f[SW]))) + (f[N] - f[S]));
-
-    vx3 = ((((f[TNE] - f[BSW]) + (f[TSE] - f[BNW])) + ((f[TNW] - f[BSE]) + (f[TSW] - f[BNE]))) +
-           (((f[TS] - f[BN]) + (f[TN] - f[BS])) + ((f[TW] - f[BE]) + (f[TE] - f[BW]))) + (f[T] - f[B]));
-
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
-
-    feq[ZERO] = c8o27 * (drho - cu_sq);
-    feq[E]    = c2o27 * (drho + 3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq);
-    feq[W]    = c2o27 * (drho + 3.0 * (-vx1) + c9o2 * (-vx1) * (-vx1) - cu_sq);
-    feq[N]    = c2o27 * (drho + 3.0 * (vx2) + c9o2 * (vx2) * (vx2)-cu_sq);
-    feq[S]    = c2o27 * (drho + 3.0 * (-vx2) + c9o2 * (-vx2) * (-vx2) - cu_sq);
-    feq[T]    = c2o27 * (drho + 3.0 * (vx3) + c9o2 * (vx3) * (vx3)-cu_sq);
-    feq[B]    = c2o27 * (drho + 3.0 * (-vx3) + c9o2 * (-vx3) * (-vx3) - cu_sq);
-    feq[NE]   = c1o54 * (drho + 3.0 * (vx1 + vx2) + c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq);
-    feq[SW]   = c1o54 * (drho + 3.0 * (-vx1 - vx2) + c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq);
-    feq[SE]   = c1o54 * (drho + 3.0 * (vx1 - vx2) + c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq);
-    feq[NW]   = c1o54 * (drho + 3.0 * (-vx1 + vx2) + c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq);
-    feq[TE]   = c1o54 * (drho + 3.0 * (vx1 + vx3) + c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq);
-    feq[BW]   = c1o54 * (drho + 3.0 * (-vx1 - vx3) + c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq);
-    feq[BE]   = c1o54 * (drho + 3.0 * (vx1 - vx3) + c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq);
-    feq[TW]   = c1o54 * (drho + 3.0 * (-vx1 + vx3) + c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq);
-    feq[TN]   = c1o54 * (drho + 3.0 * (vx2 + vx3) + c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq);
-    feq[BS]   = c1o54 * (drho + 3.0 * (-vx2 - vx3) + c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq);
-    feq[BN]   = c1o54 * (drho + 3.0 * (vx2 - vx3) + c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq);
-    feq[TS]   = c1o54 * (drho + 3.0 * (-vx2 + vx3) + c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq);
-    feq[TNE]  = c1o216 * (drho + 3.0 * (vx1 + vx2 + vx3) + c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq);
-    feq[BSW]  = c1o216 * (drho + 3.0 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
-    feq[BNE]  = c1o216 * (drho + 3.0 * (vx1 + vx2 - vx3) + c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq);
-    feq[TSW]  = c1o216 * (drho + 3.0 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
-    feq[TSE]  = c1o216 * (drho + 3.0 * (vx1 - vx2 + vx3) + c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq);
-    feq[BNW]  = c1o216 * (drho + 3.0 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
-    feq[BSE]  = c1o216 * (drho + 3.0 * (vx1 - vx2 - vx3) + c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq);
-    feq[TNW]  = c1o216 * (drho + 3.0 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
-
-    // Relaxation
-    f[ZERO] += (feq[ZERO] - f[ZERO]) * collFactor;
-    f[E] += (feq[E] - f[E]) * collFactor;
-    f[W] += (feq[W] - f[W]) * collFactor;
-    f[N] += (feq[N] - f[N]) * collFactor;
-    f[S] += (feq[S] - f[S]) * collFactor;
-    f[T] += (feq[T] - f[T]) * collFactor;
-    f[B] += (feq[B] - f[B]) * collFactor;
-    f[NE] += (feq[NE] - f[NE]) * collFactor;
-    f[SW] += (feq[SW] - f[SW]) * collFactor;
-    f[SE] += (feq[SE] - f[SE]) * collFactor;
-    f[NW] += (feq[NW] - f[NW]) * collFactor;
-    f[TE] += (feq[TE] - f[TE]) * collFactor;
-    f[BW] += (feq[BW] - f[BW]) * collFactor;
-    f[BE] += (feq[BE] - f[BE]) * collFactor;
-    f[TW] += (feq[TW] - f[TW]) * collFactor;
-    f[TN] += (feq[TN] - f[TN]) * collFactor;
-    f[BS] += (feq[BS] - f[BS]) * collFactor;
-    f[BN] += (feq[BN] - f[BN]) * collFactor;
-    f[TS] += (feq[TS] - f[TS]) * collFactor;
-
-    f[TNE] += (feq[TNE] - f[TNE]) * collFactor;
-    f[BSW] += (feq[BSW] - f[BSW]) * collFactor;
-    f[BNE] += (feq[BNE] - f[BNE]) * collFactor;
-    f[TSW] += (feq[TSW] - f[TSW]) * collFactor;
-    f[TSE] += (feq[TSE] - f[TSE]) * collFactor;
-    f[BNW] += (feq[BNW] - f[BNW]) * collFactor;
-    f[BSE] += (feq[BSE] - f[BSE]) * collFactor;
-    f[TNW] += (feq[TNW] - f[TNW]) * collFactor;
-}
\ No newline at end of file
diff --git a/src/cpu/DemCoupling/reconstructor/LBMReconstructor.h b/src/cpu/DemCoupling/reconstructor/LBMReconstructor.h
deleted file mode 100644
index 173f008a30bc9f1edf4b79eecce87fc0941d9f62..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/LBMReconstructor.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef LBM_RECONSTRUCTOR_H
-#define LBM_RECONSTRUCTOR_H
-
-#include "UbTuple.h"
-
-#include "Reconstructor.h"
-
-#include "LBMSystem.h"
-
-class ILBMKernel;
-class PhysicsEngineGeometryAdapter;
-
-class LBMReconstructor : public Reconstructor
-{
-public:
-    LBMReconstructor(bool compressible);
-    virtual ~LBMReconstructor() {}
-
-    void reconstructNode(const int &x1, const int &x2, const int &x3, const Vector3D &worldCoordinates,
-                         std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                         std::shared_ptr<ILBMKernel> kernel) const override;
-
-private:
-    static void collide(LBMReal *f, LBMReal collFactor);
-
-    typedef void (*CalcMacrosFct)(const LBMReal *const & /*f[27]*/, LBMReal & /*rho*/, LBMReal & /*vx1*/,
-                                  LBMReal & /*vx2*/, LBMReal & /*vx3*/);
-    CalcMacrosFct calcMacrosFct;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/reconstructor/Reconstructor.h b/src/cpu/DemCoupling/reconstructor/Reconstructor.h
deleted file mode 100644
index 15355d515dc332521583955f225c3e8758bb5fb7..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/Reconstructor.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef RECONSTRCUTOR_H
-#define RECONSTRCUTOR_H
-
-#include <PointerDefinitions.h>
-
-#include "Vector3D.h"
-
-class ILBMKernel;
-class PhysicsEngineGeometryAdapter;
-
-class Reconstructor
-{
-public:
-    virtual ~Reconstructor() {}
-
-    virtual void reconstructNode(const int &x1, const int &x2, const int &x3, const Vector3D &worldCoordinates,
-                                 SPtr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                                 std::shared_ptr<ILBMKernel> kernel) const = 0;
-};
-
-#endif
diff --git a/src/cpu/DemCoupling/reconstructor/VelocityBcReconstructor.cpp b/src/cpu/DemCoupling/reconstructor/VelocityBcReconstructor.cpp
deleted file mode 100644
index c48586ca4ed170d7129990590a7e7ec32154c5f7..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/VelocityBcReconstructor.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "VelocityBcReconstructor.h"
-
-#include <exception>
-
-#include "BCArray3D.h"
-#include "BCProcessor.h"
-#include "D3Q27System.h"
-#include "DataSet3D.h"
-#include "EsoTwist3D.h"
-#include "ILBMKernel.h"
-
-#include "PhysicsEngineGeometryAdapter.h"
-
-void VelocityBcReconstructor::reconstructNode(const int &x1, const int &x2, const int &x3,
-                                              const Vector3D &worldCoordinates,
-                                              std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                                              std::shared_ptr<ILBMKernel> kernel) const
-{
-    if (kernel->getCompressible())
-        throw std::runtime_error("not implemented yet!");
-
-    const Vector3D boundaryVelocity = physicsEngineGeometry->getVelocityAtPosition(worldCoordinates);
-    // TODO: move to D3Q27 system
-    LBMReal wijk[D3Q27System::ENDF + 1];
-    D3Q27System::calcIncompFeq(wijk, 1, 0, 0, 0);
-
-    SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
-
-    SPtr<BoundaryConditions> bc = SPtr<BoundaryConditions>(new BoundaryConditions());
-    bc->setBoundaryVelocityX1((float)boundaryVelocity[0]);
-    bc->setBoundaryVelocityX2((float)boundaryVelocity[1]);
-    bc->setBoundaryVelocityX3((float)boundaryVelocity[2]);
-
-    LBMReal feqNullRho[D3Q27System::ENDF + 1];
-    D3Q27System::calcIncompFeq(feqNullRho, 0, boundaryVelocity[0], boundaryVelocity[1], boundaryVelocity[2]);
-
-    LBMReal fpre[D3Q27System::ENDF + 1];
-    LBMReal fpost[D3Q27System::ENDF + 1];
-    SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-
-    distributions->swap();
-    distributions->getDistributionInv(fpost, x1, x2, x3);
-    distributions->swap();
-    distributions->getDistribution(fpre, x1, x2, x3);
-
-    int neighborX1, neighborX2, neighborX3;
-    int neighborX1Inv, neighborX2Inv, neighborX3Inv;
-
-    double sumRho = 0, sumWijk = 0;
-    double collFactor = kernel->getCollisionFactor();
-
-    for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++) {
-        neighborX1 = x1 + D3Q27System::DX1[fDir];
-        neighborX2 = x2 + D3Q27System::DX2[fDir];
-        neighborX3 = x3 + D3Q27System::DX3[fDir];
-
-        if (bcArray->isFluid(neighborX1, neighborX2, neighborX3)) {
-            int invDir = D3Q27System::INVDIR[fDir];
-
-            neighborX1Inv = x1 + D3Q27System::DX1[invDir];
-            neighborX2Inv = x2 + D3Q27System::DX2[invDir];
-            neighborX3Inv = x3 + D3Q27System::DX3[invDir];
-            if (!bcArray->isFluid(neighborX1Inv, neighborX2Inv, neighborX3Inv)) {
-
-                double velocity = bc->getBoundaryVelocity(invDir);
-
-                fpre[fDir]   = fpre[invDir] - velocity;
-                double Omega = fpost[fDir] - fpre[fDir];
-
-                sumRho += Omega / collFactor + fpre[fDir] - feqNullRho[fDir];
-                sumWijk += wijk[fDir];
-            }
-        }
-    }
-
-    double rho = 0.0;
-    if (sumWijk > 0.0)
-        rho = sumRho / sumWijk;
-
-    for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++) {
-        neighborX1 = x1 + D3Q27System::DX1[fDir];
-        neighborX2 = x2 + D3Q27System::DX2[fDir];
-        neighborX3 = x3 + D3Q27System::DX3[fDir];
-
-        if (!bcArray->isFluid(neighborX1, neighborX2, neighborX3)) {
-            int invDir    = D3Q27System::INVDIR[fDir];
-            neighborX1Inv = x1 + D3Q27System::DX1[invDir];
-            neighborX2Inv = x2 + D3Q27System::DX2[invDir];
-            neighborX3Inv = x3 + D3Q27System::DX3[invDir];
-            if (!bcArray->isFluid(neighborX1Inv, neighborX2Inv, neighborX3Inv)) {
-                fpre[fDir] = D3Q27System::getIncompFeqForDirection(
-                    fDir, rho, bc->getBoundaryVelocityX1(), bc->getBoundaryVelocityX2(), bc->getBoundaryVelocityX3());
-            }
-        }
-    }
-}
diff --git a/src/cpu/DemCoupling/reconstructor/VelocityBcReconstructor.h b/src/cpu/DemCoupling/reconstructor/VelocityBcReconstructor.h
deleted file mode 100644
index 9f5b3f0b67be91edbcfcdadbc8f5c637a87827dc..0000000000000000000000000000000000000000
--- a/src/cpu/DemCoupling/reconstructor/VelocityBcReconstructor.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- *  Author: S. Peters
- *  mail: peters@irmb.tu-bs.de
- */
-#ifndef VELOCITY_BC_RECONSTRUCTOR_H
-#define VELOCITY_BC_RECONSTRUCTOR_H
-
-#include "UbTuple.h"
-
-#include "Reconstructor.h"
-
-class ILBMKernel;
-class PhysicsEngineGeometryAdapter;
-
-class VelocityBcReconstructor : public Reconstructor
-{
-public:
-    virtual ~VelocityBcReconstructor() {}
-
-    void reconstructNode(const int &x1, const int &x2, const int &x3, const Vector3D &worldCoordinates,
-                         std::shared_ptr<PhysicsEngineGeometryAdapter> physicsEngineGeometry,
-                         std::shared_ptr<ILBMKernel> kernel) const override;
-};
-
-#endif
diff --git a/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.cpp b/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.cpp
index 7e36c2b9c8acd63af35d14ebcc7029a278977a4c..873bbd88aa43c59429eaff42a0a42d11c80e32bc 100644
--- a/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.cpp
+++ b/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.cpp
@@ -41,7 +41,7 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
 
 //////////////////////////////////////////////////////////////////////////
 IBcumulantK17LBMKernel::IBcumulantK17LBMKernel()
@@ -111,6 +111,7 @@ void IBcumulantK17LBMKernel::calculate(int step)
     //!
 
     using namespace std;
+    using namespace vf::basics::constant;
 
     //initializing of forcing stuff
     if (withForcing)
@@ -225,66 +226,66 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     LBMReal fEqSolid[D3Q27System::ENDF + 1];
                     LBMReal fPre[D3Q27System::ENDF + 1];
 
-                    f[D3Q27System::DIR_000] = mfbbb;
-
-                    f[D3Q27System::DIR_P00] = mfcbb;
-                    f[D3Q27System::DIR_0P0] = mfbcb;
-                    f[D3Q27System::DIR_00P] = mfbbc;
-                    f[D3Q27System::DIR_PP0] = mfccb;
-                    f[D3Q27System::DIR_MP0] = mfacb;
-                    f[D3Q27System::DIR_P0P] = mfcbc;
-                    f[D3Q27System::DIR_M0P] = mfabc;
-                    f[D3Q27System::DIR_0PP] = mfbcc;
-                    f[D3Q27System::DIR_0MP] = mfbac;
-                    f[D3Q27System::DIR_PPP] = mfccc;
-                    f[D3Q27System::DIR_MPP] = mfacc;
-                    f[D3Q27System::DIR_PMP] = mfcac;
-                    f[D3Q27System::DIR_MMP] = mfaac;
-
-                    f[D3Q27System::DIR_M00] = mfabb;
-                    f[D3Q27System::DIR_0M0] = mfbab;
-                    f[D3Q27System::DIR_00M] = mfbba;
-                    f[D3Q27System::DIR_MM0] = mfaab;
-                    f[D3Q27System::DIR_PM0] = mfcab;
-                    f[D3Q27System::DIR_M0M] = mfaba;
-                    f[D3Q27System::DIR_P0M] = mfcba;
-                    f[D3Q27System::DIR_0MM] = mfbaa;
-                    f[D3Q27System::DIR_0PM] = mfbca;
-                    f[D3Q27System::DIR_MMM] = mfaaa;
-                    f[D3Q27System::DIR_PMM] = mfcaa;
-                    f[D3Q27System::DIR_MPM] = mfaca;
-                    f[D3Q27System::DIR_PPM] = mfcca;
+                    f[vf::lbm::dir::DIR_000] = mfbbb;
+
+                    f[vf::lbm::dir::DIR_P00] = mfcbb;
+                    f[vf::lbm::dir::DIR_0P0] = mfbcb;
+                    f[vf::lbm::dir::DIR_00P] = mfbbc;
+                    f[vf::lbm::dir::DIR_PP0] = mfccb;
+                    f[vf::lbm::dir::DIR_MP0] = mfacb;
+                    f[vf::lbm::dir::DIR_P0P] = mfcbc;
+                    f[vf::lbm::dir::DIR_M0P] = mfabc;
+                    f[vf::lbm::dir::DIR_0PP] = mfbcc;
+                    f[vf::lbm::dir::DIR_0MP] = mfbac;
+                    f[vf::lbm::dir::DIR_PPP] = mfccc;
+                    f[vf::lbm::dir::DIR_MPP] = mfacc;
+                    f[vf::lbm::dir::DIR_PMP] = mfcac;
+                    f[vf::lbm::dir::DIR_MMP] = mfaac;
+
+                    f[vf::lbm::dir::DIR_M00] = mfabb;
+                    f[vf::lbm::dir::DIR_0M0] = mfbab;
+                    f[vf::lbm::dir::DIR_00M] = mfbba;
+                    f[vf::lbm::dir::DIR_MM0] = mfaab;
+                    f[vf::lbm::dir::DIR_PM0] = mfcab;
+                    f[vf::lbm::dir::DIR_M0M] = mfaba;
+                    f[vf::lbm::dir::DIR_P0M] = mfcba;
+                    f[vf::lbm::dir::DIR_0MM] = mfbaa;
+                    f[vf::lbm::dir::DIR_0PM] = mfbca;
+                    f[vf::lbm::dir::DIR_MMM] = mfaaa;
+                    f[vf::lbm::dir::DIR_PMM] = mfcaa;
+                    f[vf::lbm::dir::DIR_MPM] = mfaca;
+                    f[vf::lbm::dir::DIR_PPM] = mfcca;
 
                     if ((*particleData)(x1, x2, x3)->solidFraction > SOLFRAC_MIN) {
-                        fPre[D3Q27System::DIR_000] = mfbbb;
-
-                        fPre[D3Q27System::DIR_P00] = mfcbb;
-                        fPre[D3Q27System::DIR_0P0] = mfbcb;
-                        fPre[D3Q27System::DIR_00P] = mfbbc;
-                        fPre[D3Q27System::DIR_PP0] = mfccb;
-                        fPre[D3Q27System::DIR_MP0] = mfacb;
-                        fPre[D3Q27System::DIR_P0P] = mfcbc;
-                        fPre[D3Q27System::DIR_M0P] = mfabc;
-                        fPre[D3Q27System::DIR_0PP] = mfbcc;
-                        fPre[D3Q27System::DIR_0MP] = mfbac;
-                        fPre[D3Q27System::DIR_PPP] = mfccc;
-                        fPre[D3Q27System::DIR_MPP] = mfacc;
-                        fPre[D3Q27System::DIR_PMP] = mfcac;
-                        fPre[D3Q27System::DIR_MMP] = mfaac;
-
-                        fPre[D3Q27System::DIR_M00] = mfabb;
-                        fPre[D3Q27System::DIR_0M0] = mfbab;
-                        fPre[D3Q27System::DIR_00M] = mfbba;
-                        fPre[D3Q27System::DIR_MM0] = mfaab;
-                        fPre[D3Q27System::DIR_PM0] = mfcab;
-                        fPre[D3Q27System::DIR_M0M] = mfaba;
-                        fPre[D3Q27System::DIR_P0M] = mfcba;
-                        fPre[D3Q27System::DIR_0MM] = mfbaa;
-                        fPre[D3Q27System::DIR_0PM] = mfbca;
-                        fPre[D3Q27System::DIR_MMM] = mfaaa;
-                        fPre[D3Q27System::DIR_PMM] = mfcaa;
-                        fPre[D3Q27System::DIR_MPM] = mfaca;
-                        fPre[D3Q27System::DIR_PPM] = mfcca;
+                        fPre[vf::lbm::dir::DIR_000] = mfbbb;
+
+                        fPre[vf::lbm::dir::DIR_P00] = mfcbb;
+                        fPre[vf::lbm::dir::DIR_0P0] = mfbcb;
+                        fPre[vf::lbm::dir::DIR_00P] = mfbbc;
+                        fPre[vf::lbm::dir::DIR_PP0] = mfccb;
+                        fPre[vf::lbm::dir::DIR_MP0] = mfacb;
+                        fPre[vf::lbm::dir::DIR_P0P] = mfcbc;
+                        fPre[vf::lbm::dir::DIR_M0P] = mfabc;
+                        fPre[vf::lbm::dir::DIR_0PP] = mfbcc;
+                        fPre[vf::lbm::dir::DIR_0MP] = mfbac;
+                        fPre[vf::lbm::dir::DIR_PPP] = mfccc;
+                        fPre[vf::lbm::dir::DIR_MPP] = mfacc;
+                        fPre[vf::lbm::dir::DIR_PMP] = mfcac;
+                        fPre[vf::lbm::dir::DIR_MMP] = mfaac;
+
+                        fPre[vf::lbm::dir::DIR_M00] = mfabb;
+                        fPre[vf::lbm::dir::DIR_0M0] = mfbab;
+                        fPre[vf::lbm::dir::DIR_00M] = mfbba;
+                        fPre[vf::lbm::dir::DIR_MM0] = mfaab;
+                        fPre[vf::lbm::dir::DIR_PM0] = mfcab;
+                        fPre[vf::lbm::dir::DIR_M0M] = mfaba;
+                        fPre[vf::lbm::dir::DIR_P0M] = mfcba;
+                        fPre[vf::lbm::dir::DIR_0MM] = mfbaa;
+                        fPre[vf::lbm::dir::DIR_0PM] = mfbca;
+                        fPre[vf::lbm::dir::DIR_MMM] = mfaaa;
+                        fPre[vf::lbm::dir::DIR_PMM] = mfcaa;
+                        fPre[vf::lbm::dir::DIR_MPM] = mfaca;
+                        fPre[vf::lbm::dir::DIR_PPM] = mfcca;
                     }
 
                     (*particleData)(x1, x2, x3)->hydrodynamicForce.fill(0.0);
@@ -302,8 +303,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
                                     ((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) +
                                    mfbbb;
 
-                    LBMReal rho   = c1 + drho;
-                    LBMReal OOrho = c1 / rho;
+                    LBMReal rho   = c1o1 + drho;
+                    LBMReal OOrho = c1o1 / rho;
                     ////////////////////////////////////////////////////////////////////////////////////
                     LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
                                    (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
@@ -361,39 +362,39 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Z - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9);
+                    forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
                     forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-                    forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36);
+                    forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Y - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6);
+                    forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
                     forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-                    forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18);
+                    forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
                     forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
                     forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
                     forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-                    forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6);
+                    forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
                     forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-                    forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18);
+                    forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // X - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1);
+                    forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
                     forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-                    forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3);
+                    forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
                     forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
                     forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
                     forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-                    forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3);
+                    forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
                     forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-                    forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9);
+                    forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and
@@ -418,24 +419,24 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     // 2.
-                    LBMReal OxxPyyPzz = c1;
+                    LBMReal OxxPyyPzz = c1o1;
                     ////////////////////////////////////////////////////////////
                     // 3.
                     LBMReal OxyyPxzz =
-                        c8 * (-c2 + omega) * (c1 + c2 * omega) / (-c8 - c14 * omega + c7 * omega * omega);
+                        c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);
                     LBMReal OxyyMxzz =
-                        c8 * (-c2 + omega) * (-c7 + c4 * omega) / (c56 - c50 * omega + c9 * omega * omega);
-                    LBMReal Oxyz = c24 * (-c2 + omega) * (-c2 - c7 * omega + c3 * omega * omega) /
-                                   (c48 + c152 * omega - c130 * omega * omega + c29 * omega * omega * omega);
+                        c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);
+                    LBMReal Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
+                                   (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);
                     ////////////////////////////////////////////////////////////
                     // 4.
-                    LBMReal O4 = c1;
+                    LBMReal O4 = c1o1;
                     ////////////////////////////////////////////////////////////
                     // 5.
-                    LBMReal O5 = c1;
+                    LBMReal O5 = c1o1;
                     ////////////////////////////////////////////////////////////
                     // 6.
-                    LBMReal O6 = c1;
+                    LBMReal O6 = c1o1;
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114)
@@ -443,8 +444,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$\omega_2 = 1.0\f$
                     //! (modify for different bulk viscosity).
                     //!
-                    LBMReal A = (c4 + c2 * omega - c3 * omega * omega) / (c2 - c7 * omega + c5 * omega * omega);
-                    LBMReal B = (c4 + c28 * omega - c14 * omega * omega) / (c6 - c21 * omega + c15 * omega * omega);
+                    LBMReal A = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega);
+                    LBMReal B = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Compute cumulants from central moments according to Eq. (20)-(23) in
@@ -453,45 +454,36 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     // 4.
-                    LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2 * mfbba * mfbab) * OOrho;
-                    LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2 * mfbba * mfabb) * OOrho;
-                    LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2 * mfbab * mfabb) * OOrho;
+                    LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
+                    LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
+                    LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
 
-                    LBMReal CUMcca = mfcca - (((mfcaa * mfaca + c2 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho -
+                    LBMReal CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho -
                                               c1o9 * (drho * OOrho));
-                    LBMReal CUMcac = mfcac - (((mfcaa * mfaac + c2 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho -
+                    LBMReal CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho -
                                               c1o9 * (drho * OOrho));
-                    LBMReal CUMacc = mfacc - (((mfaac * mfaca + c2 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho -
+                    LBMReal CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho -
                                               c1o9 * (drho * OOrho));
                     ////////////////////////////////////////////////////////////
                     // 5.
-                    LBMReal CUMbcc =
-                        mfbcc -
-                        ((mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) +
-                         c1o3 * (mfbca + mfbac)) *
-                            OOrho;
-                    LBMReal CUMcbc =
-                        mfcbc -
-                        ((mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) +
-                         c1o3 * (mfcba + mfabc)) *
-                            OOrho;
-                    LBMReal CUMccb =
-                        mfccb -
-                        ((mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) +
-                         c1o3 * (mfacb + mfcab)) *
-                            OOrho;
+                    LBMReal CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
+                         c1o3 * (mfbca + mfbac)) * OOrho;
+                    LBMReal CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
+                         c1o3 * (mfcba + mfabc)) * OOrho;
+                    LBMReal CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
+                         c1o3 * (mfacb + mfcab)) * OOrho;
                     ////////////////////////////////////////////////////////////
                     // 6.
                     LBMReal CUMccc =
-                        mfccc + ((-c4 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-                                  c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-                                  c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
+                        mfccc + ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
+                                  c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
+                                  c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
                                      OOrho +
-                                 (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-                                  c2 * (mfcaa * mfaca * mfaac) + c16 * mfbba * mfbab * mfabb) *
+                                 (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
+                                  c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
                                      OOrho * OOrho -
                                  c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-                                 (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
+                                 (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
                                   (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
                                      OOrho * OOrho * c2o3 +
                                  c1o27 * ((drho * drho - drho) * OOrho * OOrho));
@@ -525,9 +517,9 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we
                     //! need rho times the gradients later.
                     //!
-                    LBMReal Dxy  = -c3 * omega * mfbba;
-                    LBMReal Dxz  = -c3 * omega * mfbab;
-                    LBMReal Dyz  = -c3 * omega * mfabb;
+                    LBMReal Dxy  = -c3o1 * omega * mfbba;
+                    LBMReal Dxz  = -c3o1 * omega * mfbab;
+                    LBMReal Dyz  = -c3o1 * omega * mfabb;
                     LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
                     LBMReal dyuy = dxux + omega * c3o2 * mxxMyy;
                     LBMReal dzuz = dxux + omega * c3o2 * mxxMzz;
@@ -537,9 +529,9 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
                     mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) -
-                                 c3 * (c1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
-                    mxxMyy += omega * (-mxxMyy) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-                    mxxMzz += omega * (-mxxMzz) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+                                 c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+                    mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+                    mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
                     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                     ////no correction
@@ -559,19 +551,19 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
                     //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    wadjust = Oxyz + (c1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
+                    wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
                     mfbbb += wadjust * (-mfbbb);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
                     mxxyPyzz += wadjust * (-mxxyPyzz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
                     mxxyMyzz += wadjust * (-mxxyMyzz);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
                     mxxzPyyz += wadjust * (-mxxzPyyz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
                     mxxzMyyz += wadjust * (-mxxzMyyz);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
                     mxyyPxzz += wadjust * (-mxyyPxzz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
                     mxyyMxzz += wadjust * (-mxyyMxzz);
                     //////////////////////////////////////////////////////////////////////////
                     // no limiter
@@ -587,8 +579,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! - Compute inverse linear combinations of second and third order cumulants
                     //!
                     mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-                    mfaca = c1o3 * (-c2 * mxxMyy + mxxMzz + mxxPyyPzz);
-                    mfaac = c1o3 * (mxxMyy - c2 * mxxMzz + mxxPyyPzz);
+                    mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
+                    mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
                     mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
                     mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
@@ -605,12 +597,12 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //! according to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et
                     //! al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    CUMacc = -O4 * (c1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1 - O4) * (CUMacc);
-                    CUMcac = -O4 * (c1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1 - O4) * (CUMcac);
-                    CUMcca = -O4 * (c1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1 - O4) * (CUMcca);
-                    CUMbbc = -O4 * (c1 / omega - c1o2) * Dxy * c1o3 * B + (c1 - O4) * (CUMbbc);
-                    CUMbcb = -O4 * (c1 / omega - c1o2) * Dxz * c1o3 * B + (c1 - O4) * (CUMbcb);
-                    CUMcbb = -O4 * (c1 / omega - c1o2) * Dyz * c1o3 * B + (c1 - O4) * (CUMcbb);
+                    CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+                    CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+                    CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+                    CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc);
+                    CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb);
+                    CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb);
 
                     //////////////////////////////////////////////////////////////////////////
                     // 5.
@@ -630,50 +622,50 @@ void IBcumulantK17LBMKernel::calculate(int step)
 
                     //////////////////////////////////////////////////////////////////////////
                     // 4.
-                    mfcbb = CUMcbb + c1o3 * ((c3 * mfcaa + c1) * mfabb + c6 * mfbba * mfbab) * OOrho;
-                    mfbcb = CUMbcb + c1o3 * ((c3 * mfaca + c1) * mfbab + c6 * mfbba * mfabb) * OOrho;
-                    mfbbc = CUMbbc + c1o3 * ((c3 * mfaac + c1) * mfbba + c6 * mfbab * mfabb) * OOrho;
+                    mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho;
+                    mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
+                    mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
 
-                    mfcca = CUMcca + (((mfcaa * mfaca + c2 * mfbba * mfbba) * c9 + c3 * (mfcaa + mfaca)) * OOrho -
+                    mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho -
                                       (drho * OOrho)) *
                                          c1o9;
-                    mfcac = CUMcac + (((mfcaa * mfaac + c2 * mfbab * mfbab) * c9 + c3 * (mfcaa + mfaac)) * OOrho -
+                    mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho -
                                       (drho * OOrho)) *
                                          c1o9;
-                    mfacc = CUMacc + (((mfaac * mfaca + c2 * mfabb * mfabb) * c9 + c3 * (mfaac + mfaca)) * OOrho -
+                    mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho -
                                       (drho * OOrho)) *
                                          c1o9;
 
                     //////////////////////////////////////////////////////////////////////////
                     // 5.
                     mfbcc = CUMbcc + c1o3 *
-                                         (c3 * (mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb +
-                                                c2 * (mfbab * mfacb + mfbba * mfabc)) +
+                                         (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb +
+                                                c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
                                           (mfbca + mfbac)) *
                                          OOrho;
                     mfcbc = CUMcbc + c1o3 *
-                                         (c3 * (mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb +
-                                                c2 * (mfabb * mfcab + mfbba * mfbac)) +
+                                         (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb +
+                                                c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
                                           (mfcba + mfabc)) *
                                          OOrho;
                     mfccb = CUMccb + c1o3 *
-                                         (c3 * (mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb +
-                                                c2 * (mfbab * mfbca + mfabb * mfcba)) +
+                                         (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb +
+                                                c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
                                           (mfacb + mfcab)) *
                                          OOrho;
 
                     //////////////////////////////////////////////////////////////////////////
                     // 6.
                     mfccc =
-                        CUMccc - ((-c4 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-                                   c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-                                   c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
+                        CUMccc - ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
+                                   c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
+                                   c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
                                       OOrho +
-                                  (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-                                   c2 * (mfcaa * mfaca * mfaac) + c16 * mfbba * mfbab * mfabb) *
+                                  (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
+                                   c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
                                       OOrho * OOrho -
                                   c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-                                  (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
+                                  (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
                                    (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
                                       OOrho * OOrho * c2o3 +
                                   c1o27 * ((drho * drho - drho) * OOrho * OOrho));
@@ -697,39 +689,39 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////////////////////////////
                     // X - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1);
+                    backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
                     backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-                    backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3);
+                    backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
                     backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
                     backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
                     backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-                    backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3);
+                    backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
                     backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-                    backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9);
+                    backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Y - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6);
+                    backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
                     backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-                    backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18);
+                    backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
                     backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
                     backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
                     backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-                    backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6);
+                    backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
                     backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-                    backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18);
+                    backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Z - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9);
+                    backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
                     backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-                    backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36);
+                    backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
                     ////////////////////////////////////////////////////////////////////////////////////
 
                     //////////////////////////////////////////////////////////////////////////
@@ -791,35 +783,35 @@ void IBcumulantK17LBMKernel::calculate(int step)
 
                     (*this->restDistributions)(x1, x2, x3) = mfbbb;
                     //////////////////////////////////////////////////////////////////////////
-                    f[D3Q27System::DIR_000] = mfbbb;
+                    f[vf::lbm::dir::DIR_000] = mfbbb;
                      
-                    f[D3Q27System::DIR_P00]  = mfcbb;
-                    f[D3Q27System::DIR_0P0]  = mfbcb;
-                    f[D3Q27System::DIR_00P]  = mfbbc;
-                    f[D3Q27System::DIR_PP0]  = mfccb;
-                    f[D3Q27System::DIR_MP0]  = mfacb;
-                    f[D3Q27System::DIR_P0P]  = mfcbc;
-                    f[D3Q27System::DIR_M0P]  = mfabc;
-                    f[D3Q27System::DIR_0PP]  = mfbcc;
-                    f[D3Q27System::DIR_0MP]  = mfbac;
-                    f[D3Q27System::DIR_PPP]  = mfccc;
-                    f[D3Q27System::DIR_MPP]  = mfacc;
-                    f[D3Q27System::DIR_PMP]  = mfcac;
-                    f[D3Q27System::DIR_MMP]  = mfaac;
+                    f[vf::lbm::dir::DIR_P00]  = mfcbb;
+                    f[vf::lbm::dir::DIR_0P0]  = mfbcb;
+                    f[vf::lbm::dir::DIR_00P]  = mfbbc;
+                    f[vf::lbm::dir::DIR_PP0]  = mfccb;
+                    f[vf::lbm::dir::DIR_MP0]  = mfacb;
+                    f[vf::lbm::dir::DIR_P0P]  = mfcbc;
+                    f[vf::lbm::dir::DIR_M0P]  = mfabc;
+                    f[vf::lbm::dir::DIR_0PP]  = mfbcc;
+                    f[vf::lbm::dir::DIR_0MP]  = mfbac;
+                    f[vf::lbm::dir::DIR_PPP]  = mfccc;
+                    f[vf::lbm::dir::DIR_MPP]  = mfacc;
+                    f[vf::lbm::dir::DIR_PMP]  = mfcac;
+                    f[vf::lbm::dir::DIR_MMP]  = mfaac;
                                      
-                    f[D3Q27System::DIR_M00]  = mfabb;
-                    f[D3Q27System::DIR_0M0]  = mfbab;
-                    f[D3Q27System::DIR_00M]  = mfbba;
-                    f[D3Q27System::DIR_MM0]  = mfaab;
-                    f[D3Q27System::DIR_PM0]  = mfcab;
-                    f[D3Q27System::DIR_M0M]  = mfaba;
-                    f[D3Q27System::DIR_P0M]  = mfcba;
-                    f[D3Q27System::DIR_0MM]  = mfbaa;
-                    f[D3Q27System::DIR_0PM]  = mfbca;
-                    f[D3Q27System::DIR_MMM]  = mfaaa;
-                    f[D3Q27System::DIR_PMM]  = mfcaa;
-                    f[D3Q27System::DIR_MPM]  = mfaca;
-                    f[D3Q27System::DIR_PPM]  = mfcca;
+                    f[vf::lbm::dir::DIR_M00]  = mfabb;
+                    f[vf::lbm::dir::DIR_0M0]  = mfbab;
+                    f[vf::lbm::dir::DIR_00M]  = mfbba;
+                    f[vf::lbm::dir::DIR_MM0]  = mfaab;
+                    f[vf::lbm::dir::DIR_PM0]  = mfcab;
+                    f[vf::lbm::dir::DIR_M0M]  = mfaba;
+                    f[vf::lbm::dir::DIR_P0M]  = mfcba;
+                    f[vf::lbm::dir::DIR_0MM]  = mfbaa;
+                    f[vf::lbm::dir::DIR_0PM]  = mfbca;
+                    f[vf::lbm::dir::DIR_MMM]  = mfaaa;
+                    f[vf::lbm::dir::DIR_PMM]  = mfcaa;
+                    f[vf::lbm::dir::DIR_MPM]  = mfaca;
+                    f[vf::lbm::dir::DIR_PPM]  = mfcca;
                 }
                     if ((*particleData)(x1, x2, x3)->solidFraction < SOLFRAC_MIN)
                         continue;
@@ -836,8 +828,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
                     D3Q27System::calcCompFeq(fEqSolid, drho, uPart[0], uPart[1], uPart[2]);
 
                     if ((*particleData)(x1, x2, x3)->solidFraction > SOLFRAC_MAX) {
-                        double const bb0     = fEq[D3Q27System::DIR_000] - fEqSolid[D3Q27System::DIR_000];
-                        f[D3Q27System::DIR_000] = fPre[D3Q27System::DIR_000] + bb0;
+                    double const bb0 = fEq[vf::lbm::dir::DIR_000] - fEqSolid[vf::lbm::dir::DIR_000];
+                    f[vf::lbm::dir::DIR_000] = fPre[vf::lbm::dir::DIR_000] + bb0;
                         for (int iPop = D3Q27System::FSTARTDIR; iPop <= D3Q27System::FENDDIR; iPop++) {
                             const int iOpp        = D3Q27System::INVDIR[iPop];
                             double const bb       = ((fPre[iOpp] - fEq[iOpp]) - (fPre[iPop] - fEqSolid[iPop]));
@@ -860,8 +852,8 @@ void IBcumulantK17LBMKernel::calculate(int step)
 //#endif
                         double const oneMinB = 1. - B;
 
-                        double const bb0 = fEq[D3Q27System::DIR_000] - fEqSolid[D3Q27System::DIR_000];
-                        f[D3Q27System::DIR_000] = fPre[D3Q27System::DIR_000] + oneMinB * (f[D3Q27System::DIR_000] - fPre[D3Q27System::DIR_000]) + B * bb0;
+                        double const bb0 = fEq[vf::lbm::dir::DIR_000] - fEqSolid[vf::lbm::dir::DIR_000];
+                        f[vf::lbm::dir::DIR_000] = fPre[vf::lbm::dir::DIR_000] + oneMinB * (f[vf::lbm::dir::DIR_000] - fPre[vf::lbm::dir::DIR_000]) + B * bb0;
 
                         for (int iPop = D3Q27System::FSTARTDIR; iPop <= D3Q27System::FENDDIR; iPop++) {
                             int const iOpp = D3Q27System::INVDIR[iPop];
@@ -877,35 +869,35 @@ void IBcumulantK17LBMKernel::calculate(int step)
                         }
                     } /* if solidFraction > SOLFRAC_MAX */
 
-                    (*this->restDistributions)(x1, x2, x3)                             = f[D3Q27System::DIR_000];
+                    (*this->restDistributions)(x1, x2, x3)                             = f[vf::lbm::dir::DIR_000];
                                                                                           
-                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)         = f[D3Q27System::DIR_M00];
-                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)         = f[D3Q27System::DIR_0M0];
-                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)         = f[D3Q27System::DIR_00M];
-                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)        = f[D3Q27System::DIR_MM0];
-                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)       = f[D3Q27System::DIR_PM0];
-                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)        = f[D3Q27System::DIR_M0M];
-                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)       = f[D3Q27System::DIR_P0M];
-                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)        = f[D3Q27System::DIR_0MM];
-                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)       = f[D3Q27System::DIR_0PM];
-                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)       = f[D3Q27System::DIR_MMM];
-                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)      = f[D3Q27System::DIR_PMM];
-                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)      = f[D3Q27System::DIR_MPM];
-                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3)     = f[D3Q27System::DIR_PPM];
-                                                                                                          
-                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3)     =  f[D3Q27System::DIR_P00];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3)     =  f[D3Q27System::DIR_0P0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p)     =  f[D3Q27System::DIR_00P];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3)   =  f[D3Q27System::DIR_PP0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3)    =  f[D3Q27System::DIR_MP0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p)   =  f[D3Q27System::DIR_P0P];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p)    =  f[D3Q27System::DIR_M0P];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p)   =  f[D3Q27System::DIR_0PP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p)    =  f[D3Q27System::DIR_0MP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) =  f[D3Q27System::DIR_PPP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p)  =  f[D3Q27System::DIR_MPP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p)  =  f[D3Q27System::DIR_PMP];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p)   =  f[D3Q27System::DIR_MMP];
+                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)         = f[vf::lbm::dir::DIR_M00];
+                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)         = f[vf::lbm::dir::DIR_0M0];
+                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)         = f[vf::lbm::dir::DIR_00M];
+                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)        = f[vf::lbm::dir::DIR_MM0];
+                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)       = f[vf::lbm::dir::DIR_PM0];
+                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)        = f[vf::lbm::dir::DIR_M0M];
+                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)       = f[vf::lbm::dir::DIR_P0M];
+                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)        = f[vf::lbm::dir::DIR_0MM];
+                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)       = f[vf::lbm::dir::DIR_0PM];
+                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)       = f[vf::lbm::dir::DIR_MMM];
+                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)      = f[vf::lbm::dir::DIR_PMM];
+                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)      = f[vf::lbm::dir::DIR_MPM];
+                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3)     = f[vf::lbm::dir::DIR_PPM];
+                                                                                              
+                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3)     = f[vf::lbm::dir::DIR_P00];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3)     = f[vf::lbm::dir::DIR_0P0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p)     = f[vf::lbm::dir::DIR_00P];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3)   = f[vf::lbm::dir::DIR_PP0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3)    = f[vf::lbm::dir::DIR_MP0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p)   = f[vf::lbm::dir::DIR_P0P];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p)    = f[vf::lbm::dir::DIR_M0P];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p)   = f[vf::lbm::dir::DIR_0PP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p)    = f[vf::lbm::dir::DIR_0MP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[vf::lbm::dir::DIR_PPP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p)  = f[vf::lbm::dir::DIR_MPP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p)  = f[vf::lbm::dir::DIR_PMP];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p)   = f[vf::lbm::dir::DIR_MMP];
                 }
             }
         }
diff --git a/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.h b/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.h
index 2d5216d3607e4489cc93a062f66efdb6f2c2457a..58191d27e4de872aeef87d888fff4a35b90d962b 100644
--- a/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.h
+++ b/src/cpu/LiggghtsCoupling/IBcumulantK17LBMKernel.h
@@ -94,15 +94,15 @@ protected:
 ////////////////////////////////////////////////////////////////////////////////
 inline void IBcumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K)
 {
-    using namespace UbMath;
+    using namespace vf::basics::constant;
     LBMReal m2 = mfa + mfc;
     LBMReal m1 = mfc - mfa;
     LBMReal m0 = m2 + mfb;
     mfa = m0;
     m0 *= Kinverse;
-    m0 += c1;
+    m0 += c1o1;
     mfb = (m1 * Kinverse - m0 * vv) * K;
-    mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 ////////////////////////////////////////////////////////////////////////////////
 //! \brief backward chimera transformation \ref backwardInverseChimeraWithK
@@ -112,10 +112,10 @@ inline void IBcumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBM
 ////////////////////////////////////////////////////////////////////////////////
 inline void IBcumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K)
 {
-    using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 - vv) * c1o2) * K;
-    LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (-v2)) * K;
-    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 + vv) * c1o2) * K;
+    using namespace vf::basics::constant;
+    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K;
+    LBMReal m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K;
+    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K;
     mfa = m0;
     mfb = m1;
 }
@@ -128,10 +128,10 @@ inline void IBcumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LB
 ////////////////////////////////////////////////////////////////////////////////
 inline void IBcumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2)
 {
-    using namespace UbMath;
+    using namespace vf::basics::constant;
     LBMReal m1 = (mfa + mfc) + mfb;
     LBMReal m2 = mfc - mfa;
-    mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+    mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
     mfb = m2 - vv * m1;
     mfa = m1;
 }
@@ -144,9 +144,9 @@ inline void IBcumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, L
 ////////////////////////////////////////////////////////////////////////////////
 inline void IBcumulantK17LBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2)
 {
-    using namespace UbMath;
+    using namespace vf::basics::constant;
     LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-    LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+    LBMReal mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
     mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
     mfb = mb;
     mfa = ma;
diff --git a/src/cpu/LiggghtsCoupling/LiggghtsCouplingCoProcessor.cpp b/src/cpu/LiggghtsCoupling/LiggghtsCouplingCoProcessor.cpp
index 51d451064ff1b2e45433997b0e3e771b22f19ffb..bf923b2a216df1c6b82c9538daf4f33a65b24fd6 100644
--- a/src/cpu/LiggghtsCoupling/LiggghtsCouplingCoProcessor.cpp
+++ b/src/cpu/LiggghtsCoupling/LiggghtsCouplingCoProcessor.cpp
@@ -66,7 +66,7 @@ void LiggghtsCouplingCoProcessor::setSpheresOnLattice()
         if (excludeFlag)
             continue;
 
-        double x[3], v[3], omega[3];
+        double x[3] = { 0, 0, 0 }, v[3] = { 0, 0, 0 }, omega[3] = { 0, 0, 0 };
         double r;
         int id = wrapper.lmp->atom->tag[iS];
 
@@ -194,7 +194,7 @@ double LiggghtsCouplingCoProcessor::calcSolidFraction(double const dx_, double c
         return 1;
 
     double const r_sq = r_ * r_;
-    double dx_sq[slicesPerDim], dy_sq[slicesPerDim], dz_sq[slicesPerDim];
+    double dx_sq[slicesPerDim] = { 0, 0, 0, 0, 0 }, dy_sq[slicesPerDim] = { 0, 0, 0, 0, 0 }, dz_sq[slicesPerDim] = { 0, 0, 0, 0, 0 };
 
     // pre-calculate d[xyz]_sq for efficiency
     for (int i = 0; i < slicesPerDim; i++) {
@@ -255,13 +255,13 @@ void LiggghtsCouplingCoProcessor::getForcesFromLattice()
     if (nPart == 0)
         return; // no particles - no work
 
-    if (nPart > x_lb.size()) {
-        for (int iPart = 0; iPart < x_lb.size(); iPart++) {
+    if (nPart > (int)x_lb.size()) {
+        for (int iPart = 0; iPart < (int)x_lb.size(); iPart++) {
             x_lb[iPart][0] = wrapper.lmp->atom->x[iPart][0];
             x_lb[iPart][1] = wrapper.lmp->atom->x[iPart][1];
             x_lb[iPart][2] = wrapper.lmp->atom->x[iPart][2];
         }
-        for (int iPart = x_lb.size(); iPart < nPart; iPart++) {
+        for (int iPart = (int)x_lb.size(); iPart < nPart; iPart++) {
             std::array<double, 3> ar = {wrapper.lmp->atom->x[iPart][0],
                                         wrapper.lmp->atom->x[iPart][1],
                                         wrapper.lmp->atom->x[iPart][2]};
@@ -277,12 +277,12 @@ void LiggghtsCouplingCoProcessor::getForcesFromLattice()
         }
     }
 
-    if (n_force > force.size()) {
-        for (int i = 0; i < force.size(); i++) {
+    if (n_force > (int)force.size()) {
+        for (int i = 0; i < (int)force.size(); i++) {
             force[i]  = 0;
             torque[i] = 0;
         }
-        for (int i = force.size(); i < n_force; i++) {
+        for (int i = (int)force.size(); i < n_force; i++) {
             force.push_back(0.);
             torque.push_back(0.);
         }
@@ -367,17 +367,17 @@ void LiggghtsCouplingCoProcessor::SumForceTorque3D(ParticleData::ParticleDataArr
                         // minimum image convention, needed if
                         // (1) PBC are used and
                         // (2) both ends of PBC lie on the same processor
-                        if (dx > nx / 2)
+                        if ((int)dx > nx / 2)
                             dx -= nx;
-                        else if (dx < -nx / 2)
+                        else if ((int)dx < -nx / 2)
                             dx += nx;
-                        if (dy > ny / 2)
+                        if ((int)dy > ny / 2)
                             dy -= ny;
-                        else if (dy < -ny / 2)
+                        else if ((int)dy < -ny / 2)
                             dy += ny;
-                        if (dz > nz / 2)
+                        if ((int)dz > nz / 2)
                             dz -= nz;
-                        else if (dz < -nz / 2)
+                        else if ((int)dz < -nz / 2)
                             dz += nz;
 
                         double const forceX = (*particleData)(ix1, ix2, ix3)->hydrodynamicForce[0];
diff --git a/src/cpu/VirtualFluids.h b/src/cpu/VirtualFluids.h
index 8aed1556b058c8420d79eab32646ae10112ec288..1ee4c7e78aded0e11ea723769d4515f0f1ec846d 100644
--- a/src/cpu/VirtualFluids.h
+++ b/src/cpu/VirtualFluids.h
@@ -121,6 +121,8 @@
 #include <BoundaryConditions/NoSlipBCAlgorithm.h>
 #include <BoundaryConditions/NonEqDensityBCAlgorithm.h>
 #include <BoundaryConditions/NonReflectingOutflowBCAlgorithm.h>
+#include <BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.h>
+#include <BoundaryConditions/NonReflectingInflowBCAlgorithm.h>
 #include <BoundaryConditions/SlipBCAdapter.h>
 #include <BoundaryConditions/SlipBCAlgorithm.h>
 #include <BoundaryConditions/ThinWallBCProcessor.h>
@@ -204,6 +206,8 @@
 #include <CoProcessors/ShearStressCoProcessor.h>
 #include <CoProcessors/TimeseriesCoProcessor.h>
 #include <CoProcessors/TurbulenceIntensityCoProcessor.h>
+#include <CoProcessors/TimeAveragedValuesCoProcessor.h>
+
 //#include <CoProcessors/MeanValuesCoProcessor.h>
 #include <CoProcessors/InSituCatalystCoProcessor.h>
 #include <CoProcessors/LineTimeSeriesCoProcessor.h>
@@ -265,6 +269,7 @@
 #include <geometry3d/GbCylinder3D.h>
 #include <geometry3d/GbHalfSpace3D.h>
 #include <geometry3d/GbHalfSpaceKrischan3D.h>
+#include <geometry3d/GbImplicitSurface.h>
 #include <geometry3d/GbLine3D.h>
 #include <geometry3d/GbMeshTools3D.h>
 #include <geometry3d/GbObject3D.h>
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAdapter.h
index d66dd3bc64caac711c61f75ed92d7065baaa2699..625fb92149df067639b05435d9b8597b6f96e775 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAdapter.h
@@ -57,14 +57,14 @@ public:
     virtual short getSecondaryBcOption() { return this->secondaryBcOption; }
     virtual void setSecondaryBcOption(const short &val) { this->secondaryBcOption = val; }
 
-    virtual void init(const D3Q27Interactor *const &interactor, const double &time = 0)   = 0;
-    virtual void update(const D3Q27Interactor *const &interactor, const double &time = 0) = 0;
+    virtual void init(const D3Q27Interactor *const &interactor, const real &time = 0)   = 0;
+    virtual void update(const D3Q27Interactor *const &interactor, const real &time = 0) = 0;
 
-    virtual void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                         const double &worldX2, const double &worldX3, const double &time = 0)       = 0;
+    virtual void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                         const real &worldX2, const real &worldX3, const real &time = 0)       = 0;
     virtual void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc,
-                                     const double &worldX1, const double &worldX2, const double &worldX3,
-                                     const double &q, const int &fdirection, const double &time = 0) = 0;
+                                     const real &worldX1, const real &worldX2, const real &worldX3,
+                                     const real &q, const int &fdirection, const real &time = 0) = 0;
 
     void setBcAlgorithm(SPtr<BCAlgorithm> alg)
     {
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
index 5c852528a2abe2bf8de06753f9aaa78bf7f8a565..179007cb6f3f881517c55196420c2cf7135a62f1 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
@@ -69,17 +69,17 @@ void BCAlgorithm::setCompressible(bool c)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setCollFactor(LBMReal cf) { collFactor = cf; }
+void BCAlgorithm::setCollFactor(real cf) { collFactor = cf; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setCollFactorL(LBMReal cf) { collFactorL = cf; }
+void BCAlgorithm::setCollFactorL(real cf) { collFactorL = cf; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setCollFactorG(LBMReal cf) { collFactorG = cf; }
+void BCAlgorithm::setCollFactorG(real cf) { collFactorG = cf; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setCollFactorPh(LBMReal cf) { collFactorPh = cf; }
+void BCAlgorithm::setCollFactorPh(real cf) { collFactorPh = cf; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setDensityRatio(LBMReal dr) { densityRatio = dr; }
+void BCAlgorithm::setDensityRatio(real dr) { densityRatio = dr; }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::setPhiBound(LBMReal phiL, LBMReal phiH)
+void BCAlgorithm::setPhiBound(real phiL, real phiH)
 {
     this->phiL = phiL;
     this->phiH = phiH;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
index f8e77af6d8280e61978740757c682fe9119d9710..f182546b0740cbff6b66b3849e2c67e42de1a98d 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
@@ -69,9 +69,9 @@ public:
     static const char RheologyPowellEyringModelNoSlipBCAlgorithm           = 18;
     static const char RheologyBinghamModelVelocityBCAlgorithm              = 19;
     static const char MultiphaseNoSlipBCAlgorithm                  = 20;
-    static const char MultiphaseVelocityBCAlgorithm = 21;
-
-
+    static const char MultiphaseVelocityBCAlgorithm                      = 21;
+    static const char NonReflectingInflowBCAlgorithm = 22;
+    static const char NonReflectingOutflowBCAlgorithmWithRelaxation = 23;
 
 public:
     BCAlgorithm() = default;
@@ -84,13 +84,13 @@ public:
     void setNodeIndex(int x1, int x2, int x3);
     void setBcPointer(SPtr<BoundaryConditions> bcPtr);
     void setCompressible(bool c);
-    void setCollFactor(LBMReal cf);
+    void setCollFactor(real cf);
 
-    void setCollFactorL(LBMReal cf);
-    void setCollFactorG(LBMReal cf);
-    void setCollFactorPh(LBMReal cf);
-    void setDensityRatio(LBMReal dr);
-    void setPhiBound(LBMReal phiL, LBMReal phiH);
+    void setCollFactorL(real cf);
+    void setCollFactorG(real cf);
+    void setCollFactorPh(real cf);
+    void setDensityRatio(real dr);
+    void setPhiBound(real phiL, real phiH);
 
     char getType();
     bool isPreCollision();
@@ -113,18 +113,18 @@ protected:
     SPtr<BCArray3D> bcArray;
     SPtr<Block3D> block;
 
-    LBMReal collFactor;
-    LBMReal collFactorL, collFactorG, collFactorPh;
-    LBMReal densityRatio;
-    LBMReal phiL, phiH;
+    real collFactor;
+    real collFactorL, collFactorG, collFactorPh;
+    real densityRatio;
+    real phiL, phiH;
     int x1, x2, x3;
 
-    LBMReal compressibleFactor;
+    real compressibleFactor;
 
-    using CalcMacrosFct    = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
-    using CalcFeqForDirFct = LBMReal (*)(const int &, const LBMReal &, const LBMReal &, const LBMReal &,
-                                         const LBMReal &);
-    using CalcFeqFct = void (*)(LBMReal *const &, const LBMReal &, const LBMReal &, const LBMReal &, const LBMReal &);
+    using CalcMacrosFct    = void (*)(const real *const &, real &, real &, real &, real &);
+    using CalcFeqForDirFct = real (*)(const int &, const real &, const real &, const real &,
+                                         const real &);
+    using CalcFeqFct = void (*)(real *const &, const real &, const real &, const real &, const real &);
 
     CalcFeqForDirFct calcFeqsForDirFct;
     CalcMacrosFct calcMacrosFct;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.cpp
index 781958f858e54a348358ec11014ef1012779ebc9..286c9a9f7b9ecd131f90a8c6853ed8e250e1f262 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.cpp
@@ -33,5 +33,5 @@
 
 #include "BCFunction.h"
 
-const double BCFunction::INFTIMEDEPENDENT = -1.0;
-const double BCFunction::INFCONST         = -10.0;
+const real BCFunction::INFTIMEDEPENDENT = -1.0;
+const real BCFunction::INFCONST         = -10.0;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.h
index cf1a5a578af00fd4e326f72ac922f2f4d018667f..68f6caefcd50c32f38c9b329f94db85be3f58688 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCFunction.h
@@ -35,6 +35,7 @@
 #define D3Q27BCFUNCTION_H
 
 #include <basics/utilities/UbInfinity.h>
+#include "lbm/constants/D3Q27.h"
 
 #include <muParser.h>
 
@@ -42,21 +43,21 @@
 class BCFunction
 {
 public:
-    static const double INFTIMEDEPENDENT;
-    static const double INFCONST;
+    static const real INFTIMEDEPENDENT;
+    static const real INFCONST;
 
 public:
     BCFunction() : starttime(-Ub::inf), endtime(-Ub::inf) {}
-    BCFunction(const mu::Parser &function, const double &starttime, const double &endtime)
+    BCFunction(const mu::Parser &function, const real &starttime, const real &endtime)
         : function(function), starttime(starttime), endtime(endtime)
     {
     }
-    BCFunction(const std::string &functionstring, const double &starttime, const double &endtime)
+    BCFunction(const std::string &functionstring, const real &starttime, const real &endtime)
         : starttime(starttime), endtime(endtime)
     {
         this->setFunction(functionstring);
     }
-    BCFunction(const double &velocity, const double &starttime, const double &endtime)
+    BCFunction(const real &velocity, const real &starttime, const real &endtime)
         : starttime(starttime), endtime(endtime)
     {
         this->setFunction(velocity);
@@ -64,19 +65,19 @@ public:
 
     void setFunction(const mu::Parser &function) { this->function = function; }
     void setFunction(const std::string &functionstring) { this->function.SetExpr(functionstring); }
-    void setFunction(const double &constVelocity)
+    void setFunction(const real &constVelocity)
     {
         std::stringstream dummy;
         dummy << constVelocity;
         function.SetExpr(dummy.str());
     }
-    void setStartTime(const double &starttime) { this->starttime = starttime; }
-    void setEndTime(const double &endtime) { this->endtime = endtime; }
+    void setStartTime(const real &starttime) { this->starttime = starttime; }
+    void setEndTime(const real &endtime) { this->endtime = endtime; }
 
     mu::Parser &getFunction() { return function; }
     const mu::Parser &getFunction() const { return function; }
-    const double &getStartTime() const { return starttime; }
-    const double &getEndTime() const { return endtime; }
+    const real &getStartTime() const { return starttime; }
+    const real &getEndTime() const { return endtime; }
 
     std::string toString() const
     {
@@ -109,8 +110,8 @@ public:
 
 protected:
     mu::Parser function;
-    double starttime;
-    double endtime;
+    real starttime;
+    real endtime;
 
 private:
 };
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
index fa61e7224ede371f1c28d3eab8e0ba795ccfa3b1..2dcd667bddbd85cb0c07e74ec19d55f93f880157 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
@@ -192,61 +192,63 @@ public:
     float getBoundaryVelocityX3() { return this->bcVelocityX3; }
     float getBoundaryVelocity(const int &direction)
     {
+        using namespace vf::lbm::dir;
+
         switch (direction) {
-            case D3Q27System::DIR_P00:
-                return (float)(UbMath::c4o9 *
+            case DIR_P00:
+                return (float)(vf::basics::constant::c4o9 *
                                (+bcVelocityX1)); //(2/cs^2)(=6)*rho_0(=1 bei inkompr)*wi*u*ei mit cs=1/sqrt(3)
-            case D3Q27System::DIR_M00:
-                return (float)(UbMath::c4o9 *
+            case DIR_M00:
+                return (float)(vf::basics::constant::c4o9 *
                                (-bcVelocityX1)); // z.B. aus paper manfred MRT LB models in three dimensions (2002)
-            case D3Q27System::DIR_0P0:
-                return (float)(UbMath::c4o9 * (+bcVelocityX2));
-            case D3Q27System::DIR_0M0:
-                return (float)(UbMath::c4o9 * (-bcVelocityX2));
-            case D3Q27System::DIR_00P:
-                return (float)(UbMath::c4o9 * (+bcVelocityX3));
-            case D3Q27System::DIR_00M:
-                return (float)(UbMath::c4o9 * (-bcVelocityX3));
-            case D3Q27System::DIR_PP0:
-                return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX2));
-            case D3Q27System::DIR_MM0:
-                return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX2));
-            case D3Q27System::DIR_PM0:
-                return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX2));
-            case D3Q27System::DIR_MP0:
-                return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX2));
-            case D3Q27System::DIR_P0P:
-                return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX3));
-            case D3Q27System::DIR_M0M:
-                return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX3));
-            case D3Q27System::DIR_P0M:
-                return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX3));
-            case D3Q27System::DIR_M0P:
-                return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX3));
-            case D3Q27System::DIR_0PP:
-                return (float)(UbMath::c1o9 * (+bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_0MM:
-                return (float)(UbMath::c1o9 * (-bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_0PM:
-                return (float)(UbMath::c1o9 * (+bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_0MP:
-                return (float)(UbMath::c1o9 * (-bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_PPP:
-                return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_MMM:
-                return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_PPM:
-                return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_MMP:
-                return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_PMP:
-                return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
-            case D3Q27System::DIR_MPM:
-                return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_PMM:
-                return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
-            case D3Q27System::DIR_MPP:
-                return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
+            case DIR_0P0:
+                return (float)(vf::basics::constant::c4o9 * (+bcVelocityX2));
+            case DIR_0M0:
+                return (float)(vf::basics::constant::c4o9 * (-bcVelocityX2));
+            case DIR_00P:
+                return (float)(vf::basics::constant::c4o9 * (+bcVelocityX3));
+            case DIR_00M:
+                return (float)(vf::basics::constant::c4o9 * (-bcVelocityX3));
+            case DIR_PP0:
+                return (float)(vf::basics::constant::c1o9 * (+bcVelocityX1 + bcVelocityX2));
+            case DIR_MM0:
+                return (float)(vf::basics::constant::c1o9 * (-bcVelocityX1 - bcVelocityX2));
+            case DIR_PM0:
+                return (float)(vf::basics::constant::c1o9 * (+bcVelocityX1 - bcVelocityX2));
+            case DIR_MP0:
+                return (float)(vf::basics::constant::c1o9 * (-bcVelocityX1 + bcVelocityX2));
+            case DIR_P0P:
+                return (float)(vf::basics::constant::c1o9 * (+bcVelocityX1 + bcVelocityX3));
+            case DIR_M0M:
+                return (float)(vf::basics::constant::c1o9 * (-bcVelocityX1 - bcVelocityX3));
+            case DIR_P0M:
+                return (float)(vf::basics::constant::c1o9 * (+bcVelocityX1 - bcVelocityX3));
+            case DIR_M0P:
+                return (float)(vf::basics::constant::c1o9 * (-bcVelocityX1 + bcVelocityX3));
+            case DIR_0PP:
+                return (float)(vf::basics::constant::c1o9 * (+bcVelocityX2 + bcVelocityX3));
+            case DIR_0MM:
+                return (float)(vf::basics::constant::c1o9 * (-bcVelocityX2 - bcVelocityX3));
+            case DIR_0PM:
+                return (float)(vf::basics::constant::c1o9 * (+bcVelocityX2 - bcVelocityX3));
+            case DIR_0MP:
+                return (float)(vf::basics::constant::c1o9 * (-bcVelocityX2 + bcVelocityX3));
+            case DIR_PPP:
+                return (float)(vf::basics::constant::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
+            case DIR_MMM:
+                return (float)(vf::basics::constant::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
+            case DIR_PPM:
+                return (float)(vf::basics::constant::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
+            case DIR_MMP:
+                return (float)(vf::basics::constant::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
+            case DIR_PMP:
+                return (float)(vf::basics::constant::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
+            case DIR_MPM:
+                return (float)(vf::basics::constant::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
+            case DIR_PMM:
+                return (float)(vf::basics::constant::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
+            case DIR_MPP:
+                return (float)(vf::basics::constant::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
             default:
                 throw UbException(UB_EXARGS, "unknown error");
         }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.cpp
index b11e303d82f67b107b7c9b9f427d7fa3fb71eb79..86ac7726170c2322e2749f1d4cfaa92033cf7ff9 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.cpp
@@ -36,7 +36,7 @@
 
 using namespace std;
 /*==========================================================*/
-DensityBCAdapter::DensityBCAdapter(const double &dens, const double &startTime, const double &endTime)
+DensityBCAdapter::DensityBCAdapter(const real &dens, const real &startTime, const real &endTime)
 {
     this->densBCs.emplace_back(dens, startTime, endTime);
     this->init();
@@ -54,7 +54,7 @@ DensityBCAdapter::DensityBCAdapter(const std::vector<BCFunction> &densBCs)
     this->init();
 }
 /*==========================================================*/
-DensityBCAdapter::DensityBCAdapter(const mu::Parser &function, const double &startTime, const double &endTime)
+DensityBCAdapter::DensityBCAdapter(const mu::Parser &function, const real &startTime, const real &endTime)
 {
     this->densBCs.emplace_back(function, startTime, endTime);
     this->init();
@@ -96,11 +96,11 @@ void DensityBCAdapter::init()
     }
 }
 /*==========================================================*/
-void DensityBCAdapter::init(const D3Q27Interactor *const & /*interactor*/, const double &time)
+void DensityBCAdapter::init(const D3Q27Interactor *const & /*interactor*/, const real &time)
 {
     this->timeStep           = time;
     this->tmpDensityFunction = NULL;
-    double maxEndtime        = -Ub::inf;
+    real maxEndtime        = -Ub::inf;
 
     // aktuelle Densityfunction bestimmen
     for (size_t pos = 0; pos < densBCs.size(); ++pos) {
@@ -111,8 +111,8 @@ void DensityBCAdapter::init(const D3Q27Interactor *const & /*interactor*/, const
 
         if (UbMath::greaterEqual(this->timeStep, densBCs[pos].getStartTime())) {
             if (UbMath::lessEqual(this->timeStep, densBCs[pos].getEndTime()) ||
-                UbMath::equal(densBCs[pos].getEndTime(), (double)BCFunction::INFCONST) ||
-                UbMath::equal(densBCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)) {
+                UbMath::equal(densBCs[pos].getEndTime(), (real)BCFunction::INFCONST) ||
+                UbMath::equal(densBCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)) {
                 tmpDensityFunction = &densBCs[pos].getFunction();
                 break;
             }
@@ -130,30 +130,30 @@ void DensityBCAdapter::init(const D3Q27Interactor *const & /*interactor*/, const
                          << "\", timedependant=" << (this->isTimeDependent() ? "true" : "false"));
 }
 /*==========================================================*/
-void DensityBCAdapter::update(const D3Q27Interactor *const &interactor, const double &time)
+void DensityBCAdapter::update(const D3Q27Interactor *const &interactor, const real &time)
 {
     this->init(interactor, time);
 }
 /*==========================================================*/
 void DensityBCAdapter::adaptBCForDirection(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                                           const double & /*worldX1*/, const double & /*worldX2*/,
-                                           const double & /*worldX3*/, const double &q, const int &fdirection,
-                                           const double & /*time*/)
+                                           const real & /*worldX1*/, const real & /*worldX2*/,
+                                           const real & /*worldX3*/, const real &q, const int &fdirection,
+                                           const real & /*time*/)
 {
     bc->setDensityBoundaryFlag(D3Q27System::INVDIR[fdirection], secondaryBcOption);
-    bc->setQ((float)q, fdirection);
+    bc->setQ((real)q, fdirection); // NOTE(review): q is already real, so this cast is a no-op; the previous code narrowed to float explicitly - confirm setQ now takes real
 }
 /*==========================================================*/
-void DensityBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                               const double &worldX2, const double &worldX3, const double &time)
+void DensityBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                               const real &worldX2, const real &worldX3, const real &time)
 {
     this->setNodeDensity(interactor, bc, worldX1, worldX2, worldX3, time);
     bc->setBcAlgorithmType(algorithmType);
 }
 /*==========================================================*/
 void DensityBCAdapter::setNodeDensity(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                                      const double &worldX1, const double &worldX2, const double &worldX3,
-                                      const double &timestep)
+                                      const real &worldX1, const real &worldX2, const real &worldX3,
+                                      const real &timestep)
 {
     // Geschwindigkeiten setzen
     try {
@@ -164,7 +164,7 @@ void DensityBCAdapter::setNodeDensity(const D3Q27Interactor & /*interactor*/, SP
         this->timeStep = timestep;
 
         if (tmpDensityFunction)
-            bc->setBoundaryDensity((float)tmpDensityFunction->Eval());
+            bc->setBoundaryDensity((real)tmpDensityFunction->Eval());
     } catch (mu::Parser::exception_type &e) {
         stringstream error;
         error << "mu::parser exception occurs, message(" << e.GetMsg() << "), formula("
@@ -176,7 +176,7 @@ void DensityBCAdapter::setNodeDensity(const D3Q27Interactor & /*interactor*/, SP
     }
 }
 /*==========================================================*/
-double DensityBCAdapter::getDensity(const double &x1, const double &x2, const double &x3, const double &timeStep)
+real DensityBCAdapter::getDensity(const real &x1, const real &x2, const real &x3, const real &timeStep)
 {
     this->x1       = x1;
     this->x2       = x2;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.h
index 5425d9fcbb57f18ca5e5e57d02133ef8f2a9b8f2..74bfea4dd533ca8bbe81a5941ab302e3ffb06a95 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/DensityBCAdapter.h
@@ -39,7 +39,7 @@
 #include <string>
 #include <vector>
 
-#include "basics/utilities/UbMath.h"
+//#include "basics/utilities/UbMath.h" // NOTE(review): include was commented out rather than removed; delete this line if the header no longer needs UbMath, otherwise restore it
 #include "basics/utilities/UbTuple.h"
 
 #include "BCAdapter.h"
@@ -62,25 +62,25 @@ class DensityBCAdapter : public BCAdapter
 public:
     // constructors
     DensityBCAdapter() { this->init(); }
-    DensityBCAdapter(const double &dens, const double &startTime = 0.0, const double &endTime = BCFunction::INFCONST);
+    DensityBCAdapter(const real &dens, const real &startTime = 0.0, const real &endTime = BCFunction::INFCONST);
     DensityBCAdapter(const BCFunction &densBC);
     DensityBCAdapter(const std::vector<BCFunction> &densBCs);
-    DensityBCAdapter(const mu::Parser &function, const double &startTime = 0.0,
-                     const double &endTime = BCFunction::INFCONST);
+    DensityBCAdapter(const mu::Parser &function, const real &startTime = 0.0,
+                     const real &endTime = BCFunction::INFCONST);
 
     //------------- implements D3Q27BoundaryConditionAdapter ----- start
     std::string toString();
 
-    void init(const D3Q27Interactor *const &interactor, const double &time = 0) override;
-    void update(const D3Q27Interactor *const &interactor, const double &time = 0) override;
+    void init(const D3Q27Interactor *const &interactor, const real &time = 0) override;
+    void update(const D3Q27Interactor *const &interactor, const real &time = 0) override;
 
-    void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                             const double &worldX2, const double &worldX3, const double &q, const int &fdirection,
-                             const double &time = 0) override;
-    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                 const double &worldX2, const double &worldX3, const double &time = 0) override;
+    void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                             const real &worldX2, const real &worldX3, const real &q, const int &fdirection,
+                             const real &time = 0) override;
+    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                 const real &worldX2, const real &worldX3, const real &time = 0) override;
 
-    double getDensity(const double &x1, const double &x2, const double &x3, const double &timeStep);
+    real getDensity(const real &x1, const real &x2, const real &x3, const real &timeStep);
 
     //------------- implements D3Q27BoundaryConditionAdapter ----- end
 
@@ -92,8 +92,8 @@ protected:
     void unsetTimeDependent() { (this->type &= ~TIMEDEPENDENT); }
 
     void clear() { densBCs.clear(); }
-    void setNodeDensity(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                        const double &worldX2, const double &worldX3, const double &timestep);
+    void setNodeDensity(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                        const real &worldX2, const real &worldX3, const real &timestep);
 
 private:
     mu::value_type x1, x2, x3; // brauch man nicht serialisieren!
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/EqDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/EqDensityBCAlgorithm.cpp
index bdddd2369377f1e2b30c86eb243bf4d4a843e06c..fa5dc1bdeff9112a7a0c1a26b9c52ee5f27012a5 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/EqDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/EqDensityBCAlgorithm.cpp
@@ -55,7 +55,9 @@ void EqDensityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributi
 //////////////////////////////////////////////////////////////////////////
 void EqDensityBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
+    using namespace vf::lbm::dir;
+
+    real f[D3Q27System::ENDF + 1];
 
     distributions->getDistributionInv(f, x1, x2, x3);
     int nx1 = x1;
@@ -63,28 +65,28 @@ void EqDensityBCAlgorithm::applyBC()
     int nx3 = x3;
 
     // flag points in direction of fluid
-    if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_P00)) {
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
         nx1 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_M00)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
         nx1 += 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0P0)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
         nx2 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0M0)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
         nx2 += 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00P)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
         nx3 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00M)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
         nx3 += 1;
     } else
         UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
 
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
-    LBMReal rhoBC = bcPtr->getBoundaryDensity();
+    real rhoBC = bcPtr->getBoundaryDensity();
     for (int fdir = D3Q27System::STARTF; fdir <= D3Q27System::ENDF; fdir++) {
         if (bcPtr->hasDensityBoundaryFlag(fdir)) {
             // Ehsan: 15.2.2013:
-            LBMReal ftemp = calcFeqsForDirFct(fdir, rhoBC, vx1, vx2, vx3);
+            real ftemp = calcFeqsForDirFct(fdir, rhoBC, vx1, vx2, vx3);
             distributions->setDistributionForDirection(ftemp, nx1, nx2, nx3, fdir);
         }
     }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/HighViscosityNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/HighViscosityNoSlipBCAlgorithm.cpp
index 9d14940929d45bf70268ed415f4d02457a7c09fc..3ed53ee85f2047cedc5cdc6eb71f607ca8792b6f 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/HighViscosityNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/HighViscosityNoSlipBCAlgorithm.cpp
@@ -55,10 +55,10 @@ void HighViscosityNoSlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D>
 //////////////////////////////////////////////////////////////////////////
 void HighViscosityNoSlipBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistribution(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
     calcFeqFct(feq, rho, vx1, vx2, vx3);
 
@@ -66,8 +66,8 @@ void HighViscosityNoSlipBCAlgorithm::applyBC()
         if (bcPtr->hasNoSlipBoundaryFlag(fDir)) {
             // quadratic bounce back
             const int invDir = D3Q27System::INVDIR[fDir];
-            LBMReal q        = bcPtr->getQ(invDir);
-            LBMReal fReturn =
+            real q        = bcPtr->getQ(invDir);
+            real fReturn =
                 (f[invDir] + q * f[fDir] + q * collFactor * (feq[invDir] - f[invDir] + feq[fDir] - f[fDir])) /
                 (1.0 + q);
             distributions->setDistributionInvForDirection(fReturn, x1 + D3Q27System::DX1[invDir],
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNoSlipBCAlgorithm.cpp
index aafa0da55a085b1025c693cf29bfb18730b92882..73e399fb6ab3df0ea06620da4b5c0f6fedc8428e 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNoSlipBCAlgorithm.cpp
@@ -64,9 +64,9 @@ void MultiphaseNoSlipBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> di
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseNoSlipBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal h[D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
+   real f[D3Q27System::ENDF+1];
+   real h[D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
    //LBMReal feq[D3Q27System::ENDF+1];
    //LBMReal heq[D3Q27System::ENDF+1];
    distributions ->getDistributionInv(f, x1, x2, x3);
@@ -87,15 +87,15 @@ void MultiphaseNoSlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fdir];
-		 LBMReal fReturn = f[invDir];
+		 real fReturn = f[invDir];
          //distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
          distributions->setDistributionForDirection(fReturn, x1, x2, x3, invDir);//delay BB 
-         LBMReal hReturn = h[invDir];
+         real hReturn = h[invDir];
 		// distributionsH->setDistributionForDirection(hReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
          distributionsH->setDistributionForDirection(hReturn, x1, x2, x3, invDir);//delay BB  
          if (distributionsH2)
          {
-             LBMReal h2Return = h2[invDir];
+             real h2Return = h2[invDir];
              distributionsH2->setDistributionForDirection(h2Return, x1, x2, x3, invDir);//delay BB
             // distributionsH2->setDistributionForDirection(h2Return, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNonReflectingOutflowBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNonReflectingOutflowBCAlgorithm.cpp
index 054227ecd9f6d79b4f4f345335184e9da84359eb..918f564b64667223d12169aba21dd659bc3308b6 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNonReflectingOutflowBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseNonReflectingOutflowBCAlgorithm.cpp
@@ -32,14 +32,14 @@
 //=======================================================================================
 
 #include "MultiphaseNonReflectingOutflowBCAlgorithm.h"
+#include "BoundaryConditions.h"
 #include "D3Q27System.h"
 #include "DistributionArray3D.h"
-#include "BoundaryConditions.h"
 
 MultiphaseNonReflectingOutflowBCAlgorithm::MultiphaseNonReflectingOutflowBCAlgorithm()
 {
-   BCAlgorithm::type = BCAlgorithm::NonReflectingOutflowBCAlgorithm;
-   BCAlgorithm::preCollision = true;
+    BCAlgorithm::type = BCAlgorithm::NonReflectingOutflowBCAlgorithm;
+    BCAlgorithm::preCollision = true;
 }
 //////////////////////////////////////////////////////////////////////////
 MultiphaseNonReflectingOutflowBCAlgorithm::~MultiphaseNonReflectingOutflowBCAlgorithm()
@@ -49,17 +49,17 @@ MultiphaseNonReflectingOutflowBCAlgorithm::~MultiphaseNonReflectingOutflowBCAlgo
 SPtr<BCAlgorithm> MultiphaseNonReflectingOutflowBCAlgorithm::clone()
 {
     SPtr<BCAlgorithm> bc(new MultiphaseNonReflectingOutflowBCAlgorithm());
-   return bc;
+    return bc;
 }
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseNonReflectingOutflowBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributions)
 {
-   this->distributions = distributions;
+    this->distributions = distributions;
 }
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseNonReflectingOutflowBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> distributionsH)
 {
-	this->distributionsH = distributionsH;
+    this->distributionsH = distributionsH;
 }
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseNonReflectingOutflowBCAlgorithm::addDistributionsH2(SPtr<DistributionArray3D> distributionsH2)
@@ -69,481 +69,431 @@ void MultiphaseNonReflectingOutflowBCAlgorithm::addDistributionsH2(SPtr<Distribu
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseNonReflectingOutflowBCAlgorithm::applyBC()
 {
-   using namespace D3Q27System;
-   using namespace UbMath;
-   LBMReal f[ENDF+1];
-   LBMReal ftemp[ENDF+1];
-   LBMReal h[D3Q27System::ENDF+1];
-   LBMReal htemp[ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal h2temp[ENDF + 1];
-
-   int nx1 = x1;
-   int nx2 = x2;
-   int nx3 = x3;
-   int direction = -1;
-
-   //flag points in direction of fluid
-   if      (bcPtr->hasDensityBoundaryFlag(DIR_P00)) { nx1 += 1; direction = DIR_P00; }
-   else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) { nx1 -= 1; direction = DIR_M00; }
-   else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) { nx2 += 1; direction = DIR_0P0; }
-   else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) { nx2 -= 1; direction = DIR_0M0; }
-   else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) { nx3 += 1; direction = DIR_00P; }
-   else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) { nx3 -= 1; direction = DIR_00M; }
-   else UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
-
-   distributions->getDistribution(f, x1, x2, x3);
-   distributions->getDistribution(ftemp, nx1, nx2, nx3);
-   distributionsH->getDistribution(h, x1, x2, x3);
-   distributionsH->getDistribution(htemp, nx1, nx2, nx3);
-   distributionsH2->getDistribution(h2, x1, x2, x3);
-   distributionsH2->getDistribution(h2temp, nx1, nx2, nx3);
-
-   LBMReal /* phi,*/ p1, vx1, vx2, vx3;
-   
-   //D3Q27System::calcDensity(h, phi);
-   
-   calcMacrosFct(f, p1, vx1, vx2, vx3);
-
-   switch (direction)
-   {
-   case DIR_P00:
-      f[DIR_P00]   = ftemp[DIR_P00]   * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_P00]   ;
-      f[DIR_PP0]  = ftemp[DIR_PP0]  * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_PP0]  ;
-      f[DIR_PM0]  = ftemp[DIR_PM0]  * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_PM0]  ;
-      f[DIR_P0P]  = ftemp[DIR_P0P]  * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_P0P]  ;
-      f[DIR_P0M]  = ftemp[DIR_P0M]  * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_P0M]  ;
-      f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_PPP] ;
-      f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_PMP] ;
-      f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_PPM] ;
-      f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*f[DIR_PMM] ;
-
-      distributions->setDistributionInvForDirection(f[DIR_P00],   x1+DX1[DIR_M00],   x2+DX2[DIR_M00],   x3+DX3[DIR_M00],   DIR_M00);
-      distributions->setDistributionInvForDirection(f[DIR_PP0],  x1+DX1[DIR_MM0],  x2+DX2[DIR_MM0],  x3+DX3[DIR_MM0],  DIR_MM0);
-      distributions->setDistributionInvForDirection(f[DIR_PM0],  x1+DX1[DIR_MP0],  x2+DX2[DIR_MP0],  x3+DX3[DIR_MP0],  DIR_MP0);
-      distributions->setDistributionInvForDirection(f[DIR_P0P],  x1+DX1[DIR_M0M],  x2+DX2[DIR_M0M],  x3+DX3[DIR_M0M],  DIR_M0M);
-      distributions->setDistributionInvForDirection(f[DIR_P0M],  x1+DX1[DIR_M0P],  x2+DX2[DIR_M0P],  x3+DX3[DIR_M0P],  DIR_M0P);
-      distributions->setDistributionInvForDirection(f[DIR_PPP], x1+DX1[DIR_MMM], x2+DX2[DIR_MMM], x3+DX3[DIR_MMM], DIR_MMM);
-      distributions->setDistributionInvForDirection(f[DIR_PMP], x1+DX1[DIR_MPM], x2+DX2[DIR_MPM], x3+DX3[DIR_MPM], DIR_MPM);
-      distributions->setDistributionInvForDirection(f[DIR_PPM], x1+DX1[DIR_MMP], x2+DX2[DIR_MMP], x3+DX3[DIR_MMP], DIR_MMP);
-      distributions->setDistributionInvForDirection(f[DIR_PMM], x1+DX1[DIR_MPP], x2+DX2[DIR_MPP], x3+DX3[DIR_MPP], DIR_MPP);
-      
-	  h[DIR_P00]   = htemp[DIR_P00]   * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_P00]   ;
-	  h[DIR_PP0]  = htemp[DIR_PP0]  * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_PP0]  ;
-	  h[DIR_PM0]  = htemp[DIR_PM0]  * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_PM0]  ;
-	  h[DIR_P0P]  = htemp[DIR_P0P]  * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_P0P]  ;
-	  h[DIR_P0M]  = htemp[DIR_P0M]  * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_P0M]  ;
-	  h[DIR_PPP] = htemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_PPP] ;
-	  h[DIR_PMP] = htemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_PMP] ;
-	  h[DIR_PPM] = htemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_PPM] ;
-	  h[DIR_PMM] = htemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1)*h[DIR_PMM] ;
-
-	  distributionsH->setDistributionInvForDirection(h[DIR_P00],   x1+DX1[DIR_M00],   x2+DX2[DIR_M00],   x3+DX3[DIR_M00],   DIR_M00);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PP0],  x1+DX1[DIR_MM0],  x2+DX2[DIR_MM0],  x3+DX3[DIR_MM0],  DIR_MM0);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PM0],  x1+DX1[DIR_MP0],  x2+DX2[DIR_MP0],  x3+DX3[DIR_MP0],  DIR_MP0);
-	  distributionsH->setDistributionInvForDirection(h[DIR_P0P],  x1+DX1[DIR_M0M],  x2+DX2[DIR_M0M],  x3+DX3[DIR_M0M],  DIR_M0M);
-	  distributionsH->setDistributionInvForDirection(h[DIR_P0M],  x1+DX1[DIR_M0P],  x2+DX2[DIR_M0P],  x3+DX3[DIR_M0P],  DIR_M0P);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PPP], x1+DX1[DIR_MMM], x2+DX2[DIR_MMM], x3+DX3[DIR_MMM], DIR_MMM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PMP], x1+DX1[DIR_MPM], x2+DX2[DIR_MPM], x3+DX3[DIR_MPM], DIR_MPM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PPM], x1+DX1[DIR_MMP], x2+DX2[DIR_MMP], x3+DX3[DIR_MMP], DIR_MMP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PMM], x1+DX1[DIR_MPP], x2+DX2[DIR_MPP], x3+DX3[DIR_MPP], DIR_MPP);
-
-	  h2[DIR_P00] = h2temp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_P00];
-      h2[DIR_PP0] = h2temp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PP0];
-      h2[DIR_PM0] = h2temp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PM0];
-      h2[DIR_P0P] = h2temp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_P0P];
-      h2[DIR_P0M] = h2temp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_P0M];
-      h2[DIR_PPP] = h2temp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PPP];
-      h2[DIR_PMP] = h2temp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PMP];
-      h2[DIR_PPM] = h2temp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PPM];
-      h2[DIR_PMM] = h2temp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PMM];
-
-      distributionsH2->setDistributionInvForDirection(h2[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
-	  
-	  break;
-   case DIR_M00:
-      if (false /* vx1 >= 0*/) {
-
-          f[DIR_M00] = ftemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M00];
-          f[DIR_MP0] = ftemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MP0];
-          f[DIR_MM0] = ftemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MM0];
-          f[DIR_M0P] = ftemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0P];
-          f[DIR_M0M] = ftemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0M];
-          f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPP];
-          f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMP];
-          f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPM];
-          f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMM];
-
-          distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
-          distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
-          distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
-          distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
-          distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
-          distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
-          distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
-          distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
-          distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
-
-          h[DIR_M00] = htemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M00];
-          h[DIR_MP0] = htemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MP0];
-          h[DIR_MM0] = htemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MM0];
-          h[DIR_M0P] = htemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M0P];
-          h[DIR_M0M] = htemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M0M];
-          h[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MPP];
-          h[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MMP];
-          h[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MPM];
-          h[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MMM];
-
-          distributionsH->setDistributionInvForDirection(h[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
-          distributionsH->setDistributionInvForDirection(h[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
-          distributionsH->setDistributionInvForDirection(h[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
-          distributionsH->setDistributionInvForDirection(h[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
-          distributionsH->setDistributionInvForDirection(h[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
-          distributionsH->setDistributionInvForDirection(h[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
-          distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
-          distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
-          distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
-
-          h2[DIR_M00] = htemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_M00];
-          h2[DIR_MP0] = htemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MP0];
-          h2[DIR_MM0] = htemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MM0];
-          h2[DIR_M0P] = htemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_M0P];
-          h2[DIR_M0M] = htemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_M0M];
-          h2[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MPP];
-          h2[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MMP];
-          h2[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MPM];
-          h2[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MMM];
-
-          distributionsH2->setDistributionInvForDirection(h2[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
-      } else {
-
-          f[DIR_M00]   = ftemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M00];
-          f[DIR_MP0]  = ftemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MP0];
-          f[DIR_MM0]  = ftemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MM0];
-          f[DIR_M0P]  = ftemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0P];
-          f[DIR_M0M]  = ftemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0M];
-          f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPP];
-          f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMP];
-          f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPM];
-          f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMM];
-
-          distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
-          distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
-          distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
-          distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
-          distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
-          distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
-          distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
-          distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
-          distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
-
-          h[DIR_M00]   = htemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M00];
-          h[DIR_MP0]  = htemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MP0];
-          h[DIR_MM0]  = htemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MM0];
-          h[DIR_M0P]  = htemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M0P];
-          h[DIR_M0M]  = htemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M0M];
-          h[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MPP];
-          h[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MMP];
-          h[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MPM];
-          h[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MMM];
-
-          distributionsH->setDistributionInvForDirection(h[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
-          distributionsH->setDistributionInvForDirection(h[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
-          distributionsH->setDistributionInvForDirection(h[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
-          distributionsH->setDistributionInvForDirection(h[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
-          distributionsH->setDistributionInvForDirection(h[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
-          distributionsH->setDistributionInvForDirection(h[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
-          distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
-          distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
-          distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
-
-          h2[DIR_M00]   = 0.5 * (htemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1)   * h2[DIR_M00]);
-          h2[DIR_MP0]  = 0.5 * (htemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1)  * h2[DIR_MP0]);
-          h2[DIR_MM0]  = 0.5 * (htemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1)  * h2[DIR_MM0]);
-          h2[DIR_M0P]  = 0.5 * (htemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1)  * h2[DIR_M0P]);
-          h2[DIR_M0M]  = 0.5 * (htemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1)  * h2[DIR_M0M]);
-          h2[DIR_MPP] = 0.5 * (htemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MPP]);
-          h2[DIR_MMP] = 0.5 * (htemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MMP]);
-          h2[DIR_MPM] = 0.5 * (htemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MPM]);
-          h2[DIR_MMM] = 0.5 * (htemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MMM]);
-
-          distributionsH2->setDistributionInvForDirection(h2[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
-          distributionsH2->setDistributionInvForDirection(h2[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);      
-      }
-      break;
-   case DIR_0P0:
-      f[DIR_0P0]   = ftemp[DIR_0P0]   * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_0P0]   ;
-      f[DIR_PP0]  = ftemp[DIR_PP0]  * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_PP0]  ;
-      f[DIR_MP0]  = ftemp[DIR_MP0]  * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_MP0]  ;
-      f[DIR_0PP]  = ftemp[DIR_0PP]  * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_0PP]  ;
-      f[DIR_0PM]  = ftemp[DIR_0PM]  * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_0PM]  ;
-      f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_PPP] ;
-      f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_MPP] ;
-      f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_PPM] ;
-      f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*f[DIR_MPM] ;
-
-      distributions->setDistributionInvForDirection(f[DIR_0P0],   x1+DX1[DIR_0M0],   x2+DX2[DIR_0M0],   x3+DX3[DIR_0M0],     DIR_0M0);
-      distributions->setDistributionInvForDirection(f[DIR_PP0],  x1+DX1[DIR_MM0],  x2+DX2[DIR_MM0],  x3+DX3[DIR_MM0],   DIR_MM0);
-      distributions->setDistributionInvForDirection(f[DIR_MP0],  x1+DX1[DIR_PM0],  x2+DX2[DIR_PM0],  x3+DX3[DIR_PM0],   DIR_PM0);
-      distributions->setDistributionInvForDirection(f[DIR_0PP],  x1+DX1[DIR_0MM],  x2+DX2[DIR_0MM],  x3+DX3[DIR_0MM],   DIR_0MM);
-      distributions->setDistributionInvForDirection(f[DIR_0PM],  x1+DX1[DIR_0MP],  x2+DX2[DIR_0MP],  x3+DX3[DIR_0MP],   DIR_0MP);
-      distributions->setDistributionInvForDirection(f[DIR_PPP], x1+DX1[DIR_MMM], x2+DX2[DIR_MMM], x3+DX3[DIR_MMM], DIR_MMM);
-      distributions->setDistributionInvForDirection(f[DIR_MPP], x1+DX1[DIR_PMM], x2+DX2[DIR_PMM], x3+DX3[DIR_PMM], DIR_PMM);
-      distributions->setDistributionInvForDirection(f[DIR_PPM], x1+DX1[DIR_MMP], x2+DX2[DIR_MMP], x3+DX3[DIR_MMP], DIR_MMP);
-      distributions->setDistributionInvForDirection(f[DIR_MPM], x1+DX1[DIR_PMP], x2+DX2[DIR_PMP], x3+DX3[DIR_PMP], DIR_PMP);
-
-	  h[DIR_0P0]   = htemp[DIR_0P0]   * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_0P0]   ;
-	  h[DIR_PP0]  = htemp[DIR_PP0]  * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_PP0]  ;
-	  h[DIR_MP0]  = htemp[DIR_MP0]  * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_MP0]  ;
-	  h[DIR_0PP]  = htemp[DIR_0PP]  * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_0PP]  ;
-	  h[DIR_0PM]  = htemp[DIR_0PM]  * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_0PM]  ;
-	  h[DIR_PPP] = htemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_PPP] ;
-	  h[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_MPP] ;
-	  h[DIR_PPM] = htemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_PPM] ;
-	  h[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2)*h[DIR_MPM] ;
-
-	  distributionsH->setDistributionInvForDirection(h[DIR_0P0],   x1+DX1[DIR_0M0],   x2+DX2[DIR_0M0],   x3+DX3[DIR_0M0],     DIR_0M0);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PP0],  x1+DX1[DIR_MM0],  x2+DX2[DIR_MM0],  x3+DX3[DIR_MM0],   DIR_MM0);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MP0],  x1+DX1[DIR_PM0],  x2+DX2[DIR_PM0],  x3+DX3[DIR_PM0],   DIR_PM0);
-	  distributionsH->setDistributionInvForDirection(h[DIR_0PP],  x1+DX1[DIR_0MM],  x2+DX2[DIR_0MM],  x3+DX3[DIR_0MM],   DIR_0MM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_0PM],  x1+DX1[DIR_0MP],  x2+DX2[DIR_0MP],  x3+DX3[DIR_0MP],   DIR_0MP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PPP], x1+DX1[DIR_MMM], x2+DX2[DIR_MMM], x3+DX3[DIR_MMM], DIR_MMM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MPP], x1+DX1[DIR_PMM], x2+DX2[DIR_PMM], x3+DX3[DIR_PMM], DIR_PMM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PPM], x1+DX1[DIR_MMP], x2+DX2[DIR_MMP], x3+DX3[DIR_MMP], DIR_MMP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1+DX1[DIR_PMP], x2+DX2[DIR_PMP], x3+DX3[DIR_PMP], DIR_PMP);
-
-	  h2[DIR_0P0] = htemp[DIR_0P0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_0P0];
-      h2[DIR_PP0] = htemp[DIR_PP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_PP0];
-      h2[DIR_MP0] = htemp[DIR_MP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_MP0];
-      h2[DIR_0PP] = htemp[DIR_0PP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_0PP];
-      h2[DIR_0PM] = htemp[DIR_0PM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_0PM];
-      h2[DIR_PPP] = htemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_PPP];
-      h2[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_MPP];
-      h2[DIR_PPM] = htemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_PPM];
-      h2[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_MPM];
-
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
-
-      break;
-   case DIR_0M0:
-      f[DIR_0M0]   = ftemp[DIR_0M0]   * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_0M0]   ;
-      f[DIR_PM0]  = ftemp[DIR_PM0]  * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_PM0]  ;
-      f[DIR_MM0]  = ftemp[DIR_MM0]  * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_MM0]  ;
-      f[DIR_0MP]  = ftemp[DIR_0MP]  * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_0MP]  ;
-      f[DIR_0MM]  = ftemp[DIR_0MM]  * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_0MM]  ;
-      f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_PMP] ;
-      f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_MMP] ;
-      f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_PMM] ;
-      f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*f[DIR_MMM] ;
-
-      distributions->setDistributionInvForDirection(f[DIR_0M0],   x1+DX1[DIR_0P0],   x2+DX2[DIR_0P0],   x3+DX3[DIR_0P0],     DIR_0P0);
-      distributions->setDistributionInvForDirection(f[DIR_PM0],  x1+DX1[DIR_MP0],  x2+DX2[DIR_MP0],  x3+DX3[DIR_MP0],   DIR_MP0);
-      distributions->setDistributionInvForDirection(f[DIR_MM0],  x1+DX1[DIR_PP0],  x2+DX2[DIR_PP0],  x3+DX3[DIR_PP0],   DIR_PP0);
-      distributions->setDistributionInvForDirection(f[DIR_0MP],  x1+DX1[DIR_0PM],  x2+DX2[DIR_0PM],  x3+DX3[DIR_0PM],   DIR_0PM);
-      distributions->setDistributionInvForDirection(f[DIR_0MM],  x1+DX1[DIR_0PP],  x2+DX2[DIR_0PP],  x3+DX3[DIR_0PP],   DIR_0PP);
-      distributions->setDistributionInvForDirection(f[DIR_PMP], x1+DX1[DIR_MPM], x2+DX2[DIR_MPM], x3+DX3[DIR_MPM], DIR_MPM);
-      distributions->setDistributionInvForDirection(f[DIR_MMP], x1+DX1[DIR_PPM], x2+DX2[DIR_PPM], x3+DX3[DIR_PPM], DIR_PPM);
-      distributions->setDistributionInvForDirection(f[DIR_PMM], x1+DX1[DIR_MPP], x2+DX2[DIR_MPP], x3+DX3[DIR_MPP], DIR_MPP);
-      distributions->setDistributionInvForDirection(f[DIR_MMM], x1+DX1[DIR_PPP], x2+DX2[DIR_PPP], x3+DX3[DIR_PPP], DIR_PPP);
-
-	  h[DIR_0M0]   = htemp[DIR_0M0]   * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_0M0]   ;
-	  h[DIR_PM0]  = htemp[DIR_PM0]  * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_PM0]  ;
-	  h[DIR_MM0]  = htemp[DIR_MM0]  * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_MM0]  ;
-	  h[DIR_0MP]  = htemp[DIR_0MP]  * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_0MP]  ;
-	  h[DIR_0MM]  = htemp[DIR_0MM]  * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_0MM]  ;
-	  h[DIR_PMP] = htemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_PMP] ;
-	  h[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_MMP] ;
-	  h[DIR_PMM] = htemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_PMM] ;
-	  h[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2)*h[DIR_MMM] ;
-
-	  distributionsH->setDistributionInvForDirection(h[DIR_0M0],   x1+DX1[DIR_0P0],   x2+DX2[DIR_0P0],   x3+DX3[DIR_0P0],     DIR_0P0);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PM0],  x1+DX1[DIR_MP0],  x2+DX2[DIR_MP0],  x3+DX3[DIR_MP0],   DIR_MP0);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MM0],  x1+DX1[DIR_PP0],  x2+DX2[DIR_PP0],  x3+DX3[DIR_PP0],   DIR_PP0);
-	  distributionsH->setDistributionInvForDirection(h[DIR_0MP],  x1+DX1[DIR_0PM],  x2+DX2[DIR_0PM],  x3+DX3[DIR_0PM],   DIR_0PM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_0MM],  x1+DX1[DIR_0PP],  x2+DX2[DIR_0PP],  x3+DX3[DIR_0PP],   DIR_0PP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PMP], x1+DX1[DIR_MPM], x2+DX2[DIR_MPM], x3+DX3[DIR_MPM], DIR_MPM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1+DX1[DIR_PPM], x2+DX2[DIR_PPM], x3+DX3[DIR_PPM], DIR_PPM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PMM], x1+DX1[DIR_MPP], x2+DX2[DIR_MPP], x3+DX3[DIR_MPP], DIR_MPP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1+DX1[DIR_PPP], x2+DX2[DIR_PPP], x3+DX3[DIR_PPP], DIR_PPP);
-
-	  h2[DIR_0M0] = htemp[DIR_0M0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_0M0];
-      h2[DIR_PM0] = htemp[DIR_PM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_PM0];
-      h2[DIR_MM0] = htemp[DIR_MM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_MM0];
-      h2[DIR_0MP] = htemp[DIR_0MP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_0MP];
-      h2[DIR_0MM] = htemp[DIR_0MM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_0MM];
-      h2[DIR_PMP] = htemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_PMP];
-      h2[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_MMP];
-      h2[DIR_PMM] = htemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_PMM];
-      h2[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_MMM];
-
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
-
-      break;
-   case DIR_00P:
-      f[DIR_00P]   = ftemp[DIR_00P]   * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_00P]   ;
-      f[DIR_P0P]  = ftemp[DIR_P0P]  * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_P0P]  ;
-      f[DIR_M0P]  = ftemp[DIR_M0P]  * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_M0P]  ;
-      f[DIR_0PP]  = ftemp[DIR_0PP]  * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_0PP]  ;
-      f[DIR_0MP]  = ftemp[DIR_0MP]  * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_0MP]  ;
-      f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_PPP] ;
-      f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_MPP] ;
-      f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_PMP] ;
-      f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*f[DIR_MMP] ;
-
-      distributions->setDistributionInvForDirection(f[DIR_00P],   x1+DX1[DIR_00M],   x2+DX2[DIR_00M],   x3+DX3[DIR_00M],     DIR_00M);
-      distributions->setDistributionInvForDirection(f[DIR_P0P],  x1+DX1[DIR_M0M],  x2+DX2[DIR_M0M],  x3+DX3[DIR_M0M],   DIR_M0M);
-      distributions->setDistributionInvForDirection(f[DIR_M0P],  x1+DX1[DIR_P0M],  x2+DX2[DIR_P0M],  x3+DX3[DIR_P0M],   DIR_P0M);
-      distributions->setDistributionInvForDirection(f[DIR_0PP],  x1+DX1[DIR_0MM],  x2+DX2[DIR_0MM],  x3+DX3[DIR_0MM],   DIR_0MM);
-      distributions->setDistributionInvForDirection(f[DIR_0MP],  x1+DX1[DIR_0PM],  x2+DX2[DIR_0PM],  x3+DX3[DIR_0PM],   DIR_0PM);
-      distributions->setDistributionInvForDirection(f[DIR_PPP], x1+DX1[DIR_MMM], x2+DX2[DIR_MMM], x3+DX3[DIR_MMM], DIR_MMM);
-      distributions->setDistributionInvForDirection(f[DIR_MPP], x1+DX1[DIR_PMM], x2+DX2[DIR_PMM], x3+DX3[DIR_PMM], DIR_PMM);
-      distributions->setDistributionInvForDirection(f[DIR_PMP], x1+DX1[DIR_MPM], x2+DX2[DIR_MPM], x3+DX3[DIR_MPM], DIR_MPM);
-      distributions->setDistributionInvForDirection(f[DIR_MMP], x1+DX1[DIR_PPM], x2+DX2[DIR_PPM], x3+DX3[DIR_PPM], DIR_PPM);
-
-	  h[DIR_00P]   = htemp[DIR_00P]   * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_00P]   ;
-	  h[DIR_P0P]  = htemp[DIR_P0P]  * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_P0P]  ;
-	  h[DIR_M0P]  = htemp[DIR_M0P]  * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_M0P]  ;
-	  h[DIR_0PP]  = htemp[DIR_0PP]  * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_0PP]  ;
-	  h[DIR_0MP]  = htemp[DIR_0MP]  * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_0MP]  ;
-	  h[DIR_PPP] = htemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_PPP] ;
-	  h[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_MPP] ;
-	  h[DIR_PMP] = htemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_PMP] ;
-	  h[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3)*h[DIR_MMP] ;
-
-	  distributionsH->setDistributionInvForDirection(h[DIR_00P],   x1+DX1[DIR_00M],   x2+DX2[DIR_00M],   x3+DX3[DIR_00M],     DIR_00M);
-	  distributionsH->setDistributionInvForDirection(h[DIR_P0P],  x1+DX1[DIR_M0M],  x2+DX2[DIR_M0M],  x3+DX3[DIR_M0M],   DIR_M0M);
-	  distributionsH->setDistributionInvForDirection(h[DIR_M0P],  x1+DX1[DIR_P0M],  x2+DX2[DIR_P0M],  x3+DX3[DIR_P0M],   DIR_P0M);
-	  distributionsH->setDistributionInvForDirection(h[DIR_0PP],  x1+DX1[DIR_0MM],  x2+DX2[DIR_0MM],  x3+DX3[DIR_0MM],   DIR_0MM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_0MP],  x1+DX1[DIR_0PM],  x2+DX2[DIR_0PM],  x3+DX3[DIR_0PM],   DIR_0PM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PPP], x1+DX1[DIR_MMM], x2+DX2[DIR_MMM], x3+DX3[DIR_MMM], DIR_MMM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MPP], x1+DX1[DIR_PMM], x2+DX2[DIR_PMM], x3+DX3[DIR_PMM], DIR_PMM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PMP], x1+DX1[DIR_MPM], x2+DX2[DIR_MPM], x3+DX3[DIR_MPM], DIR_MPM);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1+DX1[DIR_PPM], x2+DX2[DIR_PPM], x3+DX3[DIR_PPM], DIR_PPM);
-
-	  h2[DIR_00P] = htemp[DIR_00P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_00P];
-      h2[DIR_P0P] = htemp[DIR_P0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_P0P];
-      h2[DIR_M0P] = htemp[DIR_M0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_M0P];
-      h2[DIR_0PP] = htemp[DIR_0PP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_0PP];
-      h2[DIR_0MP] = htemp[DIR_0MP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_0MP];
-      h2[DIR_PPP] = htemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_PPP];
-      h2[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_MPP];
-      h2[DIR_PMP] = htemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_PMP];
-      h2[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_MMP];
-
-      distributionsH2->setDistributionInvForDirection(h2[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
-
-      break;
-   case DIR_00M:
-      f[DIR_00M]   = ftemp[DIR_00M]   * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_00M]   ;
-      f[DIR_P0M]  = ftemp[DIR_P0M]  * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_P0M]  ;
-      f[DIR_M0M]  = ftemp[DIR_M0M]  * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_M0M]  ;
-      f[DIR_0PM]  = ftemp[DIR_0PM]  * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_0PM]  ;
-      f[DIR_0MM]  = ftemp[DIR_0MM]  * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_0MM]  ;
-      f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_PPM] ;
-      f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_MPM] ;
-      f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_PMM] ;
-      f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*f[DIR_MMM] ;
-
-      distributions->setDistributionInvForDirection(f[DIR_00M],   x1+DX1[DIR_00P],   x2+DX2[DIR_00P],   x3+DX3[DIR_00P],     DIR_00P);
-      distributions->setDistributionInvForDirection(f[DIR_P0M],  x1+DX1[DIR_M0P],  x2+DX2[DIR_M0P],  x3+DX3[DIR_M0P],   DIR_M0P);
-      distributions->setDistributionInvForDirection(f[DIR_M0M],  x1+DX1[DIR_P0P],  x2+DX2[DIR_P0P],  x3+DX3[DIR_P0P],   DIR_P0P);
-      distributions->setDistributionInvForDirection(f[DIR_0PM],  x1+DX1[DIR_0MP],  x2+DX2[DIR_0MP],  x3+DX3[DIR_0MP],   DIR_0MP);
-      distributions->setDistributionInvForDirection(f[DIR_0MM],  x1+DX1[DIR_0PP],  x2+DX2[DIR_0PP],  x3+DX3[DIR_0PP],   DIR_0PP);
-      distributions->setDistributionInvForDirection(f[DIR_PPM], x1+DX1[DIR_MMP], x2+DX2[DIR_MMP], x3+DX3[DIR_MMP], DIR_MMP);
-      distributions->setDistributionInvForDirection(f[DIR_MPM], x1+DX1[DIR_PMP], x2+DX2[DIR_PMP], x3+DX3[DIR_PMP], DIR_PMP);
-      distributions->setDistributionInvForDirection(f[DIR_PMM], x1+DX1[DIR_MPP], x2+DX2[DIR_MPP], x3+DX3[DIR_MPP], DIR_MPP);
-      distributions->setDistributionInvForDirection(f[DIR_MMM], x1+DX1[DIR_PPP], x2+DX2[DIR_PPP], x3+DX3[DIR_PPP], DIR_PPP);
-
-	  h[DIR_00M]   = htemp[DIR_00M]   * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_00M]   ;
-	  h[DIR_P0M]  = htemp[DIR_P0M]  * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_P0M]  ;
-	  h[DIR_M0M]  = htemp[DIR_M0M]  * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_M0M]  ;
-	  h[DIR_0PM]  = htemp[DIR_0PM]  * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_0PM]  ;
-	  h[DIR_0MM]  = htemp[DIR_0MM]  * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_0MM]  ;
-	  h[DIR_PPM] = htemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_PPM] ;
-	  h[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_MPM] ;
-	  h[DIR_PMM] = htemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_PMM] ;
-	  h[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3)*h[DIR_MMM] ;
-
-	  distributionsH->setDistributionInvForDirection(h[DIR_00M],   x1+DX1[DIR_00P],   x2+DX2[DIR_00P],   x3+DX3[DIR_00P],     DIR_00P);
-	  distributionsH->setDistributionInvForDirection(h[DIR_P0M],  x1+DX1[DIR_M0P],  x2+DX2[DIR_M0P],  x3+DX3[DIR_M0P],   DIR_M0P);
-	  distributionsH->setDistributionInvForDirection(h[DIR_M0M],  x1+DX1[DIR_P0P],  x2+DX2[DIR_P0P],  x3+DX3[DIR_P0P],   DIR_P0P);
-	  distributionsH->setDistributionInvForDirection(h[DIR_0PM],  x1+DX1[DIR_0MP],  x2+DX2[DIR_0MP],  x3+DX3[DIR_0MP],   DIR_0MP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_0MM],  x1+DX1[DIR_0PP],  x2+DX2[DIR_0PP],  x3+DX3[DIR_0PP],   DIR_0PP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PPM], x1+DX1[DIR_MMP], x2+DX2[DIR_MMP], x3+DX3[DIR_MMP], DIR_MMP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1+DX1[DIR_PMP], x2+DX2[DIR_PMP], x3+DX3[DIR_PMP], DIR_PMP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_PMM], x1+DX1[DIR_MPP], x2+DX2[DIR_MPP], x3+DX3[DIR_MPP], DIR_MPP);
-	  distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1+DX1[DIR_PPP], x2+DX2[DIR_PPP], x3+DX3[DIR_PPP], DIR_PPP);
-
-	  h2[DIR_00M] = htemp[DIR_00M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_00M];
-      h2[DIR_P0M] = htemp[DIR_P0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_P0M];
-      h2[DIR_M0M] = htemp[DIR_M0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_M0M];
-      h2[DIR_0PM] = htemp[DIR_0PM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_0PM];
-      h2[DIR_0MM] = htemp[DIR_0MM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_0MM];
-      h2[DIR_PPM] = htemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_PPM];
-      h2[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_MPM];
-      h2[DIR_PMM] = htemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_PMM];
-      h2[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_MMM];
-
-      distributionsH2->setDistributionInvForDirection(h2[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
-      distributionsH2->setDistributionInvForDirection(h2[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
-
-      break;
-   default:
-      UB_THROW(UbException(UB_EXARGS, "It isn't implemented non reflecting density boundary for this direction!"));
-   }
+    using namespace D3Q27System;
+//    using namespace UbMath;
+    using namespace vf::lbm::dir;
+    using namespace vf::basics::constant;
+
+    real f[ENDF + 1];
+    real ftemp[ENDF + 1];
+    real h[D3Q27System::ENDF + 1];
+    real htemp[ENDF + 1];
+    real h2[D3Q27System::ENDF + 1];
+    real h2temp[ENDF + 1];
+
+    int nx1 = x1;
+    int nx2 = x2;
+    int nx3 = x3;
+    int direction = -1;
+
+    // flag points in direction of fluid
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
+        nx1 += 1;
+        direction = DIR_P00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
+        nx1 -= 1;
+        direction = DIR_M00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
+        nx2 += 1;
+        direction = DIR_0P0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
+        nx2 -= 1;
+        direction = DIR_0M0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
+        nx3 += 1;
+        direction = DIR_00P;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
+        nx3 -= 1;
+        direction = DIR_00M;
+    } else
+        UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
+
+    distributions->getDistribution(f, x1, x2, x3);
+    distributions->getDistribution(ftemp, nx1, nx2, nx3);
+    distributionsH->getDistribution(h, x1, x2, x3);
+    distributionsH->getDistribution(htemp, nx1, nx2, nx3);
+    distributionsH2->getDistribution(h2, x1, x2, x3);
+    distributionsH2->getDistribution(h2temp, nx1, nx2, nx3);
+
+    real /* phi,*/ p1, vx1, vx2, vx3;
+
+    // D3Q27System::calcDensity(h, phi);
+
+    calcMacrosFct(f, p1, vx1, vx2, vx3);
+
+    switch (direction) {
+        case DIR_P00:
+            f[DIR_P00] = ftemp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P00];
+            f[DIR_PP0] = ftemp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PP0];
+            f[DIR_PM0] = ftemp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PM0];
+            f[DIR_P0P] = ftemp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0P];
+            f[DIR_P0M] = ftemp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0M];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+
+            h[DIR_P00] = htemp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_P00];
+            h[DIR_PP0] = htemp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_PP0];
+            h[DIR_PM0] = htemp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_PM0];
+            h[DIR_P0P] = htemp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_P0P];
+            h[DIR_P0M] = htemp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_P0M];
+            h[DIR_PPP] = htemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_PPP];
+            h[DIR_PMP] = htemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_PMP];
+            h[DIR_PPM] = htemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_PPM];
+            h[DIR_PMM] = htemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h[DIR_PMM];
+
+            distributionsH->setDistributionInvForDirection(h[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
+            distributionsH->setDistributionInvForDirection(h[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributionsH->setDistributionInvForDirection(h[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributionsH->setDistributionInvForDirection(h[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributionsH->setDistributionInvForDirection(h[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributionsH->setDistributionInvForDirection(h[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributionsH->setDistributionInvForDirection(h[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributionsH->setDistributionInvForDirection(h[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributionsH->setDistributionInvForDirection(h[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+
+            h2[DIR_P00] = c1o2 * (h2temp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_P00]);
+            h2[DIR_PP0] = c1o2 * (h2temp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PP0]);
+            h2[DIR_PM0] = c1o2 * (h2temp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PM0]);
+            h2[DIR_P0P] = c1o2 * (h2temp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_P0P]);
+            h2[DIR_P0M] = c1o2 * (h2temp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_P0M]);
+            h2[DIR_PPP] = c1o2 * (h2temp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PPP]);
+            h2[DIR_PMP] = c1o2 * (h2temp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PMP]);
+            h2[DIR_PPM] = c1o2 * (h2temp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PPM]);
+            h2[DIR_PMM] = c1o2 * (h2temp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * h2[DIR_PMM]);
+
+            distributionsH2->setDistributionInvForDirection(h2[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+
+            break;
+        case DIR_M00:
+            f[DIR_M00] = ftemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M00];
+            f[DIR_MP0] = ftemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MP0];
+            f[DIR_MM0] = ftemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MM0];
+            f[DIR_M0P] = ftemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0P];
+            f[DIR_M0M] = ftemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0M];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMP];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+
+            h[DIR_M00] = htemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M00];
+            h[DIR_MP0] = htemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MP0];
+            h[DIR_MM0] = htemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MM0];
+            h[DIR_M0P] = htemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M0P];
+            h[DIR_M0M] = htemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_M0M];
+            h[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MPP];
+            h[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MMP];
+            h[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MPM];
+            h[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h[DIR_MMM];
+
+            distributionsH->setDistributionInvForDirection(h[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
+            distributionsH->setDistributionInvForDirection(h[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributionsH->setDistributionInvForDirection(h[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributionsH->setDistributionInvForDirection(h[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributionsH->setDistributionInvForDirection(h[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributionsH->setDistributionInvForDirection(h[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+
+            h2[DIR_M00] = c1o2 * (htemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_M00]);
+            h2[DIR_MP0] = c1o2 * (htemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MP0]);
+            h2[DIR_MM0] = c1o2 * (htemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MM0]);
+            h2[DIR_M0P] = c1o2 * (htemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_M0P]);
+            h2[DIR_M0M] = c1o2 * (htemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_M0M]);
+            h2[DIR_MPP] = c1o2 * (htemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MPP]);
+            h2[DIR_MMP] = c1o2 * (htemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MMP]);
+            h2[DIR_MPM] = c1o2 * (htemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MPM]);
+            h2[DIR_MMM] = c1o2 * (htemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * h2[DIR_MMM]);
+
+            distributionsH2->setDistributionInvForDirection(h2[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_0P0:
+            f[DIR_0P0] = ftemp[DIR_0P0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0P0];
+            f[DIR_PP0] = ftemp[DIR_PP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PP0];
+            f[DIR_MP0] = ftemp[DIR_MP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MP0];
+            f[DIR_0PP] = ftemp[DIR_0PP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PP];
+            f[DIR_0PM] = ftemp[DIR_0PM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PM];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+
+            h[DIR_0P0] = htemp[DIR_0P0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_0P0];
+            h[DIR_PP0] = htemp[DIR_PP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_PP0];
+            h[DIR_MP0] = htemp[DIR_MP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_MP0];
+            h[DIR_0PP] = htemp[DIR_0PP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_0PP];
+            h[DIR_0PM] = htemp[DIR_0PM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_0PM];
+            h[DIR_PPP] = htemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_PPP];
+            h[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_MPP];
+            h[DIR_PPM] = htemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_PPM];
+            h[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h[DIR_MPM];
+
+            distributionsH->setDistributionInvForDirection(h[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
+            distributionsH->setDistributionInvForDirection(h[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributionsH->setDistributionInvForDirection(h[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributionsH->setDistributionInvForDirection(h[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributionsH->setDistributionInvForDirection(h[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributionsH->setDistributionInvForDirection(h[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributionsH->setDistributionInvForDirection(h[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributionsH->setDistributionInvForDirection(h[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+
+            h2[DIR_0P0] = c1o2 * (htemp[DIR_0P0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_0P0]);
+            h2[DIR_PP0] = c1o2 * (htemp[DIR_PP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_PP0]);
+            h2[DIR_MP0] = c1o2 * (htemp[DIR_MP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_MP0]);
+            h2[DIR_0PP] = c1o2 * (htemp[DIR_0PP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_0PP]);
+            h2[DIR_0PM] = c1o2 * (htemp[DIR_0PM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_0PM]);
+            h2[DIR_PPP] = c1o2 * (htemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_PPP]);
+            h2[DIR_MPP] = c1o2 * (htemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_MPP]);
+            h2[DIR_PPM] = c1o2 * (htemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_PPM]);
+            h2[DIR_MPM] = c1o2 * (htemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * h2[DIR_MPM]);
+
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+
+            break;
+        case DIR_0M0:
+            f[DIR_0M0] = ftemp[DIR_0M0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0M0];
+            f[DIR_PM0] = ftemp[DIR_PM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PM0];
+            f[DIR_MM0] = ftemp[DIR_MM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MM0];
+            f[DIR_0MP] = ftemp[DIR_0MP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MP];
+            f[DIR_0MM] = ftemp[DIR_0MM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MM];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMP];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+
+            h[DIR_0M0] = htemp[DIR_0M0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_0M0];
+            h[DIR_PM0] = htemp[DIR_PM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_PM0];
+            h[DIR_MM0] = htemp[DIR_MM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_MM0];
+            h[DIR_0MP] = htemp[DIR_0MP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_0MP];
+            h[DIR_0MM] = htemp[DIR_0MM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_0MM];
+            h[DIR_PMP] = htemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_PMP];
+            h[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_MMP];
+            h[DIR_PMM] = htemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_PMM];
+            h[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h[DIR_MMM];
+
+            distributionsH->setDistributionInvForDirection(h[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
+            distributionsH->setDistributionInvForDirection(h[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributionsH->setDistributionInvForDirection(h[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributionsH->setDistributionInvForDirection(h[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributionsH->setDistributionInvForDirection(h[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributionsH->setDistributionInvForDirection(h[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributionsH->setDistributionInvForDirection(h[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+
+            h2[DIR_0M0] = c1o2 * (htemp[DIR_0M0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_0M0]);
+            h2[DIR_PM0] = c1o2 * (htemp[DIR_PM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_PM0]);
+            h2[DIR_MM0] = c1o2 * (htemp[DIR_MM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_MM0]);
+            h2[DIR_0MP] = c1o2 * (htemp[DIR_0MP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_0MP]);
+            h2[DIR_0MM] = c1o2 * (htemp[DIR_0MM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_0MM]);
+            h2[DIR_PMP] = c1o2 * (htemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_PMP]);
+            h2[DIR_MMP] = c1o2 * (htemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_MMP]);
+            h2[DIR_PMM] = c1o2 * (htemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_PMM]);
+            h2[DIR_MMM] = c1o2 * (htemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * h2[DIR_MMM]);
+
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+
+            break;
+        case DIR_00P:
+            f[DIR_00P] = ftemp[DIR_00P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_00P];
+            f[DIR_P0P] = ftemp[DIR_P0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_P0P];
+            f[DIR_M0P] = ftemp[DIR_M0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_M0P];
+            f[DIR_0PP] = ftemp[DIR_0PP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0PP];
+            f[DIR_0MP] = ftemp[DIR_0MP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0MP];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MMP];
+
+            distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+
+            h[DIR_00P] = htemp[DIR_00P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_00P];
+            h[DIR_P0P] = htemp[DIR_P0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_P0P];
+            h[DIR_M0P] = htemp[DIR_M0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_M0P];
+            h[DIR_0PP] = htemp[DIR_0PP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_0PP];
+            h[DIR_0MP] = htemp[DIR_0MP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_0MP];
+            h[DIR_PPP] = htemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_PPP];
+            h[DIR_MPP] = htemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_MPP];
+            h[DIR_PMP] = htemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_PMP];
+            h[DIR_MMP] = htemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h[DIR_MMP];
+
+            distributionsH->setDistributionInvForDirection(h[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
+            distributionsH->setDistributionInvForDirection(h[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributionsH->setDistributionInvForDirection(h[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributionsH->setDistributionInvForDirection(h[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributionsH->setDistributionInvForDirection(h[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributionsH->setDistributionInvForDirection(h[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributionsH->setDistributionInvForDirection(h[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributionsH->setDistributionInvForDirection(h[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+
+            h2[DIR_00P] = c1o2 * (htemp[DIR_00P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_00P]);
+            h2[DIR_P0P] = c1o2 * (htemp[DIR_P0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_P0P]);
+            h2[DIR_M0P] = c1o2 * (htemp[DIR_M0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_M0P]);
+            h2[DIR_0PP] = c1o2 * (htemp[DIR_0PP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_0PP]);
+            h2[DIR_0MP] = c1o2 * (htemp[DIR_0MP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_0MP]);
+            h2[DIR_PPP] = c1o2 * (htemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_PPP]);
+            h2[DIR_MPP] = c1o2 * (htemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_MPP]);
+            h2[DIR_PMP] = c1o2 * (htemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_PMP]);
+            h2[DIR_MMP] = c1o2 * (htemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * h2[DIR_MMP]);
+
+            distributionsH2->setDistributionInvForDirection(h2[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+
+            break;
+        case DIR_00M:
+            f[DIR_00M] = ftemp[DIR_00M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_00M];
+            f[DIR_P0M] = ftemp[DIR_P0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_P0M];
+            f[DIR_M0M] = ftemp[DIR_M0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_M0M];
+            f[DIR_0PM] = ftemp[DIR_0PM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0PM];
+            f[DIR_0MM] = ftemp[DIR_0MM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0MM];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+
+            h[DIR_00M] = htemp[DIR_00M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_00M];
+            h[DIR_P0M] = htemp[DIR_P0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_P0M];
+            h[DIR_M0M] = htemp[DIR_M0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_M0M];
+            h[DIR_0PM] = htemp[DIR_0PM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_0PM];
+            h[DIR_0MM] = htemp[DIR_0MM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_0MM];
+            h[DIR_PPM] = htemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_PPM];
+            h[DIR_MPM] = htemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_MPM];
+            h[DIR_PMM] = htemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_PMM];
+            h[DIR_MMM] = htemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h[DIR_MMM];
+
+            distributionsH->setDistributionInvForDirection(h[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
+            distributionsH->setDistributionInvForDirection(h[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributionsH->setDistributionInvForDirection(h[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributionsH->setDistributionInvForDirection(h[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributionsH->setDistributionInvForDirection(h[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributionsH->setDistributionInvForDirection(h[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributionsH->setDistributionInvForDirection(h[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+
+            h2[DIR_00M] = c1o2 * (htemp[DIR_00M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_00M]);
+            h2[DIR_P0M] = c1o2 * (htemp[DIR_P0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_P0M]);
+            h2[DIR_M0M] = c1o2 * (htemp[DIR_M0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_M0M]);
+            h2[DIR_0PM] = c1o2 * (htemp[DIR_0PM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_0PM]);
+            h2[DIR_0MM] = c1o2 * (htemp[DIR_0MM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_0MM]);
+            h2[DIR_PPM] = c1o2 * (htemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_PPM]);
+            h2[DIR_MPM] = c1o2 * (htemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_MPM]);
+            h2[DIR_PMM] = c1o2 * (htemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_PMM]);
+            h2[DIR_MMM] = c1o2 * (htemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * h2[DIR_MMM]);
+
+            distributionsH2->setDistributionInvForDirection(h2[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributionsH2->setDistributionInvForDirection(h2[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+
+            break;
+        default:
+            UB_THROW(UbException(UB_EXARGS, "It isn't implemented non reflecting density boundary for this direction!"));
+    }
 }
-
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseSlipBCAlgorithm.cpp
index 230a543f120a8ca8d18c5d2bb6a1c27e550aae92..c15204f7b9e8c02714d55adf2d40aea0acba8d35 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseSlipBCAlgorithm.cpp
@@ -64,17 +64,19 @@ void MultiphaseSlipBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> dist
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseSlipBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal h[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
-   LBMReal heq[D3Q27System::ENDF+1];
+    using namespace vf::lbm::dir;
+
+   real f[D3Q27System::ENDF+1];
+   real h[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
+   real heq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
    distributionsH->getDistributionInv(h, x1, x2, x3);
 
-   LBMReal p1, vx1, vx2, vx3, phi, rho;
+   real p1, vx1, vx2, vx3, phi, rho;
 
    D3Q27System::calcDensity(h, phi);
-   //LBMReal collFactorM = collFactorL + (collFactorL - collFactorG)*(phi - phiH)/(phiH - phiL);
+   //real collFactorM = collFactorL + (collFactorL - collFactorG)*(phi - phiH)/(phiH - phiL);
 
 
    calcMacrosFct(f, p1, vx1, vx2, vx3);
@@ -82,7 +84,7 @@ void MultiphaseSlipBCAlgorithm::applyBC()
    D3Q27System::calcMultiphaseHeq(heq, phi, vx1, vx2, vx3); 
 
    UbTupleFloat3 normale = bcPtr->getNormalVector();
-   LBMReal amp = vx1*val<1>(normale)+vx2*val<2>(normale)+vx3*val<3>(normale);
+   real amp = vx1*val<1>(normale)+vx2*val<2>(normale)+vx3*val<3>(normale);
 
    vx1 = vx1 - amp * val<1>(normale); //normale zeigt von struktur weg!
    vx2 = vx2 - amp * val<2>(normale); //normale zeigt von struktur weg!
@@ -97,44 +99,44 @@ void MultiphaseSlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
+         real q = bcPtr->getQ(invDir);// m+m q=0 is more stable
          //vx3=0;
-         LBMReal velocity = 0.0;
+         real velocity = 0.0;
          switch (invDir)
          {
-         case D3Q27System::DIR_P00: velocity = (UbMath::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 bei imkompr)*wi*u*ei mit cs=1/sqrt(3)
-         case D3Q27System::DIR_M00: velocity = (UbMath::c4o9*(-vx1)); break;      //z.B. aus paper manfred MRT LB models in three dimensions (2002)   
-         case D3Q27System::DIR_0P0: velocity = (UbMath::c4o9*(+vx2)); break;
-         case D3Q27System::DIR_0M0: velocity = (UbMath::c4o9*(-vx2)); break;
-         case D3Q27System::DIR_00P: velocity = (UbMath::c4o9*(+vx3)); break;
-         case D3Q27System::DIR_00M: velocity = (UbMath::c4o9*(-vx3)); break;
-         case D3Q27System::DIR_PP0: velocity = (UbMath::c1o9*(+vx1+vx2)); break;
-         case D3Q27System::DIR_MM0: velocity = (UbMath::c1o9*(-vx1-vx2)); break;
-         case D3Q27System::DIR_PM0: velocity = (UbMath::c1o9*(+vx1-vx2)); break;
-         case D3Q27System::DIR_MP0: velocity = (UbMath::c1o9*(-vx1+vx2)); break;
-         case D3Q27System::DIR_P0P: velocity = (UbMath::c1o9*(+vx1             +vx3)); break;
-         case D3Q27System::DIR_M0M: velocity = (UbMath::c1o9*(-vx1             -vx3)); break;
-         case D3Q27System::DIR_P0M: velocity = (UbMath::c1o9*(+vx1             -vx3)); break;
-         case D3Q27System::DIR_M0P: velocity = (UbMath::c1o9*(-vx1             +vx3)); break;
-         case D3Q27System::DIR_0PP: velocity = (UbMath::c1o9*(+vx2+vx3)); break;
-         case D3Q27System::DIR_0MM: velocity = (UbMath::c1o9*(-vx2-vx3)); break;
-         case D3Q27System::DIR_0PM: velocity = (UbMath::c1o9*(+vx2-vx3)); break;
-         case D3Q27System::DIR_0MP: velocity = (UbMath::c1o9*(-vx2+vx3)); break;
-         case D3Q27System::DIR_PPP: velocity = (UbMath::c1o36*(+vx1+vx2+vx3)); break;
-         case D3Q27System::DIR_MMM: velocity = (UbMath::c1o36*(-vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_PPM: velocity = (UbMath::c1o36*(+vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_MMP: velocity = (UbMath::c1o36*(-vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_PMP: velocity = (UbMath::c1o36*(+vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_MPM: velocity = (UbMath::c1o36*(-vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_PMM: velocity = (UbMath::c1o36*(+vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_MPP: velocity = (UbMath::c1o36*(-vx1+vx2+vx3)); break;
+         case DIR_P00: velocity = (vf::basics::constant::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 for incompressible)*wi*u*ei with cs=1/sqrt(3)
+         case DIR_M00: velocity = (vf::basics::constant::c4o9*(-vx1)); break;      //e.g. from Manfred's paper, MRT LB models in three dimensions (2002)
+         case DIR_0P0: velocity = (vf::basics::constant::c4o9*(+vx2)); break;
+         case DIR_0M0: velocity = (vf::basics::constant::c4o9*(-vx2)); break;
+         case DIR_00P: velocity = (vf::basics::constant::c4o9*(+vx3)); break;
+         case DIR_00M: velocity = (vf::basics::constant::c4o9*(-vx3)); break;
+         case DIR_PP0: velocity = (vf::basics::constant::c1o9*(+vx1+vx2)); break;
+         case DIR_MM0: velocity = (vf::basics::constant::c1o9*(-vx1-vx2)); break;
+         case DIR_PM0: velocity = (vf::basics::constant::c1o9*(+vx1-vx2)); break;
+         case DIR_MP0: velocity = (vf::basics::constant::c1o9*(-vx1+vx2)); break;
+         case DIR_P0P: velocity = (vf::basics::constant::c1o9*(+vx1+vx3)); break;
+         case DIR_M0M: velocity = (vf::basics::constant::c1o9*(-vx1-vx3)); break;
+         case DIR_P0M: velocity = (vf::basics::constant::c1o9*(+vx1-vx3)); break;
+         case DIR_M0P: velocity = (vf::basics::constant::c1o9*(-vx1+vx3)); break;
+         case DIR_0PP: velocity = (vf::basics::constant::c1o9*(+vx2+vx3)); break;
+         case DIR_0MM: velocity = (vf::basics::constant::c1o9*(-vx2-vx3)); break;
+         case DIR_0PM: velocity = (vf::basics::constant::c1o9*(+vx2-vx3)); break;
+         case DIR_0MP: velocity = (vf::basics::constant::c1o9*(-vx2+vx3)); break;
+         case DIR_PPP: velocity = (vf::basics::constant::c1o36*(+vx1+vx2+vx3)); break;
+         case DIR_MMM: velocity = (vf::basics::constant::c1o36*(-vx1-vx2-vx3)); break;
+         case DIR_PPM: velocity = (vf::basics::constant::c1o36*(+vx1+vx2-vx3)); break;
+         case DIR_MMP: velocity = (vf::basics::constant::c1o36*(-vx1-vx2+vx3)); break;
+         case DIR_PMP: velocity = (vf::basics::constant::c1o36*(+vx1-vx2+vx3)); break;
+         case DIR_MPM: velocity = (vf::basics::constant::c1o36*(-vx1+vx2-vx3)); break;
+         case DIR_PMM: velocity = (vf::basics::constant::c1o36*(+vx1-vx2-vx3)); break;
+         case DIR_MPP: velocity = (vf::basics::constant::c1o36*(-vx1+vx2+vx3)); break;
          default: throw UbException(UB_EXARGS, "unknown error");
          }
-         LBMReal fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
+         real fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
 
-		 //LBMReal hReturn = ((1.0-q)/(1.0+q))*((h[invDir]-heq[invDir])/(1.0-collFactorPh)+heq[invDir])+((q/(1.0+q))*(h[invDir]+h[fdir]));
-		 LBMReal hReturn = h[invDir];
+		 //real hReturn = ((1.0-q)/(1.0+q))*((h[invDir]-heq[invDir])/(1.0-collFactorPh)+heq[invDir])+((q/(1.0+q))*(h[invDir]+h[fdir]));
+		 real hReturn = h[invDir];
 		 distributionsH->setDistributionForDirection(hReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.cpp
index 7211bc3725b4a2607dc000c739f4bf8e98865013..ee8761f98fe151282cedd24b07b4a608f8ad3873 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.cpp
@@ -47,7 +47,7 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const
    this->init();
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function, const LBMReal& phiBC, const double& startTime, const double& endTime )
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function, const real& phiBC, const real& startTime, const real& endTime )
 {
    if(vx1) this->vx1BCs.push_back(BCFunction(function,startTime,endTime));
    if(vx2) this->vx2BCs.push_back(BCFunction(function,startTime,endTime));
@@ -58,7 +58,7 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const
 
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function1, const mu::Parser& function2, const mu::Parser& function3, const LBMReal& phiBC, const double& startTime, const double& endTime )
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function1, const mu::Parser& function2, const mu::Parser& function3, const real& phiBC, const real& startTime, const real& endTime )
 {
    if(vx1) this->vx1BCs.push_back(BCFunction(function1,startTime,endTime));
    if(vx2) this->vx2BCs.push_back(BCFunction(function2,startTime,endTime));
@@ -67,7 +67,7 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const
    this->init();
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const string& functionstring, const double& startTime, const double& endTime )
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const string& functionstring, const real& startTime, const real& endTime )
 {
    if(vx1) this->vx1BCs.push_back(BCFunction(functionstring,startTime,endTime));
    if(vx2) this->vx2BCs.push_back(BCFunction(functionstring,startTime,endTime));
@@ -99,9 +99,9 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const vector< BCFunctio
    this->init();
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const double& vx1, const double& vx1StartTime, const double& vx1EndTime,
-                                               const double& vx2, const double& vx2StartTime, const double& vx2EndTime,
-                                               const double& vx3, const double& vx3StartTime, const double& vx3EndTime )
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const real& vx1, const real& vx1StartTime, const real& vx1EndTime,
+                                               const real& vx2, const real& vx2StartTime, const real& vx2EndTime,
+                                               const real& vx3, const real& vx3StartTime, const real& vx3EndTime )
 {
    this->vx1BCs.push_back(BCFunction(vx1,vx1StartTime,vx1EndTime));
    this->vx2BCs.push_back(BCFunction(vx2,vx2StartTime,vx2EndTime));
@@ -109,9 +109,9 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const double& vx1, cons
    this->init();
 }
 /*==========================================================*/
-MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const string& vx1Function, const double& vx1StartTime, const double& vx1EndTime,
-                                               const string& vx2Function, const double& vx2StartTime, const double& vx2EndTime,
-                                               const string& vx3Function, const double& vx3StartTime, const double& vx3EndTime ) 
+MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const string& vx1Function, const real& vx1StartTime, const real& vx1EndTime,
+                                               const string& vx2Function, const real& vx2StartTime, const real& vx2EndTime,
+                                               const string& vx3Function, const real& vx3StartTime, const real& vx3EndTime ) 
 {
    if(vx1Function.size()) this->vx1BCs.push_back(BCFunction(vx1Function,vx1StartTime,vx1EndTime));
    if(vx2Function.size()) this->vx2BCs.push_back(BCFunction(vx2Function,vx2StartTime,vx2EndTime));
@@ -119,9 +119,9 @@ MultiphaseVelocityBCAdapter::MultiphaseVelocityBCAdapter(const string& vx1Functi
    this->init();
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::setNewVelocities(const double& vx1, const double& vx1StartTime, const double& vx1EndTime,
-                                              const double& vx2, const double& vx2StartTime, const double& vx2EndTime,
-                                              const double& vx3, const double& vx3StartTime, const double& vx3EndTime )
+void MultiphaseVelocityBCAdapter::setNewVelocities(const real& vx1, const real& vx1StartTime, const real& vx1EndTime,
+                                              const real& vx2, const real& vx2StartTime, const real& vx2EndTime,
+                                              const real& vx3, const real& vx3StartTime, const real& vx3EndTime )
 {
    this->clear();
    this->vx1BCs.push_back(BCFunction(vx1,vx1StartTime,vx1EndTime));
@@ -174,13 +174,13 @@ void MultiphaseVelocityBCAdapter::init(std::vector<BCFunction>& vxBCs)
    }
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor, const double& time)
+void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor, const real& time)
 {
    this->timeStep       = time;
    this->tmpVx1Function = this->tmpVx2Function = this->tmpVx3Function = NULL;
 
    //aktuelle velocityfunction bestimmen
-   double maxEndtime = -Ub::inf;
+   real maxEndtime = -Ub::inf;
    
    for(size_t pos=0; pos<vx1BCs.size(); ++pos)
    {
@@ -190,8 +190,8 @@ void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor,
       if( UbMath::greaterEqual(this->timeStep,vx1BCs[pos].getStartTime()) ) 
       {
           if(   UbMath::lessEqual( this->timeStep     , vx1BCs[pos].getEndTime()     )
-             || UbMath::equal(     vx1BCs[pos].getEndTime(), (double)BCFunction::INFCONST        )
-             || UbMath::equal(     vx1BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)  )
+             || UbMath::equal(     vx1BCs[pos].getEndTime(), (real)BCFunction::INFCONST        )
+             || UbMath::equal(     vx1BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)  )
          {
             tmpVx1Function = &vx1BCs[pos].getFunction();
             break;
@@ -206,8 +206,8 @@ void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor,
       if( UbMath::greaterEqual(this->timeStep,vx2BCs[pos].getStartTime()) ) 
       {
          if(   UbMath::lessEqual( this->timeStep     , vx2BCs[pos].getEndTime()      )
-            || UbMath::equal(     vx2BCs[pos].getEndTime(), (double)BCFunction::INFCONST         )
-            || UbMath::equal(     vx2BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT )  )
+            || UbMath::equal(     vx2BCs[pos].getEndTime(), (real)BCFunction::INFCONST         )
+            || UbMath::equal(     vx2BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT )  )
          {
             tmpVx2Function = &vx2BCs[pos].getFunction();
             break;
@@ -222,8 +222,8 @@ void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor,
       if( UbMath::greaterEqual(this->timeStep,vx3BCs[pos].getStartTime()) ) 
       {
          if(   UbMath::lessEqual( this->timeStep     , vx3BCs[pos].getEndTime()      )
-            || UbMath::equal(     vx3BCs[pos].getEndTime(), (double)BCFunction::INFCONST         )
-            || UbMath::equal(     vx3BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT )  )
+            || UbMath::equal(     vx3BCs[pos].getEndTime(), (real)BCFunction::INFCONST         )
+            || UbMath::equal(     vx3BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT )  )
          {
             tmpVx3Function = &vx3BCs[pos].getFunction();
             break;
@@ -266,24 +266,24 @@ void MultiphaseVelocityBCAdapter::init(const D3Q27Interactor* const& interactor,
                    <<", timedependent="<<boolalpha<<this->isTimeDependent()   );
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::update( const D3Q27Interactor* const& interactor, const double& time ) 
+void MultiphaseVelocityBCAdapter::update( const D3Q27Interactor* const& interactor, const real& time ) 
 {
    this->init(interactor,time);
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::adaptBCForDirection( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const double& worldX1, const double& worldX2, const double& worldX3, const double& q, const int& fdirection, const double& time )
+void MultiphaseVelocityBCAdapter::adaptBCForDirection( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const real& worldX1, const real& worldX2, const real& worldX3, const real& q, const int& fdirection, const real& time )
 {
    bc->setVelocityBoundaryFlag(D3Q27System::INVDIR[fdirection],secondaryBcOption);
-   bc->setQ((float)q,fdirection);
+   bc->setQ((real)q,fdirection);
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::adaptBC( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const double& worldX1, const double& worldX2, const double& worldX3, const double& time ) 
+void MultiphaseVelocityBCAdapter::adaptBC( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const real& worldX1, const real& worldX2, const real& worldX3, const real& time ) 
 {
    this->setNodeVelocity(interactor,bc,worldX1,worldX2,worldX3,time);
    bc->setBcAlgorithmType(algorithmType);
 }
 /*==========================================================*/
-void MultiphaseVelocityBCAdapter::setNodeVelocity( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const double& worldX1, const double& worldX2, const double& worldX3, const double& timestep) 
+void MultiphaseVelocityBCAdapter::setNodeVelocity( const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const real& worldX1, const real& worldX2, const real& worldX3, const real& timestep) 
 {
    //Geschwindigkeiten setzen
    try
@@ -294,9 +294,9 @@ void MultiphaseVelocityBCAdapter::setNodeVelocity( const D3Q27Interactor& intera
       this->x3 = worldX3;
       this->timeStep = timestep;
 
-      if(tmpVx1Function) bc->setBoundaryVelocityX1((float)tmpVx1Function->Eval());  
-      if(tmpVx2Function) bc->setBoundaryVelocityX2((float)tmpVx2Function->Eval());
-      if(tmpVx3Function) bc->setBoundaryVelocityX3((float)tmpVx3Function->Eval());
+      if(tmpVx1Function) bc->setBoundaryVelocityX1((real)tmpVx1Function->Eval());  
+      if(tmpVx2Function) bc->setBoundaryVelocityX2((real)tmpVx2Function->Eval());
+      if(tmpVx3Function) bc->setBoundaryVelocityX3((real)tmpVx3Function->Eval());
 	  bc->setBoundaryPhaseField(this->phiBC);
    }
    catch(mu::Parser::exception_type& e){ stringstream error; error<<"mu::parser exception occurs, message("<<e.GetMsg()<<"), formula("<<e.GetExpr()+"), token("+e.GetToken()<<")"
@@ -304,11 +304,11 @@ void MultiphaseVelocityBCAdapter::setNodeVelocity( const D3Q27Interactor& intera
    catch(...)                          { throw UbException(UB_EXARGS,"unknown exception" ); }
 }
 /*==========================================================*/
-UbTupleDouble3 MultiphaseVelocityBCAdapter::getVelocity(const double& x1, const double& x2, const double& x3, const double& timeStep) const
+UbTupleDouble3 MultiphaseVelocityBCAdapter::getVelocity(const real& x1, const real& x2, const real& x3, const real& timeStep) const
 {
-	double vx1 = 0.0;
-	double vx2 = 0.0;
-	double vx3 = 0.0;
+	real vx1 = 0.0;
+	real vx2 = 0.0;
+	real vx3 = 0.0;
    this->x1 = x1;
    this->x2 = x2;
    this->x3 = x3;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.h
index 998462398294d23725ec9985a8fcbd06fe9f65ad..60d93cdecd141834b9800c08fc9b6d1e4fab3c92 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAdapter.h
@@ -92,11 +92,11 @@ public:
    
    MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const BCFunction& velVxBC );
 
-   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function, const LBMReal& phiBC, const double& startTime, const double& endTime  );
+   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function, const real& phiBC, const real& startTime, const real& endTime  );
 
-   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function1, const mu::Parser& function2, const mu::Parser& function3, const LBMReal& phiBC, const double& startTime, const double& endTime );
+   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const mu::Parser& function1, const mu::Parser& function2, const mu::Parser& function3, const real& phiBC, const real& startTime, const real& endTime );
    
-   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const std::string& functionstring, const double& startTime, const double& endTime );
+   MultiphaseVelocityBCAdapter(const bool& vx1, const bool& vx2, const bool& vx3, const std::string& functionstring, const real& startTime, const real& endTime );
 
    MultiphaseVelocityBCAdapter(const BCFunction& velBC, bool x1Dir, bool x2Dir, bool x3Dir);
 
@@ -104,13 +104,13 @@ public:
 
    MultiphaseVelocityBCAdapter(const std::vector< BCFunction >& velVx1BCs, const std::vector< BCFunction >& velVx2BCs, const std::vector< BCFunction >& velVx3BCs);
 
-   MultiphaseVelocityBCAdapter(const double& vx1, const double& vx1StartTime, const double& vx1EndTime,
-                          const double& vx2, const double& vx2StartTime, const double& vx2EndTime,
-                          const double& vx3, const double& vx3StartTime, const double& vx3EndTime);
+   MultiphaseVelocityBCAdapter(const real& vx1, const real& vx1StartTime, const real& vx1EndTime,
+                          const real& vx2, const real& vx2StartTime, const real& vx2EndTime,
+                          const real& vx3, const real& vx3StartTime, const real& vx3EndTime);
 
-   MultiphaseVelocityBCAdapter(const std::string& vx1Function, const double& vx1StartTime, const double& vx1EndTime,
-                          const std::string& vx2Function, const double& vx2StartTime, const double& vx2EndTime,
-                          const std::string& vx3Function, const double& vx3StartTime, const double& vx3EndTime ); 
+   MultiphaseVelocityBCAdapter(const std::string& vx1Function, const real& vx1StartTime, const real& vx1EndTime,
+                          const std::string& vx2Function, const real& vx2StartTime, const real& vx2EndTime,
+                          const std::string& vx3Function, const real& vx3StartTime, const real& vx3EndTime ); 
 
    //methods
    void setTimePeriodic()    { (this->type |=   TIMEPERIODIC); }
@@ -118,26 +118,26 @@ public:
    bool isTimePeriodic()     { return ((this->type & TIMEPERIODIC) ==  TIMEPERIODIC); }
 
    //folgendes ist fuer moving objects gedadacht... 
-   void setNewVelocities(const double& vx1, const double& vx1StartTime, const double& vx1EndTime,
-                         const double& vx2, const double& vx2StartTime, const double& vx2EndTime,
-                         const double& vx3, const double& vx3StartTime, const double& vx3EndTime);
+   void setNewVelocities(const real& vx1, const real& vx1StartTime, const real& vx1EndTime,
+                         const real& vx2, const real& vx2StartTime, const real& vx2EndTime,
+                         const real& vx3, const real& vx3StartTime, const real& vx3EndTime);
 
       
    //------------- implements D3Q27BoundaryConditionAdapter ----- start
    std::string toString();
    
-   void init(const D3Q27Interactor* const& interactor, const double& time=0);
-   void update(const D3Q27Interactor* const& interactor, const double& time=0);
+   void init(const D3Q27Interactor* const& interactor, const real& time=0);
+   void update(const D3Q27Interactor* const& interactor, const real& time=0);
 
-   void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                            const double &worldX2, const double &worldX3, const double &q, const int &fdirection,
-                            const double &time = 0);
-   void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                const double &worldX2, const double &worldX3, const double &time = 0);
+   void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                            const real &worldX2, const real &worldX3, const real &q, const int &fdirection,
+                            const real &time = 0);
+   void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                const real &worldX2, const real &worldX3, const real &time = 0);
 
    //------------- implements D3Q27BoundaryConditionAdapter ----- end
 
-   UbTupleDouble3 getVelocity(const double& x1, const double& x2, const double& x3, const double& timeStep) const;
+   UbTupleDouble3 getVelocity(const real& x1, const real& x2, const real& x3, const real& timeStep) const;
 
 
 protected:
@@ -149,7 +149,7 @@ protected:
    void unsetTimeDependent() { (this->type &=  ~TIMEDEPENDENT); }
 
    void clear() { vx1BCs.clear(); vx2BCs.clear();  vx3BCs.clear(); this->init(); }
-   void setNodeVelocity(const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const double& worldX1, const double& worldX2, const double& worldX3, const double& timestep);
+   void setNodeVelocity(const D3Q27Interactor& interactor, SPtr<BoundaryConditions> bc, const real& worldX1, const real& worldX2, const real& worldX3, const real& timestep);
 
 private:
    mutable mu::value_type x1, x2, x3;
@@ -163,7 +163,7 @@ private:
    std::vector<BCFunction> vx2BCs;
    std::vector<BCFunction> vx3BCs;
 
-   LBMReal phiBC;
+   real phiBC;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAlgorithm.cpp
index e4aaeeeb21a1e41617eed79ce05671c5425d01e4..b6ddf4b46925e770cfcdcc5390d41ed816b992bc 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/MultiphaseVelocityBCAlgorithm.cpp
@@ -68,18 +68,20 @@ void MultiphaseVelocityBCAlgorithm::addDistributionsH2(SPtr<DistributionArray3D>
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseVelocityBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal h[D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal feq[D3Q27System::ENDF+1];
-   LBMReal heq[D3Q27System::ENDF+1];
-   LBMReal htemp[D3Q27System::ENDF+1];
+    using namespace vf::lbm::dir;
+
+   real f[D3Q27System::ENDF+1];
+   real h[D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
+   real feq[D3Q27System::ENDF+1];
+   real heq[D3Q27System::ENDF+1];
+   real htemp[D3Q27System::ENDF+1];
    
    distributions->getDistributionInv(f, x1, x2, x3);
    distributionsH->getDistributionInv(h, x1, x2, x3);
    if (distributionsH2)
        distributionsH2->getDistributionInv(h2, x1, x2, x3);
-   LBMReal phi, vx1, vx2, vx3, p1, phiBC;
+   real phi, vx1, vx2, vx3, p1, phiBC;
    
    D3Q27System::calcDensity(h, phi);
 
@@ -98,12 +100,12 @@ void MultiphaseVelocityBCAlgorithm::applyBC()
    int nx3 = x3;
 
    //flag points in direction of fluid
-   if      (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_P00)) { nx1 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_M00)) { nx1 += 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0P0)) { nx2 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0M0)) { nx2 += 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00P)) { nx3 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00M)) { nx3 += 1; }
+   if      (bcPtr->hasVelocityBoundaryFlag(DIR_P00)) { nx1 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_M00)) { nx1 += 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_0P0)) { nx2 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_0M0)) { nx2 += 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_00P)) { nx3 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_00M)) { nx3 += 1; }
    //else UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on velocity boundary..."));
    
    phiBC = bcPtr->getBoundaryPhaseField();
@@ -129,14 +131,14 @@ void MultiphaseVelocityBCAlgorithm::applyBC()
       {
          const int invDir = D3Q27System::INVDIR[fdir];
          //LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
-         LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
+         real velocity = bcPtr->getBoundaryVelocity(invDir);
 		 //16.03.2021 quick fix for velocity BC
-         LBMReal fReturn = f[invDir] - velocity;
+         real fReturn = f[invDir] - velocity;
          //LBMReal fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity)/(1.0+q));
         // distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);//no delay BB
          distributions->setDistributionForDirection(fReturn, x1, x2, x3, invDir);//delay BB  
 
-         LBMReal hReturn = htemp[invDir]+h[invDir] - heq[invDir];
+         real hReturn = htemp[invDir] + h[invDir] - heq[invDir] - velocity*phi;
          distributionsH->setDistributionForDirection(hReturn, x1, x2, x3, invDir);//delay BB  
          if (distributionsH2) {
              fReturn = h2[invDir] ;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAdapter.h
index 68ebf73ffeafe88f9184c46a1144840fae8b27e1..52eda33082e8031454b00f578a6f520b738c0d42 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAdapter.h
@@ -43,18 +43,18 @@ public:
     NoSlipBCAdapter() : BCAdapter() {}
     NoSlipBCAdapter(const short &secondaryBcOption) : BCAdapter(secondaryBcOption) {}
 
-    void init(const D3Q27Interactor *const &interactor, const double &time = 0) override {}
-    void update(const D3Q27Interactor *const &interactor, const double &time = 0) override {}
+    void init(const D3Q27Interactor *const &interactor, const real &time = 0) override {}
+    void update(const D3Q27Interactor *const &interactor, const real &time = 0) override {}
 
     void adaptBCForDirection(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                             const double & /*worldX1*/, const double & /*worldX2*/, const double & /*worldX3*/,
-                             const double &q, const int &fdirection, const double & /*time*/ = 0) override
+                             const real & /*worldX1*/, const real & /*worldX2*/, const real & /*worldX3*/,
+                             const real &q, const int &fdirection, const real & /*time*/ = 0) override
     {
         bc->setNoSlipBoundaryFlag(D3Q27System::INVDIR[fdirection], secondaryBcOption);
-        bc->setQ((float)q, fdirection);
+        bc->setQ((real)q, fdirection);
     }
-    void adaptBC(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc, const double & /*worldX1*/,
-                 const double & /*worldX2*/, const double & /*worldX3*/, const double & /*time*/ = 0) override
+    void adaptBC(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc, const real & /*worldX1*/,
+                 const real & /*worldX2*/, const real & /*worldX3*/, const real & /*time*/ = 0) override
     {
         bc->setBcAlgorithmType(algorithmType);
     }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.cpp
index d82a7865b1dc4542025b896914a5320495024bd6..f89c74513289ab2787cbef0dbe504d913510afc5 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.cpp
@@ -54,10 +54,10 @@ void NoSlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributions
 //////////////////////////////////////////////////////////////////////////
 void NoSlipBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
     calcFeqFct(feq, rho, vx1, vx2, vx3);
 
@@ -65,8 +65,8 @@ void NoSlipBCAlgorithm::applyBC()
         if (bcPtr->hasNoSlipBoundaryFlag(fdir)) {
             // quadratic bounce back
             const int invDir = D3Q27System::INVDIR[fdir];
-            LBMReal q        = bcPtr->getQ(invDir);
-            LBMReal fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) +
+            real q        = bcPtr->getQ(invDir);
+            real fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) +
                               ((q / (1.0 + q)) * (f[invDir] + f[fdir]));
             distributions->setDistributionForDirection(fReturn, x1 + D3Q27System::DX1[invDir],
                                                        x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir],
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.cpp
index cf7a627b0c649aa0e1d00a8b137225b9e65b8476..2787d685cbd3b71b879c3fabc1c114e0e61eef8d 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.cpp
@@ -56,40 +56,42 @@ void NonEqDensityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distrib
 //////////////////////////////////////////////////////////////////////////
 void NonEqDensityBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
+    using namespace vf::lbm::dir;
+
+    real f[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
     int nx1 = x1;
     int nx2 = x2;
     int nx3 = x3;
 
     // flag points in direction of fluid
-    if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_P00)) {
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
         nx1 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_M00)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
         nx1 += 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0P0)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
         nx2 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0M0)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
         nx2 += 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00P)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
         nx3 -= 1;
-    } else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00M)) {
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
         nx3 += 1;
     } else
         return; // UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
 
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
     // LBMReal vlimit=0.01;
     // vx1=(fabs(vx1)>vlimit) ? vx1/fabs(vx1)*vlimit : vx1;
     // vx2=(fabs(vx2)>vlimit) ? vx2/fabs(vx2)*vlimit : vx2;
     // vx3=(fabs(vx3)>vlimit) ? vx3/fabs(vx3)*vlimit : vx3;
-    LBMReal rhoBC = bcPtr->getBoundaryDensity();
+    real rhoBC = bcPtr->getBoundaryDensity();
     for (int fdir = D3Q27System::STARTF; fdir <= D3Q27System::ENDF; fdir++) {
         if (bcPtr->hasDensityBoundaryFlag(fdir)) {
             // Martins NEQ ADDON
             ////original: 15.2.2013:
-            LBMReal ftemp = calcFeqsForDirFct(fdir, rho, vx1, vx2, vx3);
+            real ftemp = calcFeqsForDirFct(fdir, rho, vx1, vx2, vx3);
             // rhoBC=(rho>rhoBC)? rhoBC : rho; //Limiter 08.08.2018
             ftemp = calcFeqsForDirFct(fdir, rhoBC, vx1, vx2, vx3) + f[fdir] - ftemp;
             distributions->setDistributionForDirection(ftemp, nx1, nx2, nx3, fdir);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9fe55f9aa4ab87a8548ca04759fad16e809b682b
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.cpp
@@ -0,0 +1,341 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonReflectingInflowBCAlgorithm.cpp
+//! \ingroup BoundarConditions
+//! \author Hussein Alihussein
+//=======================================================================================
+#include "NonReflectingInflowBCAlgorithm.h"
+
+#include "BoundaryConditions.h"
+#include "D3Q27System.h"
+#include "DistributionArray3D.h"
+
+NonReflectingInflowBCAlgorithm::NonReflectingInflowBCAlgorithm()
+{
+    this->preCollision = true; // inherited BCAlgorithm flag; presumably "apply before the collision step" -- confirm
+    this->type         = BCAlgorithm::NonReflectingInflowBCAlgorithm; // BCAlgorithm constant identifying this algorithm
+}
+//////////////////////////////////////////////////////////////////////////
+NonReflectingInflowBCAlgorithm::~NonReflectingInflowBCAlgorithm() = default;
+//////////////////////////////////////////////////////////////////////////
+SPtr<BCAlgorithm> NonReflectingInflowBCAlgorithm::clone()
+{
+    // A clone is a new default-constructed algorithm; no state of *this is copied over.
+    return SPtr<BCAlgorithm>(new NonReflectingInflowBCAlgorithm());
+}
+//////////////////////////////////////////////////////////////////////////
+void NonReflectingInflowBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributions)
+{
+    BCAlgorithm::distributions = distributions; // qualified because the parameter hides the member of the same name
+}
+//////////////////////////////////////////////////////////////////////////
+// Applies the inflow condition at node (x1, x2, x3): the nine populations sharing the flagged axis
+// direction are blended with those of the neighbour node (nx1, nx2, nx3) and stored in inverse order.
+void NonReflectingInflowBCAlgorithm::applyBC()
+{
+    using namespace vf::lbm::dir;
+    using namespace D3Q27System;
+    using namespace vf::basics::constant;
+
+    real f[ENDF + 1];
+    real ftemp[ENDF + 1];
+
+    int nx1       = x1;
+    int nx2       = x2;
+    int nx3       = x3;
+    int direction = -1;
+
+    // flag points in direction of fluid
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
+        nx1 += 1;
+        direction = DIR_P00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
+        nx1 -= 1;
+        direction = DIR_M00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
+        nx2 += 1;
+        direction = DIR_0P0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
+        nx2 -= 1;
+        direction = DIR_0M0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
+        nx3 += 1;
+        direction = DIR_00P;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
+        nx3 -= 1;
+        direction = DIR_00M;
+    } else
+        UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
+
+    distributions->getDistribution(f, x1, x2, x3);
+    distributions->getDistribution(ftemp, nx1, nx2, nx3);
+
+    real rho, vx1, vx2, vx3;
+    calcMacrosFct(f, rho, vx1, vx2, vx3);
+    // Weight of the velocity-mismatch correction delf that is applied in every case below.
+    real BCVeloWeight = 0.5;
+    // Prescribed inflow velocity. It is hard-coded; values tried before were
+    // 0.004814077025232405, 0.00057735, 0.04, 0.01, 1./112., 1./126. and 0.005.
+    // NOTE(review): presumably this should be read from the boundary condition data
+    // (e.g. via bcPtr) instead of being a literal -- confirm with the author.
+    real velocity = 1. / 200.;
+    // delf is set per case to (-velocity +/- v) * BCVeloWeight, where v is the velocity
+    // component along the flagged axis; it enters each population scaled by WEIGTH.
+    // An earlier variant was: delf = (-velocity + vx1) * 0.5.
+    real delf;
+
+    switch (direction) {
+        case DIR_P00:
+            delf = (-velocity + vx1) * BCVeloWeight;
+            // alternative tried: delf = (-velocity) * BCVeloWeight;
+            f[DIR_P00] = ftemp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P00] - delf * WEIGTH[DIR_P00];
+            f[DIR_PP0] = ftemp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PP0] - delf * WEIGTH[DIR_PP0];
+            f[DIR_PM0] = ftemp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PM0] - delf * WEIGTH[DIR_PM0];
+            f[DIR_P0P] = ftemp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0P] - delf * WEIGTH[DIR_P0P];
+            f[DIR_P0M] = ftemp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0M] - delf * WEIGTH[DIR_P0M];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPP] - delf * WEIGTH[DIR_PPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMP] - delf * WEIGTH[DIR_PMP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPM] - delf * WEIGTH[DIR_PPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMM] - delf * WEIGTH[DIR_PMM];
+            //f[DIR_P00] = (ftemp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P00]) *
+            //           (1 - BCVeloWeight) +
+            //       (ftemp[DIR_M00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_M00] +
+            //       velocity*(6)*WEIGTH[DIR_P00]/* bcPtr->getBoundaryVelocity(INVDIR[DIR_M00])*/) *
+            //           (BCVeloWeight)  ;
+            //f[DIR_PP0] = (ftemp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PP0]) *
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_MM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MM0] +
+            //         velocity * (6) * WEIGTH[DIR_PP0] /*bcPtr->getBoundaryVelocity(INVDIR[DIR_MM0])*/) *
+            //            (BCVeloWeight);
+            //f[DIR_PM0] = (ftemp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PM0]) *
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_MP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MP0] +
+            //        velocity*(6)*WEIGTH[DIR_PP0]/* bcPtr->getBoundaryVelocity(INVDIR[DIR_MP0])*/) *
+            //            (BCVeloWeight);
+            //f[DIR_P0P] = (ftemp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0P]) *
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_M0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_M0M] +
+            //        velocity*(6)*WEIGTH[DIR_P0P]/* bcPtr->getBoundaryVelocity(INVDIR[DIR_M0M])*/) *
+            //            (BCVeloWeight);
+            //f[DIR_P0M] = (ftemp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0M])*
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_M0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_M0P] +
+            //        velocity*(6)*WEIGTH[DIR_P0M]/* bcPtr->getBoundaryVelocity(INVDIR[DIR_M0P])*/) *
+            //            (BCVeloWeight);
+            //f[DIR_PPP] = (ftemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPP])*
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_MMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MMM] +
+            //     velocity * (6) * WEIGTH[DIR_PPP] /* bcPtr->getBoundaryVelocity(INVDIR[DIR_MMM])*/) *
+            //            (BCVeloWeight);
+            //f[DIR_PMP] = (ftemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMP]) *
+            //             (1 - BCVeloWeight) +
+            //         (ftemp[DIR_MPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MPM] +
+            //     velocity * (6) * WEIGTH[DIR_PPP] /*bcPtr->getBoundaryVelocity(INVDIR[DIR_MPM])*/) *
+            //             (BCVeloWeight);
+            //f[DIR_PPM] = (ftemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPM]) *
+            //             (1 - BCVeloWeight) +
+            //         (ftemp[DIR_MMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MMP] +
+            //     velocity * (6) * WEIGTH[DIR_PPP] /* bcPtr->getBoundaryVelocity(INVDIR[DIR_MMP])*/) *
+            //             (BCVeloWeight);
+            //f[DIR_PMM] = (ftemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMM]) *
+            //             (1 - BCVeloWeight) +
+            //         (ftemp[DIR_MPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MPP] +
+            //     velocity * (6) * WEIGTH[DIR_PPP] /* bcPtr->getBoundaryVelocity(INVDIR[DIR_MPP])*/) *
+            //             (BCVeloWeight);
+
+            distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            break;
+        case DIR_M00:
+            delf = (-velocity - vx1) * BCVeloWeight;
+            f[DIR_M00] = ftemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M00] -
+                         delf * WEIGTH[DIR_M00];
+            f[DIR_MP0] = ftemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MP0] -
+                         delf * WEIGTH[DIR_MP0];
+            f[DIR_MM0] = ftemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MM0] -
+                         delf * WEIGTH[DIR_MM0];
+            f[DIR_M0P] = ftemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0P] -
+                         delf * WEIGTH[DIR_M0P];
+            f[DIR_M0M] = ftemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0M] -
+                         delf * WEIGTH[DIR_M0M];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPP] -
+                         delf * WEIGTH[DIR_MPP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMP] -
+                         delf * WEIGTH[DIR_MMP];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPM] -
+                         delf * WEIGTH[DIR_MPM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMM] -
+                         delf * WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_0P0:
+            delf = (-velocity + vx2) * BCVeloWeight;
+            f[DIR_0P0] = ftemp[DIR_0P0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0P0] -
+                         delf * WEIGTH[DIR_0P0];
+            f[DIR_PP0] = ftemp[DIR_PP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PP0] -
+                         delf * WEIGTH[DIR_PP0];
+            f[DIR_MP0] = ftemp[DIR_MP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MP0] -
+                         delf * WEIGTH[DIR_MP0];
+            f[DIR_0PP] = ftemp[DIR_0PP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PP] -
+                         delf * WEIGTH[DIR_0PP];
+            f[DIR_0PM] = ftemp[DIR_0PM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PM] -
+                         delf * WEIGTH[DIR_0PM];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPP] -
+                         delf * WEIGTH[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPP] -
+                         delf * WEIGTH[DIR_MPP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPM] -
+                         delf * WEIGTH[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPM] -
+                         delf * WEIGTH[DIR_MPM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            break;
+        case DIR_0M0:
+            delf = (-velocity - vx2) * BCVeloWeight;
+            f[DIR_0M0] = ftemp[DIR_0M0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0M0] -
+                         delf * WEIGTH[DIR_0M0];
+            f[DIR_PM0] = ftemp[DIR_PM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PM0] -
+                         delf * WEIGTH[DIR_PM0];
+            f[DIR_MM0] = ftemp[DIR_MM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MM0] -
+                         delf * WEIGTH[DIR_MM0];
+            f[DIR_0MP] = ftemp[DIR_0MP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MP] -
+                         delf * WEIGTH[DIR_0MP];
+            f[DIR_0MM] = ftemp[DIR_0MM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MM] -
+                         delf * WEIGTH[DIR_0MM];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMP] -
+                         delf * WEIGTH[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMP] -
+                         delf * WEIGTH[DIR_MMP];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMM] -
+                         delf * WEIGTH[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMM] -
+                         delf * WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_00P:
+            delf = (-velocity + vx3) * BCVeloWeight;
+            f[DIR_00P] = ftemp[DIR_00P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_00P] -
+                         delf * WEIGTH[DIR_00P];
+            f[DIR_P0P] = ftemp[DIR_P0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_P0P] -
+                         delf * WEIGTH[DIR_P0P];
+            f[DIR_M0P] = ftemp[DIR_M0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_M0P] -
+                         delf * WEIGTH[DIR_M0P];
+            f[DIR_0PP] = ftemp[DIR_0PP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0PP] -
+                         delf * WEIGTH[DIR_0PP];
+            f[DIR_0MP] = ftemp[DIR_0MP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0MP] -
+                         delf * WEIGTH[DIR_0MP];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PPP] -
+                         delf * WEIGTH[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MPP] -
+                         delf * WEIGTH[DIR_MPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PMP] -
+                         delf * WEIGTH[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MMP] -
+                         delf * WEIGTH[DIR_MMP];
+
+            distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            break;
+        case DIR_00M:
+            delf = (-velocity - vx3) * BCVeloWeight;
+            f[DIR_00M] = ftemp[DIR_00M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_00M] -
+                         delf * WEIGTH[DIR_00M];
+            f[DIR_P0M] = ftemp[DIR_P0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_P0M] -
+                         delf * WEIGTH[DIR_P0M];
+            f[DIR_M0M] = ftemp[DIR_M0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_M0M] -
+                         delf * WEIGTH[DIR_M0M];
+            f[DIR_0PM] = ftemp[DIR_0PM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0PM] -
+                         delf * WEIGTH[DIR_0PM];
+            f[DIR_0MM] = ftemp[DIR_0MM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0MM] -
+                         delf * WEIGTH[DIR_0MM];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PPM] -
+                         delf * WEIGTH[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MPM] -
+                         delf * WEIGTH[DIR_MPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PMM] -
+                         delf * WEIGTH[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MMM] -
+                         delf * WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        default:
+            UB_THROW(
+                UbException(UB_EXARGS, "It isn't implemented non reflecting density boundary for this direction!"));
+    }
+}
diff --git a/src/basics/Singelton.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.h
similarity index 77%
rename from src/basics/Singelton.h
rename to src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.h
index f0979b5dd3d89e26ebbe4b4e82d2336e1f59a07e..1f3e87ce3fff371fbec30dbbe90721bd5ff975cc 100644
--- a/src/basics/Singelton.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.h
@@ -26,33 +26,25 @@
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \author Soeren Peters
+//! \file NonReflectingInflowBCAlgorithm.h
+//! \ingroup BoundarConditions
+//! \author Hussein Alihussein
 //=======================================================================================
+#ifndef NonReflectingInflowBCAlgorithm_h__
+#define NonReflectingInflowBCAlgorithm_h__
 
-#ifndef BASICS_SINGELTON_H
-#define BASICS_SINGELTON_H
+#include "BCAlgorithm.h"
+#include <PointerDefinitions.h>
 
-namespace vf::basics
-{
+class DistributionArray3D;
 
-template<typename T>
-class Singleton
+class NonReflectingInflowBCAlgorithm : public BCAlgorithm // inflow boundary-condition algorithm; see applyBC() in the .cpp
 {
 public:
-   Singleton(const Singleton&) = delete;
-   Singleton & operator=(const Singleton& rhs) = delete;
-
-protected:
-   Singleton() = default;
-
-public:
-   static std::shared_ptr<Singleton> getInstance()
-   {
-     static std::shared_ptr<Singleton> s{new T};
-     return s;
-   }
+    NonReflectingInflowBCAlgorithm(); // sets BCAlgorithm::type and BCAlgorithm::preCollision
+    ~NonReflectingInflowBCAlgorithm() override; // defaulted in the .cpp
+    SPtr<BCAlgorithm> clone() override; // returns a new default-constructed instance
+    void addDistributions(SPtr<DistributionArray3D> distributions) override; // stores the distribution array used by applyBC()
+    void applyBC() override; // updates the populations at the current boundary node
 };
-
-}
-
-#endif
\ No newline at end of file
+#endif // NonReflectingInflowBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithm.cpp
index 6fa4c7b5d85f4b1e5135f95b48f7d75a0cdbf3a4..09adfefa8d246ff92f43eeeacb57ad3c4bd3ea16 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithm.cpp
@@ -57,11 +57,14 @@ void NonReflectingOutflowBCAlgorithm::addDistributions(SPtr<DistributionArray3D>
 //////////////////////////////////////////////////////////////////////////
 void NonReflectingOutflowBCAlgorithm::applyBC()
 {
+    using namespace vf::lbm::dir;
+
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+    using namespace vf::basics::constant;
 
-    LBMReal f[ENDF + 1];
-    LBMReal ftemp[ENDF + 1];
+    real f[ENDF + 1];
+    real ftemp[ENDF + 1];
 
     int nx1       = x1;
     int nx2       = x2;
@@ -93,20 +96,20 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
     distributions->getDistribution(f, x1, x2, x3);
     distributions->getDistribution(ftemp, nx1, nx2, nx3);
 
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
 
     switch (direction) {
         case DIR_P00:
-            f[DIR_P00]   = ftemp[DIR_P00] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P00];
-            f[DIR_PP0]  = ftemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PP0];
-            f[DIR_PM0]  = ftemp[DIR_PM0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PM0];
-            f[DIR_P0P]  = ftemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P0P];
-            f[DIR_P0M]  = ftemp[DIR_P0M] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P0M];
-            f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PPP];
-            f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PMP];
-            f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PPM];
-            f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PMM];
+            f[DIR_P00]   = ftemp[DIR_P00] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_P00];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PP0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PM0];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_P0P];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_P0M];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PMP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PMM];
 
             distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
             distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -119,15 +122,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
             break;
         case DIR_M00:
-            f[DIR_M00]   = ftemp[DIR_M00] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M00];
-            f[DIR_MP0]  = ftemp[DIR_MP0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MP0];
-            f[DIR_MM0]  = ftemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MM0];
-            f[DIR_M0P]  = ftemp[DIR_M0P] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M0P];
-            f[DIR_M0M]  = ftemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M0M];
-            f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MPP];
-            f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MMP];
-            f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MPM];
-            f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MMM];
+            f[DIR_M00]   = ftemp[DIR_M00] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_M00];
+            f[DIR_MP0]  = ftemp[DIR_MP0] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MP0];
+            f[DIR_MM0]  = ftemp[DIR_MM0] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MM0];
+            f[DIR_M0P]  = ftemp[DIR_M0P] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_M0P];
+            f[DIR_M0M]  = ftemp[DIR_M0M] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_M0M];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MPP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MMP];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MPM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MMM];
 
             distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
             distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
@@ -140,15 +143,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
             break;
         case DIR_0P0:
-            f[DIR_0P0]   = ftemp[DIR_0P0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0P0];
-            f[DIR_PP0]  = ftemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PP0];
-            f[DIR_MP0]  = ftemp[DIR_MP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MP0];
-            f[DIR_0PP]  = ftemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0PP];
-            f[DIR_0PM]  = ftemp[DIR_0PM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0PM];
-            f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PPP];
-            f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MPP];
-            f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PPM];
-            f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MPM];
+            f[DIR_0P0]   = ftemp[DIR_0P0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_0P0];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_PP0];
+            f[DIR_MP0]  = ftemp[DIR_MP0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_MP0];
+            f[DIR_0PP]  = ftemp[DIR_0PP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_0PP];
+            f[DIR_0PM]  = ftemp[DIR_0PM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_0PM];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_MPP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_MPM];
 
             distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
             distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -161,15 +164,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
             break;
         case DIR_0M0:
-            f[DIR_0M0]   = ftemp[DIR_0M0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0M0];
-            f[DIR_PM0]  = ftemp[DIR_PM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PM0];
-            f[DIR_MM0]  = ftemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MM0];
-            f[DIR_0MP]  = ftemp[DIR_0MP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0MP];
-            f[DIR_0MM]  = ftemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0MM];
-            f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PMP];
-            f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MMP];
-            f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PMM];
-            f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MMM];
+            f[DIR_0M0]   = ftemp[DIR_0M0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_0M0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_PM0];
+            f[DIR_MM0]  = ftemp[DIR_MM0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_MM0];
+            f[DIR_0MP]  = ftemp[DIR_0MP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_0MP];
+            f[DIR_0MM]  = ftemp[DIR_0MM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_0MM];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_MMP];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_MMM];
 
             distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
             distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
@@ -182,15 +185,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
             break;
         case DIR_00P:
-            f[DIR_00P]   = ftemp[DIR_00P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_00P];
-            f[DIR_P0P]  = ftemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_P0P];
-            f[DIR_M0P]  = ftemp[DIR_M0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_M0P];
-            f[DIR_0PP]  = ftemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_0PP];
-            f[DIR_0MP]  = ftemp[DIR_0MP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_0MP];
-            f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_PPP];
-            f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_MPP];
-            f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_PMP];
-            f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_MMP];
+            f[DIR_00P]   = ftemp[DIR_00P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_00P];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_P0P];
+            f[DIR_M0P]  = ftemp[DIR_M0P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_M0P];
+            f[DIR_0PP]  = ftemp[DIR_0PP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_0PP];
+            f[DIR_0MP]  = ftemp[DIR_0MP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_0MP];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_MPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_MMP];
 
             distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
             distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
@@ -203,15 +206,15 @@ void NonReflectingOutflowBCAlgorithm::applyBC()
             distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
             break;
         case DIR_00M:
-            f[DIR_00M]   = ftemp[DIR_00M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_00M];
-            f[DIR_P0M]  = ftemp[DIR_P0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_P0M];
-            f[DIR_M0M]  = ftemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_M0M];
-            f[DIR_0PM]  = ftemp[DIR_0PM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_0PM];
-            f[DIR_0MM]  = ftemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_0MM];
-            f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_PPM];
-            f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_MPM];
-            f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_PMM];
-            f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_MMM];
+            f[DIR_00M]   = ftemp[DIR_00M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_00M];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_P0M];
+            f[DIR_M0M]  = ftemp[DIR_M0M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_M0M];
+            f[DIR_0PM]  = ftemp[DIR_0PM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_0PM];
+            f[DIR_0MM]  = ftemp[DIR_0MM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_0MM];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_MPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_MMM];
 
             distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
             distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d36e07118248363c0a81bc7d907c70b3d1b4fcea
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.cpp
@@ -0,0 +1,233 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonReflectingOutflowBCAlgorithmWithRelaxation.cpp
+//! \ingroup BoundarConditions
+//! \author Konstantin Kutscher, Hussein Alihussein
+//=======================================================================================
+#include "NonReflectingOutflowBCAlgorithmWithRelaxation.h"
+
+#include "BoundaryConditions.h"
+#include "D3Q27System.h"
+#include "DistributionArray3D.h"
+
+NonReflectingOutflowBCAlgorithmWithRelaxation::NonReflectingOutflowBCAlgorithmWithRelaxation() // default constructor: only sets the two inherited BCAlgorithm fields below
+{
+    BCAlgorithm::type         = BCAlgorithm::NonReflectingOutflowBCAlgorithmWithRelaxation; // tag the instance with this algorithm's own identifier from BCAlgorithm
+    BCAlgorithm::preCollision = true; // NOTE(review): presumably makes the caller run applyBC() before the collision step - confirm in BCAlgorithm
+}
+//////////////////////////////////////////////////////////////////////////
+NonReflectingOutflowBCAlgorithmWithRelaxation::~NonReflectingOutflowBCAlgorithmWithRelaxation() = default;
+//////////////////////////////////////////////////////////////////////////
+SPtr<BCAlgorithm> NonReflectingOutflowBCAlgorithmWithRelaxation::clone() // returns a new instance of this algorithm behind the base-class pointer type
+{
+    SPtr<BCAlgorithm> bc(new NonReflectingOutflowBCAlgorithmWithRelaxation()); // default-constructed: the distributions pointer of this object is not copied into the clone
+    return bc;
+}
+//////////////////////////////////////////////////////////////////////////
+void NonReflectingOutflowBCAlgorithmWithRelaxation::addDistributions(SPtr<DistributionArray3D> distributions) // stores the distribution array that applyBC() operates on
+{
+    this->distributions = distributions; // keeps the shared pointer only; applyBC() later reads from and writes to the array through it
+}
+//////////////////////////////////////////////////////////////////////////
+void NonReflectingOutflowBCAlgorithmWithRelaxation::applyBC()
+{
+    using namespace vf::lbm::dir;
+
+    using namespace D3Q27System;
+ //   using namespace UbMath;
+    using namespace vf::basics::constant;
+
+    LBMReal f[ENDF + 1];
+    LBMReal ftemp[ENDF + 1];
+
+    int nx1       = x1;
+    int nx2       = x2;
+    int nx3       = x3;
+    int direction = -1;
+
+    // flag points in direction of fluid
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
+        nx1 += 1;
+        direction = DIR_P00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
+        nx1 -= 1;
+        direction = DIR_M00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
+        nx2 += 1;
+        direction = DIR_0P0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
+        nx2 -= 1;
+        direction = DIR_0M0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
+        nx3 += 1;
+        direction = DIR_00P;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
+        nx3 -= 1;
+        direction = DIR_00M;
+    } else
+        UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
+
+    distributions->getDistribution(f, x1, x2, x3);
+    distributions->getDistribution(ftemp, nx1, nx2, nx3);
+
+    LBMReal rho, vx1, vx2, vx3;
+    calcMacrosFct(f, rho, vx1, vx2, vx3);
+    LBMReal delf = rho*0.01;
+    switch (direction) {
+        case DIR_P00:
+            f[DIR_P00]   = ftemp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P00] - delf* WEIGTH[DIR_P00];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PP0]- delf* WEIGTH[DIR_PP0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PM0]- delf* WEIGTH[DIR_PM0];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0P]- delf* WEIGTH[DIR_P0P];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0M]- delf* WEIGTH[DIR_P0M];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPP]- delf* WEIGTH[DIR_PPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMP]- delf* WEIGTH[DIR_PMP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPM]- delf* WEIGTH[DIR_PPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMM]- delf* WEIGTH[DIR_PMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            break;
+        case DIR_M00:
+            f[DIR_M00]   = ftemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M00]- delf* WEIGTH[DIR_M00];
+            f[DIR_MP0]  = ftemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MP0]- delf* WEIGTH[DIR_MP0];
+            f[DIR_MM0]  = ftemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MM0]- delf* WEIGTH[DIR_MM0];
+            f[DIR_M0P]  = ftemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0P]- delf* WEIGTH[DIR_M0P];
+            f[DIR_M0M]  = ftemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0M]- delf* WEIGTH[DIR_M0M];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPP]- delf* WEIGTH[DIR_MPP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMP]- delf* WEIGTH[DIR_MMP];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPM]- delf* WEIGTH[DIR_MPM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMM]- delf* WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_0P0:
+            f[DIR_0P0]   = ftemp[DIR_0P0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0P0]- delf* WEIGTH[DIR_0P0];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PP0]- delf* WEIGTH[DIR_PP0];
+            f[DIR_MP0]  = ftemp[DIR_MP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MP0]- delf* WEIGTH[DIR_MP0];
+            f[DIR_0PP]  = ftemp[DIR_0PP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PP]- delf* WEIGTH[DIR_0PP];
+            f[DIR_0PM]  = ftemp[DIR_0PM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PM]- delf* WEIGTH[DIR_0PM];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPP]- delf* WEIGTH[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPP]- delf* WEIGTH[DIR_MPP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPM]- delf* WEIGTH[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPM]- delf* WEIGTH[DIR_MPM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            break;
+        case DIR_0M0:
+            f[DIR_0M0]   = ftemp[DIR_0M0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0M0]- delf* WEIGTH[DIR_0M0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PM0]- delf* WEIGTH[DIR_PM0];
+            f[DIR_MM0]  = ftemp[DIR_MM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MM0]- delf* WEIGTH[DIR_MM0];
+            f[DIR_0MP]  = ftemp[DIR_0MP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MP]- delf* WEIGTH[DIR_0MP];
+            f[DIR_0MM]  = ftemp[DIR_0MM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MM]- delf* WEIGTH[DIR_0MM];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMP]- delf* WEIGTH[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMP]- delf* WEIGTH[DIR_MMP];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMM]- delf* WEIGTH[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMM]- delf* WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_00P:
+            f[DIR_00P]   = ftemp[DIR_00P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_00P]- delf* WEIGTH[DIR_00P];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_P0P]- delf* WEIGTH[DIR_P0P];
+            f[DIR_M0P]  = ftemp[DIR_M0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_M0P]- delf* WEIGTH[DIR_M0P];
+            f[DIR_0PP]  = ftemp[DIR_0PP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0PP]- delf* WEIGTH[DIR_0PP];
+            f[DIR_0MP]  = ftemp[DIR_0MP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0MP]- delf* WEIGTH[DIR_0MP];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PPP]- delf* WEIGTH[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MPP]- delf* WEIGTH[DIR_MPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PMP]- delf* WEIGTH[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MMP]- delf* WEIGTH[DIR_MMP];
+
+            distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            break;
+        case DIR_00M:
+            f[DIR_00M]   = ftemp[DIR_00M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_00M]- delf* WEIGTH[DIR_00M];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_P0M]- delf* WEIGTH[DIR_P0M];
+            f[DIR_M0M]  = ftemp[DIR_M0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_M0M]- delf* WEIGTH[DIR_M0M];
+            f[DIR_0PM]  = ftemp[DIR_0PM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0PM]- delf* WEIGTH[DIR_0PM];
+            f[DIR_0MM]  = ftemp[DIR_0MM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0MM]- delf* WEIGTH[DIR_0MM];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PPM]- delf* WEIGTH[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MPM]- delf* WEIGTH[DIR_MPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PMM]- delf* WEIGTH[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MMM]- delf* WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        default:
+            UB_THROW(
+                UbException(UB_EXARGS, "It isn't implemented non reflecting density boundary for this direction!"));
+    }
+}
diff --git a/src/gpu/GridGenerator/StreetPointFinder/SourceReader.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.h
similarity index 73%
rename from src/gpu/GridGenerator/StreetPointFinder/SourceReader.h
rename to src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.h
index f79c618d06ff9f72738c7b69767a8dd3c5443fac..97badb60dbe84e0b7a4a3fa82b950649e0a12d93 100644
--- a/src/gpu/GridGenerator/StreetPointFinder/SourceReader.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.h
@@ -26,38 +26,25 @@
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file SourceReader.h
-//! \ingroup StreetPointFinder
-//! \author Stephan Lenz
+//! \file NonReflectingOutflowBCAlgorithmWithRelaxation.h
+//! \ingroup BoundarConditions
+//! \author Konstantin Kutscher, Hussein Alihussein
 //=======================================================================================
-#ifndef SOURCEREADER_H
-#define  SOURCEREADER_H
+#ifndef NonReflectingOutflowBCAlgorithmWithRelaxation_h__
+#define NonReflectingOutflowBCAlgorithmWithRelaxation_h__
 
-#include <vector>
+#include "BCAlgorithm.h"
+#include <PointerDefinitions.h>
 
-#include "Core/DataTypes.h"
-#include "Core/Logger/Logger.h"
+class DistributionArray3D;
 
-#include "StreetPointFinder.h"
-
-
-
-struct GRIDGENERATOR_EXPORT SourceReaderData {
-	unsigned int sourceIndex;
-	float sourcePossibility;
-	SourceReaderData(unsigned int sourceIndex, float sourcePossibility);
-};
-
-struct GRIDGENERATOR_EXPORT SourceReader
+class NonReflectingOutflowBCAlgorithmWithRelaxation : public BCAlgorithm
 {
-	std::vector<SourceReaderData> sources;
-	StreetPointFinder* streetPointFinder;
-
-	void readSources(std::string filename, StreetPointFinder* streetPointFinder);
-
-private:
-	unsigned int getCellIndexStart(unsigned int streetIndex);
+public:
+    NonReflectingOutflowBCAlgorithmWithRelaxation();
+    ~NonReflectingOutflowBCAlgorithmWithRelaxation() override;
+    SPtr<BCAlgorithm> clone() override;
+    void addDistributions(SPtr<DistributionArray3D> distributions) override;
+    void applyBC() override;
 };
-
-
-#endif
\ No newline at end of file
+#endif // NonReflectingOutflowBCAlgorithmWithRelaxation_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelNoSlipBCAlgorithm.h
index d4a99846b6da226bf8d1d09e66763db61a90d2b0..45c9c0c21dba308862c8d4a8c1c1827a4a07e7c7 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelNoSlipBCAlgorithm.h
@@ -52,7 +52,7 @@ public:
       return bc;
    }
 protected:
-   LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override 
+   real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override 
    { 
       return Rheology::getBinghamCollFactor(omegaInf, shearRate, drho);
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
index 9673a009f75bccd71924985ec9a27187d9e1e12e..2837238c40ec02bffe7a8eccb4fedb5100846d55 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
@@ -52,7 +52,7 @@ public:
       return bc;
    }
 protected:
-   LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override 
+   real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override 
    { 
       return Rheology::getBinghamCollFactor(omegaInf, shearRate, drho);
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyHerschelBulkleyModelNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyHerschelBulkleyModelNoSlipBCAlgorithm.h
index 19220dbd57f9100e71dbf611cccad7fadf8fae1e..c9b76b563dd16044ee109acbdc8eff73cde95959 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyHerschelBulkleyModelNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyHerschelBulkleyModelNoSlipBCAlgorithm.h
@@ -51,7 +51,7 @@ public:
       return bc;
    }
 protected:
-   LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+   real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override
    {
       return Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, drho);
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.cpp
index 5d2ec04aac4280a141e6f3b2044c56c8eed842db..73bf54ad7eccbd42deb2454fa5d0a060cf6b5c15 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.cpp
@@ -42,15 +42,15 @@ void RheologyNoSlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distr
 //////////////////////////////////////////////////////////////////////////
 void RheologyNoSlipBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF + 1];
-   LBMReal feq[D3Q27System::ENDF + 1];
+   real f[D3Q27System::ENDF + 1];
+   real feq[D3Q27System::ENDF + 1];
    distributions->getDistribution(f, x1, x2, x3);
-   LBMReal rho, vx1, vx2, vx3;
+   real rho, vx1, vx2, vx3;
    calcMacrosFct(f, rho, vx1, vx2, vx3);
    calcFeqFct(feq, rho, vx1, vx2, vx3);
 
-   LBMReal shearRate = D3Q27System::getShearRate(f, collFactor);
-   LBMReal collFactorF = getRheologyCollFactor(collFactor, shearRate, rho);
+   real shearRate = D3Q27System::getShearRate(f, collFactor);
+   real collFactorF = getRheologyCollFactor(collFactor, shearRate, rho);
 
    for (int fDir = D3Q27System::FSTARTDIR; fDir <= D3Q27System::FENDDIR; fDir++)
    {
@@ -58,8 +58,8 @@ void RheologyNoSlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fDir];
-         LBMReal q = bcPtr->getQ(invDir);
-         LBMReal fReturn =(f[invDir] + q * f[fDir] + q * collFactorF * (feq[invDir] - f[invDir] + feq[fDir] - f[fDir])) / (1.0 + q);
+         real q = bcPtr->getQ(invDir);
+         real fReturn =(f[invDir] + q * f[fDir] + q * collFactorF * (feq[invDir] - f[invDir] + feq[fDir] - f[fDir])) / (1.0 + q);
          distributions->setDistributionInvForDirection(fReturn, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], invDir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.h
index 71be7caf0787edb38877d6c3bb0f891c095ead05..c8c38ad7fcf3e35378b1e5dd14938cdad230f185 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyNoSlipBCAlgorithm.h
@@ -43,10 +43,10 @@ class RheologyNoSlipBCAlgorithm : public BCAlgorithm
 public:
    RheologyNoSlipBCAlgorithm() = default;
    ~RheologyNoSlipBCAlgorithm() = default;
-   virtual SPtr<BCAlgorithm> clone() override { UB_THROW(UbException("LBMReal clone() - belongs in the derived class")); }
+   virtual SPtr<BCAlgorithm> clone() override { UB_THROW(UbException("real clone() - belongs in the derived class")); }
    void addDistributions(SPtr<DistributionArray3D> distributions) override;
    void applyBC() override;
 protected:
-   virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const = 0; // { UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class")); }
+   virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const = 0; // { UB_THROW(UbException("real getRheologyCollFactor() - belongs in the derived class")); }
 };
 #endif // RheologyNoSlipBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyPowellEyringModelNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyPowellEyringModelNoSlipBCAlgorithm.h
index 49a7df6be41f37dd4dc2ac7a67f8d8645aa70c15..a6a3a5a745f193d66f2d87303ab5df1fd62826eb 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyPowellEyringModelNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyPowellEyringModelNoSlipBCAlgorithm.h
@@ -51,7 +51,7 @@ public:
       return bc;
    }
 protected:
-   LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+   real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override
    {
       return Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, drho);
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
index e2979d462d5ec1d166cac48c67c903cf280b5ff2..9f7881af1705fca4ef24402f0fed4dbcb701127b 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
@@ -51,15 +51,15 @@ void RheologyVelocityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> dis
 //////////////////////////////////////////////////////////////////////////
 void RheologyVelocityBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
-   LBMReal rho, vx1, vx2, vx3, drho;
+   real rho, vx1, vx2, vx3, drho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    calcFeqFct(feq, drho, vx1, vx2, vx3);
 
-    LBMReal shearRate = D3Q27System::getShearRate(f, collFactor);
-    LBMReal collFactorF = getRheologyCollFactor(collFactor, shearRate, drho);
+    real shearRate = D3Q27System::getShearRate(f, collFactor);
+    real collFactorF = getRheologyCollFactor(collFactor, shearRate, drho);
 
     rho = 1.0+drho*compressibleFactor;
 
@@ -68,9 +68,9 @@ void RheologyVelocityBCAlgorithm::applyBC()
       if (bcPtr->hasVelocityBoundaryFlag(fdir))
       {
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
-         LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
-         LBMReal fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactorF)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
+         real q = bcPtr->getQ(invDir);// m+m q=0 stabiler
+         real velocity = bcPtr->getBoundaryVelocity(invDir);
+         real fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactorF)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.h
index fc523c4619b8f9c804b2366b671db7475495e151..91ac9ec574b9252c4d2842b9134d4190878d9daf 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.h
@@ -43,11 +43,11 @@ class RheologyVelocityBCAlgorithm : public BCAlgorithm
 public:
    RheologyVelocityBCAlgorithm();
    ~RheologyVelocityBCAlgorithm();
-   virtual SPtr<BCAlgorithm> clone() override { UB_THROW(UbException("LBMReal clone() - belongs in the derived class")); }
+   virtual SPtr<BCAlgorithm> clone() override { UB_THROW(UbException("real clone() - belongs in the derived class")); }
    void addDistributions(SPtr<DistributionArray3D> distributions) override;
    void applyBC() override;
 protected:
-   virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const = 0; // { UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class")); }
+   virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const = 0; // { UB_THROW(UbException("real getRheologyCollFactor() - belongs in the derived class")); }
 };
 
 #endif // RheologyVelocityBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleSlipBCAlgorithm.cpp
index 151e10be4987e27622ce25b86c91c320c0d24406..e02ee9fb7766217411ac37a104bf8c59a60a741e 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleSlipBCAlgorithm.cpp
@@ -58,17 +58,19 @@ void SimpleSlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distribut
 //////////////////////////////////////////////////////////////////////////
 void SimpleSlipBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
+    using namespace vf::lbm::dir;
+
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
-   LBMReal vx1, vx2, vx3, drho, rho;
+   real vx1, vx2, vx3, drho, rho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    calcFeqFct(feq, drho, vx1, vx2, vx3);
 
    rho = 1.0 + drho * compressibleFactor;
 
    UbTupleFloat3 normale = bcPtr->getNormalVector();
-   LBMReal amp = vx1*val<1>(normale)+vx2*val<2>(normale)+vx3*val<3>(normale);
+   real amp = vx1*val<1>(normale)+vx2*val<2>(normale)+vx3*val<3>(normale);
 
    vx1 = vx1 - amp * val<1>(normale); //normale zeigt von struktur weg!
    vx2 = vx2 - amp * val<2>(normale); //normale zeigt von struktur weg!
@@ -80,38 +82,38 @@ void SimpleSlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal velocity = 0.0;
+         real velocity = 0.0;
          switch (invDir)
          {
-         case D3Q27System::DIR_P00: velocity = (UbMath::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 bei imkompr)*wi*u*ei mit cs=1/sqrt(3)
-         case D3Q27System::DIR_M00: velocity = (UbMath::c4o9*(-vx1)); break;      //z.B. aus paper manfred MRT LB models in three dimensions (2002)   
-         case D3Q27System::DIR_0P0: velocity = (UbMath::c4o9*(+vx2)); break;
-         case D3Q27System::DIR_0M0: velocity = (UbMath::c4o9*(-vx2)); break;
-         case D3Q27System::DIR_00P: velocity = (UbMath::c4o9*(+vx3)); break;
-         case D3Q27System::DIR_00M: velocity = (UbMath::c4o9*(-vx3)); break;
-         case D3Q27System::DIR_PP0: velocity = (UbMath::c1o9*(+vx1+vx2)); break;
-         case D3Q27System::DIR_MM0: velocity = (UbMath::c1o9*(-vx1-vx2)); break;
-         case D3Q27System::DIR_PM0: velocity = (UbMath::c1o9*(+vx1-vx2)); break;
-         case D3Q27System::DIR_MP0: velocity = (UbMath::c1o9*(-vx1+vx2)); break;
-         case D3Q27System::DIR_P0P: velocity = (UbMath::c1o9*(+vx1+vx3)); break;
-         case D3Q27System::DIR_M0M: velocity = (UbMath::c1o9*(-vx1-vx3)); break;
-         case D3Q27System::DIR_P0M: velocity = (UbMath::c1o9*(+vx1-vx3)); break;
-         case D3Q27System::DIR_M0P: velocity = (UbMath::c1o9*(-vx1+vx3)); break;
-         case D3Q27System::DIR_0PP: velocity = (UbMath::c1o9*(+vx2+vx3)); break;
-         case D3Q27System::DIR_0MM: velocity = (UbMath::c1o9*(-vx2-vx3)); break;
-         case D3Q27System::DIR_0PM: velocity = (UbMath::c1o9*(+vx2-vx3)); break;
-         case D3Q27System::DIR_0MP: velocity = (UbMath::c1o9*(-vx2+vx3)); break;
-         case D3Q27System::DIR_PPP: velocity = (UbMath::c1o36*(+vx1+vx2+vx3)); break;
-         case D3Q27System::DIR_MMM: velocity = (UbMath::c1o36*(-vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_PPM: velocity = (UbMath::c1o36*(+vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_MMP: velocity = (UbMath::c1o36*(-vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_PMP: velocity = (UbMath::c1o36*(+vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_MPM: velocity = (UbMath::c1o36*(-vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_PMM: velocity = (UbMath::c1o36*(+vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_MPP: velocity = (UbMath::c1o36*(-vx1+vx2+vx3)); break;
+         case DIR_P00: velocity = (vf::basics::constant::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 for incompressible)*wi*u*ei with cs=1/sqrt(3)
+         case DIR_M00: velocity = (vf::basics::constant::c4o9*(-vx1)); break;      //e.g. from Manfred's paper "MRT LB models in three dimensions" (2002)
+         case DIR_0P0: velocity = (vf::basics::constant::c4o9*(+vx2)); break;
+         case DIR_0M0: velocity = (vf::basics::constant::c4o9*(-vx2)); break;
+         case DIR_00P: velocity = (vf::basics::constant::c4o9*(+vx3)); break;
+         case DIR_00M: velocity = (vf::basics::constant::c4o9*(-vx3)); break;
+         case DIR_PP0: velocity = (vf::basics::constant::c1o9*(+vx1+vx2)); break;
+         case DIR_MM0: velocity = (vf::basics::constant::c1o9*(-vx1-vx2)); break;
+         case DIR_PM0: velocity = (vf::basics::constant::c1o9*(+vx1-vx2)); break;
+         case DIR_MP0: velocity = (vf::basics::constant::c1o9*(-vx1+vx2)); break;
+         case DIR_P0P: velocity = (vf::basics::constant::c1o9*(+vx1+vx3)); break;
+         case DIR_M0M: velocity = (vf::basics::constant::c1o9*(-vx1-vx3)); break;
+         case DIR_P0M: velocity = (vf::basics::constant::c1o9*(+vx1-vx3)); break;
+         case DIR_M0P: velocity = (vf::basics::constant::c1o9*(-vx1+vx3)); break;
+         case DIR_0PP: velocity = (vf::basics::constant::c1o9*(+vx2+vx3)); break;
+         case DIR_0MM: velocity = (vf::basics::constant::c1o9*(-vx2-vx3)); break;
+         case DIR_0PM: velocity = (vf::basics::constant::c1o9*(+vx2-vx3)); break;
+         case DIR_0MP: velocity = (vf::basics::constant::c1o9*(-vx2+vx3)); break;
+         case DIR_PPP: velocity = (vf::basics::constant::c1o36*(+vx1+vx2+vx3)); break;
+         case DIR_MMM: velocity = (vf::basics::constant::c1o36*(-vx1-vx2-vx3)); break;
+         case DIR_PPM: velocity = (vf::basics::constant::c1o36*(+vx1+vx2-vx3)); break;
+         case DIR_MMP: velocity = (vf::basics::constant::c1o36*(-vx1-vx2+vx3)); break;
+         case DIR_PMP: velocity = (vf::basics::constant::c1o36*(+vx1-vx2+vx3)); break;
+         case DIR_MPM: velocity = (vf::basics::constant::c1o36*(-vx1+vx2-vx3)); break;
+         case DIR_PMM: velocity = (vf::basics::constant::c1o36*(+vx1-vx2-vx3)); break;
+         case DIR_MPP: velocity = (vf::basics::constant::c1o36*(-vx1+vx2+vx3)); break;
          default: throw UbException(UB_EXARGS, "unknown error");
          }
-         LBMReal fReturn = f[invDir] - velocity * rho;
+         real fReturn = f[invDir] - velocity * rho;
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleVelocityBCAlgorithm.cpp
index 6529ea85184f5b2d86a977e64008437fe0401491..83badd723e2c9bda222abaccdb09fbc352bc46af 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SimpleVelocityBCAlgorithm.cpp
@@ -58,10 +58,10 @@ void SimpleVelocityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distr
 //////////////////////////////////////////////////////////////////////////
 void SimpleVelocityBCAlgorithm::applyBC()
 {
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
-   LBMReal vx1, vx2, vx3, drho;
+   real vx1, vx2, vx3, drho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    calcFeqFct(feq, drho, vx1, vx2, vx3);
 
@@ -70,8 +70,8 @@ void SimpleVelocityBCAlgorithm::applyBC()
       if (bcPtr->hasVelocityBoundaryFlag(fdir))
       {
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
-         LBMReal fReturn = f[invDir] - velocity;
+         real velocity = bcPtr->getBoundaryVelocity(invDir);
+         real fReturn = f[invDir] - velocity;
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.cpp
index 0dc2d5d66e639b3b46bc9fe12cec96eba6e6adac..8f8299850d2299f75903a42a2b59512e093d6172 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.cpp
@@ -46,9 +46,11 @@
 //   return D3Q27SlipBCAdapterCreator::getInstance();
 //}
 //*==========================================================*/
-void SlipBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double & /*worldX1*/,
-                            const double & /*worldX2*/, const double & /*worldX3*/, const double & /*time*/)
+void SlipBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real & /*worldX1*/,
+                            const real & /*worldX2*/, const real & /*worldX3*/, const real & /*time*/)
 {
+    using namespace vf::lbm::dir;
+
     //////////////////////////////////////////////////////////////////////////
     //>>> nur workaround! -> Hendrick nach normalen berechnung aus qs fragen
 
@@ -56,17 +58,17 @@ void SlipBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryCond
     if (!geo)
         throw UbException(UB_EXARGS, "derzeit nur fuer Cubes valide");
 
-    if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_P00))
+    if (bc->hasSlipBoundaryFlag(DIR_P00))
         bc->setNormalVector(1.0, 0.0, 0.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_M00))
+    else if (bc->hasSlipBoundaryFlag(DIR_M00))
         bc->setNormalVector(-1.0, 0.0, 0.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_0P0))
+    else if (bc->hasSlipBoundaryFlag(DIR_0P0))
         bc->setNormalVector(0.0, 1.0, 0.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_0M0))
+    else if (bc->hasSlipBoundaryFlag(DIR_0M0))
         bc->setNormalVector(0.0, -1.0, 0.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_00P))
+    else if (bc->hasSlipBoundaryFlag(DIR_00P))
         bc->setNormalVector(0.0, 0.0, 1.0);
-    else if (bc->hasSlipBoundaryFlag(D3Q27System::DIR_00M))
+    else if (bc->hasSlipBoundaryFlag(DIR_00M))
         bc->setNormalVector(0.0, 0.0, -1.0);
 
     bc->setBcAlgorithmType(algorithmType);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.h
index b0f6d87bf938480b6568dcb648d5e8541a94ef4e..5c2225e0a1212931805207da7bacf1a1a797e290 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAdapter.h
@@ -66,18 +66,18 @@ public:
 
     //------------- implements D3Q27BoundaryConditionAdapter ----- start
 
-    void init(const D3Q27Interactor *const &interactor, const double &timestep = 0) override {}
-    void update(const D3Q27Interactor *const &interactor, const double &timestep = 0) override {}
+    void init(const D3Q27Interactor *const &interactor, const real &timestep = 0) override {}
+    void update(const D3Q27Interactor *const &interactor, const real &timestep = 0) override {}
 
     void adaptBCForDirection(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                             const double & /*worldX1*/, const double & /*worldX2*/, const double & /*worldX3*/,
-                             const double &q, const int &fdirection, const double & /*time*/ = 0) override
+                             const real & /*worldX1*/, const real & /*worldX2*/, const real & /*worldX3*/,
+                             const real &q, const int &fdirection, const real & /*time*/ = 0) override
     {
         bc->setSlipBoundaryFlag(D3Q27System::INVDIR[fdirection], secondaryBcOption);
-        bc->setQ((float)q, fdirection);
+        bc->setQ((real)q, fdirection);
     }
-    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                 const double &worldX2, const double &worldX3, const double &time = 0) override;
+    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                 const real &worldX2, const real &worldX3, const real &time = 0) override;
 
     //------------- implements D3Q27BoundaryConditionAdapter ----- end
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAlgorithm.cpp
index 5d9993c459b756dc1d8663907ee90bc0eabef51c..4232ae91a8d4806e1615beacfb57c5eb5deb10ca 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/SlipBCAlgorithm.cpp
@@ -20,15 +20,17 @@ void SlipBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributions)
 //////////////////////////////////////////////////////////////////////////
 void SlipBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    using namespace vf::lbm::dir;
+
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3, drho;
+    real rho, vx1, vx2, vx3, drho;
     calcMacrosFct(f, drho, vx1, vx2, vx3);
     calcFeqFct(feq, drho, vx1, vx2, vx3);
 
     UbTupleFloat3 normale = bcPtr->getNormalVector();
-    LBMReal amp            = vx1 * val<1>(normale) + vx2 * val<2>(normale) + vx3 * val<3>(normale);
+    real amp            = vx1 * val<1>(normale) + vx2 * val<2>(normale) + vx3 * val<3>(normale);
 
     vx1 = vx1 - amp * val<1>(normale); // normale zeigt von struktur weg!
     vx2 = vx2 - amp * val<2>(normale); // normale zeigt von struktur weg!
@@ -42,40 +44,40 @@ void SlipBCAlgorithm::applyBC()
       {
          //quadratic bounce back
          const int invDir = D3Q27System::INVDIR[fdir];
-         LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
+         real q = bcPtr->getQ(invDir);// m+m q=0 stabiler
          //vx3=0;
-         LBMReal velocity = 0.0;
+         real velocity = 0.0;
          switch (invDir)
          {
-         case D3Q27System::DIR_P00: velocity = (UbMath::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 bei imkompr)*wi*u*ei mit cs=1/sqrt(3)
-         case D3Q27System::DIR_M00: velocity = (UbMath::c4o9*(-vx1)); break;      //z.B. aus paper manfred MRT LB models in three dimensions (2002)   
-         case D3Q27System::DIR_0P0: velocity = (UbMath::c4o9*(+vx2)); break;
-         case D3Q27System::DIR_0M0: velocity = (UbMath::c4o9*(-vx2)); break;
-         case D3Q27System::DIR_00P: velocity = (UbMath::c4o9*(+vx3)); break;
-         case D3Q27System::DIR_00M: velocity = (UbMath::c4o9*(-vx3)); break;
-         case D3Q27System::DIR_PP0: velocity = (UbMath::c1o9*(+vx1+vx2)); break;
-         case D3Q27System::DIR_MM0: velocity = (UbMath::c1o9*(-vx1-vx2)); break;
-         case D3Q27System::DIR_PM0: velocity = (UbMath::c1o9*(+vx1-vx2)); break;
-         case D3Q27System::DIR_MP0: velocity = (UbMath::c1o9*(-vx1+vx2)); break;
-         case D3Q27System::DIR_P0P: velocity = (UbMath::c1o9*(+vx1+vx3)); break;
-         case D3Q27System::DIR_M0M: velocity = (UbMath::c1o9*(-vx1-vx3)); break;
-         case D3Q27System::DIR_P0M: velocity = (UbMath::c1o9*(+vx1-vx3)); break;
-         case D3Q27System::DIR_M0P: velocity = (UbMath::c1o9*(-vx1+vx3)); break;
-         case D3Q27System::DIR_0PP: velocity = (UbMath::c1o9*(+vx2+vx3)); break;
-         case D3Q27System::DIR_0MM: velocity = (UbMath::c1o9*(-vx2-vx3)); break;
-         case D3Q27System::DIR_0PM: velocity = (UbMath::c1o9*(+vx2-vx3)); break;
-         case D3Q27System::DIR_0MP: velocity = (UbMath::c1o9*(-vx2+vx3)); break;
-         case D3Q27System::DIR_PPP: velocity = (UbMath::c1o36*(+vx1+vx2+vx3)); break;
-         case D3Q27System::DIR_MMM: velocity = (UbMath::c1o36*(-vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_PPM: velocity = (UbMath::c1o36*(+vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_MMP: velocity = (UbMath::c1o36*(-vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_PMP: velocity = (UbMath::c1o36*(+vx1-vx2+vx3)); break;
-         case D3Q27System::DIR_MPM: velocity = (UbMath::c1o36*(-vx1+vx2-vx3)); break;
-         case D3Q27System::DIR_PMM: velocity = (UbMath::c1o36*(+vx1-vx2-vx3)); break;
-         case D3Q27System::DIR_MPP: velocity = (UbMath::c1o36*(-vx1+vx2+vx3)); break;
+         case DIR_P00: velocity = (vf::basics::constant::c4o9*(+vx1)); break;      //(2/cs^2)(=6)*rho_0(=1 bei imkompr)*wi*u*ei mit cs=1/sqrt(3)
+         case DIR_M00: velocity = (vf::basics::constant::c4o9*(-vx1)); break;      //z.B. aus paper manfred MRT LB models in three dimensions (2002)   
+         case DIR_0P0: velocity = (vf::basics::constant::c4o9*(+vx2)); break;
+         case DIR_0M0: velocity = (vf::basics::constant::c4o9*(-vx2)); break;
+         case DIR_00P: velocity = (vf::basics::constant::c4o9*(+vx3)); break;
+         case DIR_00M: velocity = (vf::basics::constant::c4o9*(-vx3)); break;
+         case DIR_PP0: velocity = (vf::basics::constant::c1o9*(+vx1+vx2)); break;
+         case DIR_MM0: velocity = (vf::basics::constant::c1o9*(-vx1-vx2)); break;
+         case DIR_PM0: velocity = (vf::basics::constant::c1o9*(+vx1-vx2)); break;
+         case DIR_MP0: velocity = (vf::basics::constant::c1o9*(-vx1+vx2)); break;
+         case DIR_P0P: velocity = (vf::basics::constant::c1o9*(+vx1+vx3)); break;
+         case DIR_M0M: velocity = (vf::basics::constant::c1o9*(-vx1-vx3)); break;
+         case DIR_P0M: velocity = (vf::basics::constant::c1o9*(+vx1-vx3)); break;
+         case DIR_M0P: velocity = (vf::basics::constant::c1o9*(-vx1+vx3)); break;
+         case DIR_0PP: velocity = (vf::basics::constant::c1o9*(+vx2+vx3)); break;
+         case DIR_0MM: velocity = (vf::basics::constant::c1o9*(-vx2-vx3)); break;
+         case DIR_0PM: velocity = (vf::basics::constant::c1o9*(+vx2-vx3)); break;
+         case DIR_0MP: velocity = (vf::basics::constant::c1o9*(-vx2+vx3)); break;
+         case DIR_PPP: velocity = (vf::basics::constant::c1o36*(+vx1+vx2+vx3)); break;
+         case DIR_MMM: velocity = (vf::basics::constant::c1o36*(-vx1-vx2-vx3)); break;
+         case DIR_PPM: velocity = (vf::basics::constant::c1o36*(+vx1+vx2-vx3)); break;
+         case DIR_MMP: velocity = (vf::basics::constant::c1o36*(-vx1-vx2+vx3)); break;
+         case DIR_PMP: velocity = (vf::basics::constant::c1o36*(+vx1-vx2+vx3)); break;
+         case DIR_MPM: velocity = (vf::basics::constant::c1o36*(-vx1+vx2-vx3)); break;
+         case DIR_PMM: velocity = (vf::basics::constant::c1o36*(+vx1-vx2-vx3)); break;
+         case DIR_MPP: velocity = (vf::basics::constant::c1o36*(-vx1+vx2+vx3)); break;
          default: throw UbException(UB_EXARGS, "unknown error");
          }
-         LBMReal fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
+         real fReturn = ((1.0-q)/(1.0+q))*((f[invDir]-feq[invDir])/(1.0-collFactor)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(1.0+q));
          distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
       }
    }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.cpp
index 10c10f14f6b2bd6f4f85d7fbe0c7d9d4650cbe73..b3c97393af0e21f5732ee2763c09f9fc60017862 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.cpp
@@ -52,20 +52,20 @@ SPtr<BCAlgorithm> ThinWallNoSlipBCAlgorithm::clone()
 //////////////////////////////////////////////////////////////////////////
 void ThinWallNoSlipBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3;
+    real rho, vx1, vx2, vx3;
     calcMacrosFct(f, rho, vx1, vx2, vx3);
     calcFeqFct(feq, rho, vx1, vx2, vx3);
 
-    LBMReal fReturn;
+    real fReturn;
 
     for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
         if (bcPtr->hasNoSlipBoundaryFlag(fdir)) {
             const int invDir = D3Q27System::INVDIR[fdir];
             if (pass == 1) {
-                LBMReal q = bcPtr->getQ(invDir);
+                real q = bcPtr->getQ(invDir);
                 fReturn   = ((1.0 - q) / (1.0 + q)) * 0.5 *
                           (f[invDir] - f[fdir] +
                            (f[invDir] + f[fdir] - collFactor * (feq[fdir] + feq[invDir])) / (1.0 - collFactor));
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.h
index e21c9b4fbb417242b0cc858afb26ddd16fffce18..f9995d49fae300b44e30df4b8e3f47cd7ac95929 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThinWallNoSlipBCAlgorithm.h
@@ -53,6 +53,6 @@ protected:
 
 private:
     int pass;
-    LBMReal fTemp[D3Q27System::ENDF + 1];
+    real fTemp[D3Q27System::ENDF + 1];
 };
 #endif // ThinWallNoSlipBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.cpp
index bec8e139e333f5fa18847ddbb5fbb11c5c5c1eac..ebdf07f25ba489a87b637646271171bdc6de6d58 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.cpp
@@ -72,21 +72,22 @@ void ThixotropyDensityBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> d
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyDensityBCAlgorithm::applyBC()
 {
-   using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+    using namespace D3Q27System;
 
-	LBMReal f[D3Q27System::ENDF + 1];
-	LBMReal feq[D3Q27System::ENDF + 1];
-	LBMReal h[D3Q27System::ENDF + 1];
-	LBMReal heq[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
+	real feq[D3Q27System::ENDF + 1];
+	real h[D3Q27System::ENDF + 1];
+	real heq[D3Q27System::ENDF + 1];
 	distributions->getDistributionInv(f, x1, x2, x3);
 	distributionsH->getDistributionInv(h, x1, x2, x3);
 	
-	LBMReal rho, vx1, vx2, vx3;
+	real rho, vx1, vx2, vx3;
 	
 	calcMacrosFct(f, rho, vx1, vx2, vx3);
 	calcFeqFct(feq, rho, vx1, vx2, vx3);
 
-	LBMReal lambda = D3Q27System::getDensity(h);
+	real lambda = D3Q27System::getDensity(h);
 	D3Q27System::calcCompFeq(heq, lambda, vx1, vx2, vx3);
 
 
@@ -95,25 +96,25 @@ void ThixotropyDensityBCAlgorithm::applyBC()
 	int nx3 = x3;
 
 	//flag points in direction of fluid
-	if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_P00)) { nx1 -= 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_M00)) { nx1 += 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0P0)) { nx2 -= 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_0M0)) { nx2 += 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00P)) { nx3 -= 1; }
-	else if (bcPtr->hasDensityBoundaryFlag(D3Q27System::DIR_00M)) { nx3 += 1; }
+	if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) { nx1 -= 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) { nx1 += 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) { nx2 -= 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) { nx2 += 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) { nx3 -= 1; }
+	else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) { nx3 += 1; }
 	else	 UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
 
-	LBMReal rhoBC = bcPtr->getBoundaryDensity();
+	real rhoBC = bcPtr->getBoundaryDensity();
 
 	for (int fdir = D3Q27System::STARTF; fdir <= D3Q27System::ENDF; fdir++)
 	{
 		if (bcPtr->hasDensityBoundaryFlag(fdir))
 		{
-			LBMReal ftemp = calcFeqsForDirFct(fdir, rho, vx1, vx2, vx3);
+			real ftemp = calcFeqsForDirFct(fdir, rho, vx1, vx2, vx3);
 			ftemp = calcFeqsForDirFct(fdir, rhoBC, vx1, vx2, vx3) + f[fdir] - ftemp;
 			distributions->setDistributionForDirection(ftemp, nx1, nx2, nx3, fdir);
 
-			LBMReal htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
+			real htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
 			htemp = D3Q27System::getCompFeqForDirection(fdir,lambdaBC, vx1, vx2, vx3) + h[fdir] - htemp;
 			distributionsH->setDistributionForDirection(htemp, nx1, nx2, nx3, fdir);
 		}
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.h
index 0ed191335ac05eb0e246271f577b024cc11b8de9..2b83eed0ef9720b247751011e4d49d70df4b5e71 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyDensityBCAlgorithm.h
@@ -47,12 +47,12 @@ public:
 	//void addDistributionsF(SPtr<DistributionArray3D> distributions);
 	void addDistributionsH(SPtr<DistributionArray3D> distributions);
 	void applyBC();
-	void setLambdaBC(LBMReal lambda) { this->lambdaBC = lambda; }
-	LBMReal getLambdaBC() { return this->lambdaBC; }
+	void setLambdaBC(real lambda) { this->lambdaBC = lambda; }
+	real getLambdaBC() { return this->lambdaBC; }
 protected:
 	SPtr<DistributionArray3D> distributionsH;
 private:
-	LBMReal lambdaBC;
+	real lambdaBC;
 };
 #endif // ThixotropyDensityBCAlgorithm_h__
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNoSlipBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNoSlipBCAlgorithm.cpp
index 48b15fde31369a1857055263f9fc070ce9415a1b..e973a0091ea12db88e21052c3addc8fa4db8e995 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNoSlipBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNoSlipBCAlgorithm.cpp
@@ -70,18 +70,18 @@ void ThixotropyNoSlipBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D> di
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyNoSlipBCAlgorithm::applyBC()
 {
-	LBMReal f[D3Q27System::ENDF + 1];
-	LBMReal feq[D3Q27System::ENDF + 1];
-	LBMReal h[D3Q27System::ENDF + 1];
-	LBMReal heq[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
+	real feq[D3Q27System::ENDF + 1];
+	real h[D3Q27System::ENDF + 1];
+	real heq[D3Q27System::ENDF + 1];
 	distributions->getDistributionInv(f, x1, x2, x3);
 	distributionsH->getDistributionInv(h, x1, x2, x3);
-	LBMReal rho, vx1, vx2, vx3;//, concentration, fl1, fl2, fl3, m100;
+	real rho, vx1, vx2, vx3;//, concentration, fl1, fl2, fl3, m100;
 	calcMacrosFct(f, rho, vx1, vx2, vx3);
 	calcFeqFct(feq, rho, vx1, vx2, vx3);
 
 	//calcDiffusionMacrosFctPost(h, concentration, fl1, fl2, fl3, m100, collFactor);
-	LBMReal lambda = D3Q27System::getDensity(h);
+	real lambda = D3Q27System::getDensity(h);
 	D3Q27System::calcCompFeq(heq, lambda, 0., 0., 0.);
 
 	for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++)
@@ -90,9 +90,9 @@ void ThixotropyNoSlipBCAlgorithm::applyBC()
 		{
 			//quadratic bounce back
 			const int invDir = D3Q27System::INVDIR[fdir];
-			LBMReal q = bcPtr->getQ(invDir);
-			LBMReal fReturnf = ((1.0 - q) / (1.0 + q))*((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) + ((q / (1.0 + q))*(f[invDir] + f[fdir]));
-			LBMReal fReturnh = ((1.0 - q) / (1.0 + q))*((h[invDir] - heq[invDir]) / (1.0 - collFactor) + heq[invDir]) + ((q / (1.0 + q))*(h[invDir] + h[fdir]));
+			real q = bcPtr->getQ(invDir);
+			real fReturnf = ((1.0 - q) / (1.0 + q))*((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) + ((q / (1.0 + q))*(f[invDir] + f[fdir]));
+			real fReturnh = ((1.0 - q) / (1.0 + q))*((h[invDir] - heq[invDir]) / (1.0 - collFactor) + heq[invDir]) + ((q / (1.0 + q))*(h[invDir] + h[fdir]));
 
 			distributions->setDistributionForDirection(fReturnf, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
 			distributionsH->setDistributionForDirection(fReturnh, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNonReflectingOutflowBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNonReflectingOutflowBCAlgorithm.cpp
index ed90cc7596e186ab9984f25e2ba0ecdb625c9135..257b2b6f227a71f2b22312aad20bb49d93dbf4e9 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNonReflectingOutflowBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyNonReflectingOutflowBCAlgorithm.cpp
@@ -69,9 +69,11 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::addDistributionsH(SPtr<Distribut
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
 {
+   using namespace vf::lbm::dir;
    using namespace D3Q27System;
-   LBMReal f[ENDF + 1];
-   LBMReal ftemp[ENDF + 1];
+
+   real f[ENDF + 1];
+   real ftemp[ENDF + 1];
 
    int nx1 = x1;
    int nx2 = x2;
@@ -90,21 +92,21 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
    distributions->getDistribution(f, x1, x2, x3);
    distributions->getDistribution(ftemp, nx1, nx2, nx3);
 
-   LBMReal rho, vx1, vx2, vx3;
+   real rho, vx1, vx2, vx3;
    calcMacrosFct(f, rho, vx1, vx2, vx3);
 
    switch (direction)
    {
    case DIR_P00:
-      f[DIR_P00] = ftemp[DIR_P00] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P00];
-      f[DIR_PP0] = ftemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PP0];
-      f[DIR_PM0] = ftemp[DIR_PM0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PM0];
-      f[DIR_P0P] = ftemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P0P];
-      f[DIR_P0M] = ftemp[DIR_P0M] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_P0M];
-      f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PPP];
-      f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PMP];
-      f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PPM];
-      f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * f[DIR_PMM];
+      f[DIR_P00] = ftemp[DIR_P00] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_P00];
+      f[DIR_PP0] = ftemp[DIR_PP0] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PP0];
+      f[DIR_PM0] = ftemp[DIR_PM0] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PM0];
+      f[DIR_P0P] = ftemp[DIR_P0P] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_P0P];
+      f[DIR_P0M] = ftemp[DIR_P0M] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_P0M];
+      f[DIR_PPP] = ftemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PPP];
+      f[DIR_PMP] = ftemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PMP];
+      f[DIR_PPM] = ftemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PPM];
+      f[DIR_PMM] = ftemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * f[DIR_PMM];
 
       distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
       distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -117,15 +119,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
       break;
    case DIR_M00:
-      f[DIR_M00] = ftemp[DIR_M00] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M00];
-      f[DIR_MP0] = ftemp[DIR_MP0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MP0];
-      f[DIR_MM0] = ftemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MM0];
-      f[DIR_M0P] = ftemp[DIR_M0P] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M0P];
-      f[DIR_M0M] = ftemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_M0M];
-      f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MPP];
-      f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MMP];
-      f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MPM];
-      f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * f[DIR_MMM];
+      f[DIR_M00] = ftemp[DIR_M00] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_M00];
+      f[DIR_MP0] = ftemp[DIR_MP0] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MP0];
+      f[DIR_MM0] = ftemp[DIR_MM0] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MM0];
+      f[DIR_M0P] = ftemp[DIR_M0P] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_M0P];
+      f[DIR_M0M] = ftemp[DIR_M0M] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_M0M];
+      f[DIR_MPP] = ftemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MPP];
+      f[DIR_MMP] = ftemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MMP];
+      f[DIR_MPM] = ftemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MPM];
+      f[DIR_MMM] = ftemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * f[DIR_MMM];
 
       distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
       distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
@@ -138,15 +140,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
       break;
    case DIR_0P0:
-      f[DIR_0P0] = ftemp[DIR_0P0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0P0];
-      f[DIR_PP0] = ftemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PP0];
-      f[DIR_MP0] = ftemp[DIR_MP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MP0];
-      f[DIR_0PP] = ftemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0PP];
-      f[DIR_0PM] = ftemp[DIR_0PM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_0PM];
-      f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PPP];
-      f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MPP];
-      f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_PPM];
-      f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * f[DIR_MPM];
+      f[DIR_0P0] = ftemp[DIR_0P0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_0P0];
+      f[DIR_PP0] = ftemp[DIR_PP0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_PP0];
+      f[DIR_MP0] = ftemp[DIR_MP0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_MP0];
+      f[DIR_0PP] = ftemp[DIR_0PP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_0PP];
+      f[DIR_0PM] = ftemp[DIR_0PM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_0PM];
+      f[DIR_PPP] = ftemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_PPP];
+      f[DIR_MPP] = ftemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_MPP];
+      f[DIR_PPM] = ftemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_PPM];
+      f[DIR_MPM] = ftemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * f[DIR_MPM];
 
       distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
       distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -159,15 +161,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
       break;
    case DIR_0M0:
-      f[DIR_0M0] = ftemp[DIR_0M0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0M0];
-      f[DIR_PM0] = ftemp[DIR_PM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PM0];
-      f[DIR_MM0] = ftemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MM0];
-      f[DIR_0MP] = ftemp[DIR_0MP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0MP];
-      f[DIR_0MM] = ftemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_0MM];
-      f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PMP];
-      f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MMP];
-      f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_PMM];
-      f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * f[DIR_MMM];
+      f[DIR_0M0] = ftemp[DIR_0M0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_0M0];
+      f[DIR_PM0] = ftemp[DIR_PM0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_PM0];
+      f[DIR_MM0] = ftemp[DIR_MM0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_MM0];
+      f[DIR_0MP] = ftemp[DIR_0MP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_0MP];
+      f[DIR_0MM] = ftemp[DIR_0MM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_0MM];
+      f[DIR_PMP] = ftemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_PMP];
+      f[DIR_MMP] = ftemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_MMP];
+      f[DIR_PMM] = ftemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_PMM];
+      f[DIR_MMM] = ftemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * f[DIR_MMM];
 
       distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
       distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
@@ -180,15 +182,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
       break;
    case DIR_00P:
-      f[DIR_00P] = ftemp[DIR_00P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_00P];
-      f[DIR_P0P] = ftemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_P0P];
-      f[DIR_M0P] = ftemp[DIR_M0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_M0P];
-      f[DIR_0PP] = ftemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_0PP];
-      f[DIR_0MP] = ftemp[DIR_0MP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_0MP];
-      f[DIR_PPP] = ftemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_PPP];
-      f[DIR_MPP] = ftemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_MPP];
-      f[DIR_PMP] = ftemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_PMP];
-      f[DIR_MMP] = ftemp[DIR_MMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * f[DIR_MMP];
+      f[DIR_00P] = ftemp[DIR_00P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_00P];
+      f[DIR_P0P] = ftemp[DIR_P0P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_P0P];
+      f[DIR_M0P] = ftemp[DIR_M0P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_M0P];
+      f[DIR_0PP] = ftemp[DIR_0PP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_0PP];
+      f[DIR_0MP] = ftemp[DIR_0MP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_0MP];
+      f[DIR_PPP] = ftemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_PPP];
+      f[DIR_MPP] = ftemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_MPP];
+      f[DIR_PMP] = ftemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_PMP];
+      f[DIR_MMP] = ftemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * f[DIR_MMP];
 
       distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
       distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
@@ -201,15 +203,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
       break;
    case DIR_00M:
-      f[DIR_00M] = ftemp[DIR_00M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_00M];
-      f[DIR_P0M] = ftemp[DIR_P0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_P0M];
-      f[DIR_M0M] = ftemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_M0M];
-      f[DIR_0PM] = ftemp[DIR_0PM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_0PM];
-      f[DIR_0MM] = ftemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_0MM];
-      f[DIR_PPM] = ftemp[DIR_PPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_PPM];
-      f[DIR_MPM] = ftemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_MPM];
-      f[DIR_PMM] = ftemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_PMM];
-      f[DIR_MMM] = ftemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * f[DIR_MMM];
+      f[DIR_00M] = ftemp[DIR_00M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_00M];
+      f[DIR_P0M] = ftemp[DIR_P0M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_P0M];
+      f[DIR_M0M] = ftemp[DIR_M0M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_M0M];
+      f[DIR_0PM] = ftemp[DIR_0PM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_0PM];
+      f[DIR_0MM] = ftemp[DIR_0MM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_0MM];
+      f[DIR_PPM] = ftemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_PPM];
+      f[DIR_MPM] = ftemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_MPM];
+      f[DIR_PMM] = ftemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_PMM];
+      f[DIR_MMM] = ftemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * f[DIR_MMM];
 
       distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
       distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
@@ -224,8 +226,8 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
    default:
       UB_THROW(UbException(UB_EXARGS, "It isn't implemented non reflecting density boundary for this direction!"));
    }
-   LBMReal h[D3Q27System::ENDF + 1];
-   LBMReal htemp[ENDF + 1];
+   real h[D3Q27System::ENDF + 1];
+   real htemp[ENDF + 1];
 
    distributionsH->getDistribution(h, x1, x2, x3);
    distributionsH->getDistribution(htemp, nx1, nx2, nx3);
@@ -240,15 +242,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
    switch (direction)
    {
    case DIR_P00:
-      h[DIR_P00]  = htemp[DIR_P00] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_P00];
-      h[DIR_PP0] = htemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PP0];
-      h[DIR_PM0] = htemp[DIR_PM0] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PM0];
-      h[DIR_P0P] = htemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_P0P];
-      h[DIR_P0M] = htemp[DIR_P0M] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_P0M];
-      h[DIR_PPP] = htemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PPP];
-      h[DIR_PMP] = htemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PMP];
-      h[DIR_PPM] = htemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PPM];
-      h[DIR_PMM] = htemp[DIR_PMM] * (UbMath::one_over_sqrt3 + vx1) + (1.0 - UbMath::one_over_sqrt3 - vx1) * h[DIR_PMM];
+      h[DIR_P00]  = htemp[DIR_P00] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_P00];
+      h[DIR_PP0] = htemp[DIR_PP0] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_PP0];
+      h[DIR_PM0] = htemp[DIR_PM0] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_PM0];
+      h[DIR_P0P] = htemp[DIR_P0P] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_P0P];
+      h[DIR_P0M] = htemp[DIR_P0M] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_P0M];
+      h[DIR_PPP] = htemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_PPP];
+      h[DIR_PMP] = htemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_PMP];
+      h[DIR_PPM] = htemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_PPM];
+      h[DIR_PMM] = htemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 + vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx1) * h[DIR_PMM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
       distributionsH->setDistributionInvForDirection(h[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -261,15 +263,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
       break;
    case DIR_M00:
-      h[DIR_M00] = htemp[DIR_M00] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_M00];
-      h[DIR_MP0] = htemp[DIR_MP0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MP0];
-      h[DIR_MM0] = htemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MM0];
-      h[DIR_M0P] = htemp[DIR_M0P] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_M0P];
-      h[DIR_M0M] = htemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_M0M];
-      h[DIR_MPP] = htemp[DIR_MPP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MPP];
-      h[DIR_MMP] = htemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MMP];
-      h[DIR_MPM] = htemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MPM];
-      h[DIR_MMM] = htemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx1) + (1.0 - UbMath::one_over_sqrt3 + vx1) * h[DIR_MMM];
+      h[DIR_M00] = htemp[DIR_M00] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_M00];
+      h[DIR_MP0] = htemp[DIR_MP0] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_MP0];
+      h[DIR_MM0] = htemp[DIR_MM0] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_MM0];
+      h[DIR_M0P] = htemp[DIR_M0P] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_M0P];
+      h[DIR_M0M] = htemp[DIR_M0M] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_M0M];
+      h[DIR_MPP] = htemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_MPP];
+      h[DIR_MMP] = htemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_MMP];
+      h[DIR_MPM] = htemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_MPM];
+      h[DIR_MMM] = htemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx1) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx1) * h[DIR_MMM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
       distributionsH->setDistributionInvForDirection(h[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
@@ -282,15 +284,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
       break;
    case DIR_0P0:
-      h[DIR_0P0] = htemp[DIR_0P0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_0P0];
-      h[DIR_PP0] = htemp[DIR_PP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_PP0];
-      h[DIR_MP0] = htemp[DIR_MP0] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_MP0];
-      h[DIR_0PP] = htemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_0PP];
-      h[DIR_0PM] = htemp[DIR_0PM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_0PM];
-      h[DIR_PPP] = htemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_PPP];
-      h[DIR_MPP] = htemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_MPP];
-      h[DIR_PPM] = htemp[DIR_PPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_PPM];
-      h[DIR_MPM] = htemp[DIR_MPM] * (UbMath::one_over_sqrt3 + vx2) + (1.0 - UbMath::one_over_sqrt3 - vx2) * h[DIR_MPM];
+      h[DIR_0P0] = htemp[DIR_0P0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_0P0];
+      h[DIR_PP0] = htemp[DIR_PP0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_PP0];
+      h[DIR_MP0] = htemp[DIR_MP0] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_MP0];
+      h[DIR_0PP] = htemp[DIR_0PP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_0PP];
+      h[DIR_0PM] = htemp[DIR_0PM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_0PM];
+      h[DIR_PPP] = htemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_PPP];
+      h[DIR_MPP] = htemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_MPP];
+      h[DIR_PPM] = htemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_PPM];
+      h[DIR_MPM] = htemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 + vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx2) * h[DIR_MPM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
       distributionsH->setDistributionInvForDirection(h[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
@@ -303,15 +305,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
       break;
    case DIR_0M0:
-      h[DIR_0M0] = htemp[DIR_0M0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_0M0];
-      h[DIR_PM0] = htemp[DIR_PM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_PM0];
-      h[DIR_MM0] = htemp[DIR_MM0] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_MM0];
-      h[DIR_0MP] = htemp[DIR_0MP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_0MP];
-      h[DIR_0MM] = htemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_0MM];
-      h[DIR_PMP] = htemp[DIR_PMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_PMP];
-      h[DIR_MMP] = htemp[DIR_MMP] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_MMP];
-      h[DIR_PMM] = htemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_PMM];
-      h[DIR_MMM] = htemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx2) + (1.0 - UbMath::one_over_sqrt3 + vx2) * h[DIR_MMM];
+      h[DIR_0M0] = htemp[DIR_0M0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_0M0];
+      h[DIR_PM0] = htemp[DIR_PM0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_PM0];
+      h[DIR_MM0] = htemp[DIR_MM0] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_MM0];
+      h[DIR_0MP] = htemp[DIR_0MP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_0MP];
+      h[DIR_0MM] = htemp[DIR_0MM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_0MM];
+      h[DIR_PMP] = htemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_PMP];
+      h[DIR_MMP] = htemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_MMP];
+      h[DIR_PMM] = htemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_PMM];
+      h[DIR_MMM] = htemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx2) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx2) * h[DIR_MMM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
       distributionsH->setDistributionInvForDirection(h[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
@@ -324,15 +326,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
       break;
    case DIR_00P:
-      h[DIR_00P] = htemp[DIR_00P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_00P];
-      h[DIR_P0P] = htemp[DIR_P0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_P0P];
-      h[DIR_M0P] = htemp[DIR_M0P] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_M0P];
-      h[DIR_0PP] = htemp[DIR_0PP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_0PP];
-      h[DIR_0MP] = htemp[DIR_0MP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_0MP];
-      h[DIR_PPP] = htemp[DIR_PPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_PPP];
-      h[DIR_MPP] = htemp[DIR_MPP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_MPP];
-      h[DIR_PMP] = htemp[DIR_PMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_PMP];
-      h[DIR_MMP] = htemp[DIR_MMP] * (UbMath::one_over_sqrt3 + vx3) + (1.0 - UbMath::one_over_sqrt3 - vx3) * h[DIR_MMP];
+      h[DIR_00P] = htemp[DIR_00P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_00P];
+      h[DIR_P0P] = htemp[DIR_P0P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_P0P];
+      h[DIR_M0P] = htemp[DIR_M0P] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_M0P];
+      h[DIR_0PP] = htemp[DIR_0PP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_0PP];
+      h[DIR_0MP] = htemp[DIR_0MP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_0MP];
+      h[DIR_PPP] = htemp[DIR_PPP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_PPP];
+      h[DIR_MPP] = htemp[DIR_MPP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_MPP];
+      h[DIR_PMP] = htemp[DIR_PMP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_PMP];
+      h[DIR_MMP] = htemp[DIR_MMP] * (vf::basics::constant::one_over_sqrt3 + vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 - vx3) * h[DIR_MMP];
 
       distributionsH->setDistributionInvForDirection(h[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
       distributionsH->setDistributionInvForDirection(h[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
@@ -345,15 +347,15 @@ void ThixotropyNonReflectingOutflowBCAlgorithm::applyBC()
       distributionsH->setDistributionInvForDirection(h[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
       break;
    case DIR_00M:
-      h[DIR_00M] = htemp[DIR_00M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_00M];
-      h[DIR_P0M] = htemp[DIR_P0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_P0M];
-      h[DIR_M0M] = htemp[DIR_M0M] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_M0M];
-      h[DIR_0PM] = htemp[DIR_0PM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_0PM];
-      h[DIR_0MM] = htemp[DIR_0MM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_0MM];
-      h[DIR_PPM] = htemp[DIR_PPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_PPM];
-      h[DIR_MPM] = htemp[DIR_MPM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_MPM];
-      h[DIR_PMM] = htemp[DIR_PMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_PMM];
-      h[DIR_MMM] = htemp[DIR_MMM] * (UbMath::one_over_sqrt3 - vx3) + (1.0 - UbMath::one_over_sqrt3 + vx3) * h[DIR_MMM];
+      h[DIR_00M] = htemp[DIR_00M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_00M];
+      h[DIR_P0M] = htemp[DIR_P0M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_P0M];
+      h[DIR_M0M] = htemp[DIR_M0M] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_M0M];
+      h[DIR_0PM] = htemp[DIR_0PM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_0PM];
+      h[DIR_0MM] = htemp[DIR_0MM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_0MM];
+      h[DIR_PPM] = htemp[DIR_PPM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_PPM];
+      h[DIR_MPM] = htemp[DIR_MPM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_MPM];
+      h[DIR_PMM] = htemp[DIR_PMM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_PMM];
+      h[DIR_MMM] = htemp[DIR_MMM] * (vf::basics::constant::one_over_sqrt3 - vx3) + (1.0 - vf::basics::constant::one_over_sqrt3 + vx3) * h[DIR_MMM];
 
       distributionsH->setDistributionInvForDirection(h[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
       distributionsH->setDistributionInvForDirection(h[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.cpp
index 4748212417600b18615a938a6c7a2696ee00eb00..189f52fd3088392be0db5b144158ed97623beafe 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.cpp
@@ -71,33 +71,35 @@ void ThixotropyVelocityBCAlgorithm::addDistributionsH(SPtr<DistributionArray3D>
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyVelocityBCAlgorithm::applyBC()
 {
-	LBMReal f[D3Q27System::ENDF + 1];
-	LBMReal feq[D3Q27System::ENDF + 1];
-	LBMReal h[D3Q27System::ENDF + 1];
+	using namespace vf::lbm::dir;
+
+	real f[D3Q27System::ENDF + 1];
+	real feq[D3Q27System::ENDF + 1];
+	real h[D3Q27System::ENDF + 1];
 
 	distributions->getDistributionInv(f, x1, x2, x3);
 	distributionsH->getDistributionInv(h, x1, x2, x3);
 	
-	LBMReal rho, vx1, vx2, vx3, drho;
+	real rho, vx1, vx2, vx3, drho;
 	calcMacrosFct(f, drho, vx1, vx2, vx3);
 	calcFeqFct(feq, drho, vx1, vx2, vx3);
 
 	rho = 1.0 + drho * compressibleFactor;
 
 	//calcDiffusionMacrosFctPost(h, concentration, fl1, fl2, fl3, m100, collFactor);
-	LBMReal lambda = D3Q27System::getDensity(h);
+	real lambda = D3Q27System::getDensity(h);
 
 	int nx1 = x1;
 	int nx2 = x2;
 	int nx3 = x3;
 
 	//flag points in direction of fluid
-	if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_P00)) { nx1 -= 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_M00)) { nx1 += 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0P0)) { nx2 -= 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0M0)) { nx2 += 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00P)) { nx3 -= 1; }
-	else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00M)) { nx3 += 1; }
+	if (bcPtr->hasVelocityBoundaryFlag(DIR_P00)) { nx1 -= 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_M00)) { nx1 += 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_0P0)) { nx2 -= 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_0M0)) { nx2 += 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_00P)) { nx3 -= 1; }
+	else if (bcPtr->hasVelocityBoundaryFlag(DIR_00M)) { nx3 += 1; }
 	else	 UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on velocity boundary..."));
 
 	//lambdaBC = bcPtr->getBoundaryThixotropy();
@@ -123,12 +125,12 @@ void ThixotropyVelocityBCAlgorithm::applyBC()
 		if (bcPtr->hasVelocityBoundaryFlag(fdir))
 		{
 			const int invDir = D3Q27System::INVDIR[fdir];
-			LBMReal q = bcPtr->getQ(invDir);// m+m q=0 stabiler
-			LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
-			LBMReal fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) + ((q * (f[invDir] + f[fdir]) - velocity * rho) / (1.0 + q));
+			real q = bcPtr->getQ(invDir);// m+m q=0 stabiler
+			real velocity = bcPtr->getBoundaryVelocity(invDir);
+			real fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) + ((q * (f[invDir] + f[fdir]) - velocity * rho) / (1.0 + q));
 			distributions->setDistributionForDirection(fReturn, x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
 
-			LBMReal htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
+			real htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
 			htemp = D3Q27System::getCompFeqForDirection(fdir, lambdaBC, vx1, vx2, vx3) + h[fdir] - htemp;
 			distributionsH->setDistributionForDirection(htemp, nx1, nx2, nx3, fdir);
 		}
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.h
index 86147cb21a232d8558737f28cb023ff2394a6f0d..bea3b0a374a128f1de88b1ef3d6e75c0ad34190a 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityBCAlgorithm.h
@@ -45,12 +45,12 @@ public:
 	void addDistributions(SPtr<DistributionArray3D> distributions);
 	void addDistributionsH(SPtr<DistributionArray3D> distributions);
 	void applyBC();
-	void setLambdaBC(LBMReal lambda) { this->lambdaBC = lambda; }
-	LBMReal getLambdaBC() { return this->lambdaBC; }
+	void setLambdaBC(real lambda) { this->lambdaBC = lambda; }
+	real getLambdaBC() { return this->lambdaBC; }
 protected:
 	SPtr<DistributionArray3D> distributionsH;
 private:
-	LBMReal lambdaBC;
+	real lambdaBC;
 };
 #endif // ThixotropyVelocityBCAlgorithm_h__
 
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.cpp
index 454b29bc459045b1f61746eeb7f5f5987f1762a7..6c2622fc7838381de8fa94b12a97d35146b78b43 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.cpp
@@ -64,33 +64,35 @@ void ThixotropyVelocityWithDensityBCAlgorithm::addDistributionsH(SPtr<Distributi
 //////////////////////////////////////////////////////////////////////////
 void ThixotropyVelocityWithDensityBCAlgorithm::applyBC()
 {
+    using namespace vf::lbm::dir;
+
    //velocity bc for non reflecting pressure bc
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
    
-   LBMReal h[D3Q27System::ENDF + 1];
+   real h[D3Q27System::ENDF + 1];
    distributionsH->getDistributionInv(h, x1, x2, x3);
 
-   LBMReal rho, vx1, vx2, vx3, drho;
+   real rho, vx1, vx2, vx3, drho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    
    rho = 1.0+drho*compressibleFactor;
   
    ///////////////////////////////////////////////////////////////////
    // Rheology
-   LBMReal lambda = D3Q27System::getDensity(h);
+   real lambda = D3Q27System::getDensity(h);
 
    int nx1 = x1;
    int nx2 = x2;
    int nx3 = x3;
 
    //flag points in direction of fluid
-   if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_P00)) { nx1 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_M00)) { nx1 += 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0P0)) { nx2 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_0M0)) { nx2 += 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00P)) { nx3 -= 1; }
-   else if (bcPtr->hasVelocityBoundaryFlag(D3Q27System::DIR_00M)) { nx3 += 1; }
+   if (bcPtr->hasVelocityBoundaryFlag(DIR_P00)) { nx1 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_M00)) { nx1 += 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_0P0)) { nx2 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_0M0)) { nx2 += 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_00P)) { nx3 -= 1; }
+   else if (bcPtr->hasVelocityBoundaryFlag(DIR_00M)) { nx3 += 1; }
    else	 UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on velocity boundary..."));
 
    for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++)
@@ -112,16 +114,16 @@ void ThixotropyVelocityWithDensityBCAlgorithm::applyBC()
          if (bcArray->isSolid(nX1,nX2,nX3))
          {
             const int invDir = D3Q27System::INVDIR[fdir];
-            LBMReal velocity = bcPtr->getBoundaryVelocity(fdir);
+            real velocity = bcPtr->getBoundaryVelocity(fdir);
 
-            LBMReal fReturn = (f[fdir] + f[invDir] - velocity*rho) / 2.0 - drho*D3Q27System::WEIGTH[invDir];
+            real fReturn = (f[fdir] + f[invDir] - velocity*rho) / 2.0 - drho*D3Q27System::WEIGTH[invDir];
             distributions->setDistributionForDirection(fReturn, nX1, nX2, nX3, invDir);
          }
       }
       
       if (bcPtr->hasVelocityBoundaryFlag(fdir))
       {
-         LBMReal htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
+         real htemp = D3Q27System::getCompFeqForDirection(fdir, lambda, vx1, vx2, vx3);
          htemp = D3Q27System::getCompFeqForDirection(fdir, lambdaBC, vx1, vx2, vx3) + h[fdir] - htemp;
          distributionsH->setDistributionForDirection(htemp, nx1, nx2, nx3, fdir);
       }
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.h
index c24b6c28e2f494ced4a85fe4d8b9d2f33125424a..d69e79a223e3db56cb6a37014bbf030183adf606 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/ThixotropyVelocityWithDensityBCAlgorithm.h
@@ -50,11 +50,11 @@ public:
    void addDistributions(SPtr<DistributionArray3D> distributions);
    void addDistributionsH(SPtr<DistributionArray3D> distributions);
    void applyBC();
-   void setLambdaBC(LBMReal lambda) { this->lambdaBC = lambda; }
-   LBMReal getLambdaBC() { return this->lambdaBC; }
+   void setLambdaBC(real lambda) { this->lambdaBC = lambda; }
+   real getLambdaBC() { return this->lambdaBC; }
 protected:
    SPtr<DistributionArray3D> distributionsH;
 private:
-   LBMReal lambdaBC;
+   real lambdaBC;
 };
 #endif // ThixotropyVelocityWithDensityBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.cpp
index 55d65f629b0311c8599b81b39a62e8be06f35090..6ed8affe2b907764b56ad9f186d7f8adba2b6867 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.cpp
@@ -50,7 +50,7 @@ VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const boo
 }
 /*==========================================================*/
 VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const mu::Parser &function,
-                                     const double &startTime, const double &endTime)
+                                     const real &startTime, const real &endTime)
 {
     if (vx1)
         this->vx1BCs.emplace_back(function, startTime, endTime);
@@ -62,8 +62,8 @@ VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const boo
 }
 /*==========================================================*/
 VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const mu::Parser &function1,
-                                     const mu::Parser &function2, const mu::Parser &function3, const double &startTime,
-                                     const double &endTime)
+                                     const mu::Parser &function2, const mu::Parser &function3, const real &startTime,
+                                     const real &endTime)
 {
     if (vx1)
         this->vx1BCs.emplace_back(function1, startTime, endTime);
@@ -75,7 +75,7 @@ VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const boo
 }
 /*==========================================================*/
 VelocityBCAdapter::VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const string &functionstring,
-                                     const double &startTime, const double &endTime)
+                                     const real &startTime, const real &endTime)
 {
     if (vx1)
         this->vx1BCs.emplace_back(functionstring, startTime, endTime);
@@ -117,9 +117,9 @@ VelocityBCAdapter::VelocityBCAdapter(const vector<BCFunction> &velVx1BCs, const
     this->init();
 }
 /*==========================================================*/
-VelocityBCAdapter::VelocityBCAdapter(const double &vx1, const double &vx1StartTime, const double &vx1EndTime,
-                                     const double &vx2, const double &vx2StartTime, const double &vx2EndTime,
-                                     const double &vx3, const double &vx3StartTime, const double &vx3EndTime)
+VelocityBCAdapter::VelocityBCAdapter(const real &vx1, const real &vx1StartTime, const real &vx1EndTime,
+                                     const real &vx2, const real &vx2StartTime, const real &vx2EndTime,
+                                     const real &vx3, const real &vx3StartTime, const real &vx3EndTime)
 {
     this->vx1BCs.emplace_back(vx1, vx1StartTime, vx1EndTime);
     this->vx2BCs.emplace_back(vx2, vx2StartTime, vx2EndTime);
@@ -127,9 +127,9 @@ VelocityBCAdapter::VelocityBCAdapter(const double &vx1, const double &vx1StartTi
     this->init();
 }
 /*==========================================================*/
-VelocityBCAdapter::VelocityBCAdapter(const string &vx1Function, const double &vx1StartTime, const double &vx1EndTime,
-                                     const string &vx2Function, const double &vx2StartTime, const double &vx2EndTime,
-                                     const string &vx3Function, const double &vx3StartTime, const double &vx3EndTime)
+VelocityBCAdapter::VelocityBCAdapter(const string &vx1Function, const real &vx1StartTime, const real &vx1EndTime,
+                                     const string &vx2Function, const real &vx2StartTime, const real &vx2EndTime,
+                                     const string &vx3Function, const real &vx3StartTime, const real &vx3EndTime)
 {
     if (vx1Function.size())
         this->vx1BCs.emplace_back(vx1Function, vx1StartTime, vx1EndTime);
@@ -140,9 +140,9 @@ VelocityBCAdapter::VelocityBCAdapter(const string &vx1Function, const double &vx
     this->init();
 }
 /*==========================================================*/
-void VelocityBCAdapter::setNewVelocities(const double &vx1, const double &vx1StartTime, const double &vx1EndTime,
-                                         const double &vx2, const double &vx2StartTime, const double &vx2EndTime,
-                                         const double &vx3, const double &vx3StartTime, const double &vx3EndTime)
+void VelocityBCAdapter::setNewVelocities(const real &vx1, const real &vx1StartTime, const real &vx1EndTime,
+                                         const real &vx2, const real &vx2StartTime, const real &vx2EndTime,
+                                         const real &vx3, const real &vx3StartTime, const real &vx3EndTime)
 {
     this->clear();
     this->vx1BCs.emplace_back(vx1, vx1StartTime, vx1EndTime);
@@ -198,13 +198,13 @@ void VelocityBCAdapter::init(std::vector<BCFunction> &vxBCs)
     }
 }
 /*==========================================================*/
-void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const double &time)
+void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const real &time)
 {
     this->timeStep       = time;
     this->tmpVx1Function = this->tmpVx2Function = this->tmpVx3Function = NULL;
 
     // aktuelle velocityfunction bestimmen
-    double maxEndtime = -Ub::inf;
+    real maxEndtime = -Ub::inf;
 
     for (size_t pos = 0; pos < vx1BCs.size(); ++pos) {
         if (UbMath::equal(vx1BCs[pos].getEndTime(), BCFunction::INFTIMEDEPENDENT))
@@ -214,8 +214,8 @@ void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const dou
 
         if (UbMath::greaterEqual(this->timeStep, vx1BCs[pos].getStartTime())) {
             if (UbMath::lessEqual(this->timeStep, vx1BCs[pos].getEndTime()) ||
-                UbMath::equal(vx1BCs[pos].getEndTime(), (double)BCFunction::INFCONST) ||
-                UbMath::equal(vx1BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)) {
+                UbMath::equal(vx1BCs[pos].getEndTime(), (real)BCFunction::INFCONST) ||
+                UbMath::equal(vx1BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)) {
                 tmpVx1Function = &vx1BCs[pos].getFunction();
                 break;
             }
@@ -229,8 +229,8 @@ void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const dou
 
         if (UbMath::greaterEqual(this->timeStep, vx2BCs[pos].getStartTime())) {
             if (UbMath::lessEqual(this->timeStep, vx2BCs[pos].getEndTime()) ||
-                UbMath::equal(vx2BCs[pos].getEndTime(), (double)BCFunction::INFCONST) ||
-                UbMath::equal(vx2BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)) {
+                UbMath::equal(vx2BCs[pos].getEndTime(), (real)BCFunction::INFCONST) ||
+                UbMath::equal(vx2BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)) {
                 tmpVx2Function = &vx2BCs[pos].getFunction();
                 break;
             }
@@ -244,8 +244,8 @@ void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const dou
 
         if (UbMath::greaterEqual(this->timeStep, vx3BCs[pos].getStartTime())) {
             if (UbMath::lessEqual(this->timeStep, vx3BCs[pos].getEndTime()) ||
-                UbMath::equal(vx3BCs[pos].getEndTime(), (double)BCFunction::INFCONST) ||
-                UbMath::equal(vx3BCs[pos].getEndTime(), (double)BCFunction::INFTIMEDEPENDENT)) {
+                UbMath::equal(vx3BCs[pos].getEndTime(), (real)BCFunction::INFCONST) ||
+                UbMath::equal(vx3BCs[pos].getEndTime(), (real)BCFunction::INFTIMEDEPENDENT)) {
                 tmpVx3Function = &vx3BCs[pos].getFunction();
                 break;
             }
@@ -284,30 +284,30 @@ void VelocityBCAdapter::init(const D3Q27Interactor *const &interactor, const dou
                          << ", timedependent=" << boolalpha << this->isTimeDependent());
 }
 /*==========================================================*/
-void VelocityBCAdapter::update(const D3Q27Interactor *const &interactor, const double &time)
+void VelocityBCAdapter::update(const D3Q27Interactor *const &interactor, const real &time)
 {
     this->init(interactor, time);
 }
 /*==========================================================*/
 void VelocityBCAdapter::adaptBCForDirection(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                                            const double & /*worldX1*/, const double & /*worldX2*/,
-                                            const double & /*worldX3*/, const double &q, const int &fdirection,
-                                            const double & /*time*/)
+                                            const real & /*worldX1*/, const real & /*worldX2*/,
+                                            const real & /*worldX3*/, const real &q, const int &fdirection,
+                                            const real & /*time*/)
 {
     bc->setVelocityBoundaryFlag(D3Q27System::INVDIR[fdirection], secondaryBcOption);
-    bc->setQ((float)q, fdirection);
+    bc->setQ((real)q, fdirection);
 }
 /*==========================================================*/
-void VelocityBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                                const double &worldX2, const double &worldX3, const double &time)
+void VelocityBCAdapter::adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                                const real &worldX2, const real &worldX3, const real &time)
 {
     this->setNodeVelocity(interactor, bc, worldX1, worldX2, worldX3, time);
     bc->setBcAlgorithmType(algorithmType);
 }
 /*==========================================================*/
 void VelocityBCAdapter::setNodeVelocity(const D3Q27Interactor & /*interactor*/, SPtr<BoundaryConditions> bc,
-                                        const double &worldX1, const double &worldX2, const double &worldX3,
-                                        const double &timestep)
+                                        const real &worldX1, const real &worldX2, const real &worldX3,
+                                        const real &timestep)
 {
     // Geschwindigkeiten setzen
     try {
@@ -318,11 +318,11 @@ void VelocityBCAdapter::setNodeVelocity(const D3Q27Interactor & /*interactor*/,
         this->timeStep = timestep;
 
         if (tmpVx1Function)
-            bc->setBoundaryVelocityX1((LBMReal)tmpVx1Function->Eval());
+            bc->setBoundaryVelocityX1((real)tmpVx1Function->Eval());
         if (tmpVx2Function)
-            bc->setBoundaryVelocityX2((LBMReal)tmpVx2Function->Eval());
+            bc->setBoundaryVelocityX2((real)tmpVx2Function->Eval());
         if (tmpVx3Function)
-            bc->setBoundaryVelocityX3((LBMReal)tmpVx3Function->Eval());
+            bc->setBoundaryVelocityX3((real)tmpVx3Function->Eval());
     } catch (mu::Parser::exception_type &e) {
         stringstream error;
         error << "mu::parser exception occurs, message(" << e.GetMsg() << "), formula("
@@ -334,12 +334,12 @@ void VelocityBCAdapter::setNodeVelocity(const D3Q27Interactor & /*interactor*/,
     }
 }
 /*==========================================================*/
-UbTupleDouble3 VelocityBCAdapter::getVelocity(const double &x1, const double &x2, const double &x3,
-                                              const double &timeStep) const
+UbTupleDouble3 VelocityBCAdapter::getVelocity(const real &x1, const real &x2, const real &x3,
+                                              const real &timeStep) const
 {
-    double vx1     = 0.0;
-    double vx2     = 0.0;
-    double vx3     = 0.0;
+    real vx1     = 0.0;
+    real vx2     = 0.0;
+    real vx3     = 0.0;
     this->x1       = x1;
     this->x2       = x2;
     this->x3       = x3;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.h
index c6f5039a3ea3b2612e765235c88d357a25f9a89c..c0596b8e234f581726243a8a95beb6eb71121be4 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAdapter.h
@@ -88,14 +88,14 @@ public:
     VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const BCFunction &velVxBC);
 
     VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const mu::Parser &function,
-                      const double &startTime, const double &endTime);
+                      const real &startTime, const real &endTime);
 
     VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const mu::Parser &function1,
-                      const mu::Parser &function2, const mu::Parser &function3, const double &startTime,
-                      const double &endTime);
+                      const mu::Parser &function2, const mu::Parser &function3, const real &startTime,
+                      const real &endTime);
 
     VelocityBCAdapter(const bool &vx1, const bool &vx2, const bool &vx3, const std::string &functionstring,
-                      const double &startTime, const double &endTime);
+                      const real &startTime, const real &endTime);
 
     VelocityBCAdapter(const BCFunction &velBC, bool x1Dir, bool x2Dir, bool x3Dir);
 
@@ -104,13 +104,13 @@ public:
     VelocityBCAdapter(const std::vector<BCFunction> &velVx1BCs, const std::vector<BCFunction> &velVx2BCs,
                       const std::vector<BCFunction> &velVx3BCs);
 
-    VelocityBCAdapter(const double &vx1, const double &vx1StartTime, const double &vx1EndTime, const double &vx2,
-                      const double &vx2StartTime, const double &vx2EndTime, const double &vx3,
-                      const double &vx3StartTime, const double &vx3EndTime);
+    VelocityBCAdapter(const real &vx1, const real &vx1StartTime, const real &vx1EndTime, const real &vx2,
+                      const real &vx2StartTime, const real &vx2EndTime, const real &vx3,
+                      const real &vx3StartTime, const real &vx3EndTime);
 
-    VelocityBCAdapter(const std::string &vx1Function, const double &vx1StartTime, const double &vx1EndTime,
-                      const std::string &vx2Function, const double &vx2StartTime, const double &vx2EndTime,
-                      const std::string &vx3Function, const double &vx3StartTime, const double &vx3EndTime);
+    VelocityBCAdapter(const std::string &vx1Function, const real &vx1StartTime, const real &vx1EndTime,
+                      const std::string &vx2Function, const real &vx2StartTime, const real &vx2EndTime,
+                      const std::string &vx3Function, const real &vx3StartTime, const real &vx3EndTime);
 
     // methods
     void setTimePeriodic() { (this->type |= TIMEPERIODIC); }
@@ -118,25 +118,25 @@ public:
     bool isTimePeriodic() { return ((this->type & TIMEPERIODIC) == TIMEPERIODIC); }
 
     // The following is meant for moving objects...
-    void setNewVelocities(const double &vx1, const double &vx1StartTime, const double &vx1EndTime, const double &vx2,
-                          const double &vx2StartTime, const double &vx2EndTime, const double &vx3,
-                          const double &vx3StartTime, const double &vx3EndTime);
+    void setNewVelocities(const real &vx1, const real &vx1StartTime, const real &vx1EndTime, const real &vx2,
+                          const real &vx2StartTime, const real &vx2EndTime, const real &vx3,
+                          const real &vx3StartTime, const real &vx3EndTime);
 
     //------------- implements BCAdapter ----- start
     std::string toString();
 
-    void init(const D3Q27Interactor *const &interactor, const double &time = 0) override;
-    void update(const D3Q27Interactor *const &interactor, const double &time = 0) override;
+    void init(const D3Q27Interactor *const &interactor, const real &time = 0) override;
+    void update(const D3Q27Interactor *const &interactor, const real &time = 0) override;
 
-    void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                             const double &worldX2, const double &worldX3, const double &q, const int &fdirection,
-                             const double &time = 0) override;
-    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                 const double &worldX2, const double &worldX3, const double &time = 0) override;
+    void adaptBCForDirection(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                             const real &worldX2, const real &worldX3, const real &q, const int &fdirection,
+                             const real &time = 0) override;
+    void adaptBC(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                 const real &worldX2, const real &worldX3, const real &time = 0) override;
 
     //------------- implements BCAdapter ----- end
 
-    UbTupleDouble3 getVelocity(const double &x1, const double &x2, const double &x3, const double &timeStep) const;
+    UbTupleDouble3 getVelocity(const real &x1, const real &x2, const real &x3, const real &timeStep) const;
 
 protected:
     void init();
@@ -153,8 +153,8 @@ protected:
         vx3BCs.clear();
         this->init();
     }
-    void setNodeVelocity(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const double &worldX1,
-                         const double &worldX2, const double &worldX3, const double &timestep);
+    void setNodeVelocity(const D3Q27Interactor &interactor, SPtr<BoundaryConditions> bc, const real &worldX1,
+                         const real &worldX2, const real &worldX3, const real &timestep);
 
 private:
     mutable mu::value_type x1, x2, x3;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
index 15768aeeb043620aece86194319eafe00ea1df60..9f1bf39d8d6d11747a71b04d9cfc7e3b70870802 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
@@ -55,10 +55,10 @@ void VelocityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributio
 //////////////////////////////////////////////////////////////////////////
 void VelocityBCAlgorithm::applyBC()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
     distributions->getDistributionInv(f, x1, x2, x3);
-    LBMReal rho, vx1, vx2, vx3, drho;
+    real rho, vx1, vx2, vx3, drho;
     calcMacrosFct(f, drho, vx1, vx2, vx3);
     calcFeqFct(feq, drho, vx1, vx2, vx3);
 
@@ -70,9 +70,9 @@ void VelocityBCAlgorithm::applyBC()
     for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
         if (bcPtr->hasVelocityBoundaryFlag(fdir)) {
             const int invDir = D3Q27System::INVDIR[fdir];
-            LBMReal q        = bcPtr->getQ(invDir);
-            LBMReal velocity = bcPtr->getBoundaryVelocity(invDir);
-            LBMReal fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) +
+            real q        = bcPtr->getQ(invDir);
+            real velocity = bcPtr->getBoundaryVelocity(invDir);
+            real fReturn = ((1.0 - q) / (1.0 + q)) * ((f[invDir] - feq[invDir]) / (1.0 - collFactor) + feq[invDir]) +
                               ((q * (f[invDir] + f[fdir]) - velocity * rho) / (1.0 + q));
             distributions->setDistributionForDirection(fReturn, x1 + D3Q27System::DX1[invDir],
                                                        x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir],
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityWithDensityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityWithDensityBCAlgorithm.cpp
index 1fe6632b9de8cb64d98c072bfccaa72ce4bb9ee8..09a52798e62013fdc5878678a4c47aec003b68a2 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityWithDensityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityWithDensityBCAlgorithm.cpp
@@ -56,10 +56,10 @@ void VelocityWithDensityBCAlgorithm::addDistributions(SPtr<DistributionArray3D>
 void VelocityWithDensityBCAlgorithm::applyBC()
 {
    //velocity bc for non reflecting pressure bc
-   LBMReal f[D3Q27System::ENDF+1];
-   //LBMReal feq[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
+   //real feq[D3Q27System::ENDF+1];
    distributions->getDistributionInv(f, x1, x2, x3);
-   LBMReal rho, vx1, vx2, vx3, drho;
+   real rho, vx1, vx2, vx3, drho;
    calcMacrosFct(f, drho, vx1, vx2, vx3);
    //calcFeqFct(feq, drho, vx1, vx2, vx3);
    
@@ -83,7 +83,7 @@ void VelocityWithDensityBCAlgorithm::applyBC()
             if (bcArray->isSolid(nX1, nX2, nX3)) {
                 const int invDir = D3Q27System::INVDIR[fdir];
                 //LBMReal q =1.0;// bcPtr->getQ(invDir);// m+m q=0 stabiler
-                LBMReal velocity = bcPtr->getBoundaryVelocity(fdir);
+                real velocity = bcPtr->getBoundaryVelocity(fdir);
                 
                 //LBMReal fReturn = ((1.0 - q) / (1.0 + q))*((f[fdir] - feq[fdir]*collFactor) / (1.0 -
                 //collFactor)) + ((q*(f[fdir] + f[invDir]) - velocity*rho) / (1.0 +
@@ -92,7 +92,7 @@ void VelocityWithDensityBCAlgorithm::applyBC()
                 // if q=1
                 // LBMReal fReturn = ((q*(f[fdir] + f[invDir]) - velocity*rho) / (1.0 +
                 // q))-drho*D3Q27System::WEIGTH[invDir];
-                LBMReal fReturn = (f[fdir] + f[invDir] - velocity * rho) / 2.0 - drho * D3Q27System::WEIGTH[invDir];
+                real fReturn = (f[fdir] + f[invDir] - velocity * rho) / 2.0 - drho * D3Q27System::WEIGTH[invDir];
 
                 distributions->setDistributionForDirection(fReturn, nX1, nX2, nX3, invDir);
             }
diff --git a/src/cpu/VirtualFluidsCore/CMakeLists.txt b/src/cpu/VirtualFluidsCore/CMakeLists.txt
index b691fd34be1cff4f2bfed0fa4e0cf8860014ed90..5300e898bd17e45b2a0a5c5e2b0d083c975ad1fb 100644
--- a/src/cpu/VirtualFluidsCore/CMakeLists.txt
+++ b/src/cpu/VirtualFluidsCore/CMakeLists.txt
@@ -16,10 +16,6 @@ IF(${USE_CATALYST})
    list(APPEND VF_LIBRARIES optimized vtkParallelMPI debug vtkParallelMPI )
 ENDIF()
 
-IF(${USE_DEM_COUPLING})
-   INCLUDE(${CMAKE_CURRENT_SOURCE_DIR}/../DemCoupling/DemCoupling.cmake)
-ENDIF()
-
 if(BUILD_USE_OPENMP)
     list(APPEND VF_LIBRARIES OpenMP::OpenMP_CXX)
 endif()
@@ -30,6 +26,8 @@ ENDIF()
 
 vf_add_library(BUILDTYPE static PUBLIC_LINK basics muparser ${VF_LIBRARIES} PRIVATE_LINK lbm mpi logger)
 
+vf_add_tests()
+
 vf_get_library_name(library_name)
 
 target_include_directories(${library_name} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/BoundaryConditions)
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
index d02c249a62f60cdb91fbd4af9e975d39c6c4e29d..f3137e5f63dff257d1311eee8d75550f186b480e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
@@ -13,7 +13,7 @@
 #include <SetForcingBlockVisitor.h>
 
 AdjustForcingCoProcessor::AdjustForcingCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                   SPtr<IntegrateValuesHelper> integrateValues, double vTarged,
+                                                   SPtr<IntegrateValuesHelper> integrateValues, real vTarged,
                                                    std::shared_ptr<vf::mpi::Communicator> comm)
 
     : CoProcessor(grid, s), path(path), integrateValues(integrateValues), comm(comm), vx1Targed(vTarged)
@@ -71,13 +71,13 @@ AdjustForcingCoProcessor::AdjustForcingCoProcessor(SPtr<Grid3D> grid, SPtr<UbSch
 }
 
 //////////////////////////////////////////////////////////////////////////
-void AdjustForcingCoProcessor::process(double step)
+void AdjustForcingCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void AdjustForcingCoProcessor::collectData(double step)
+void AdjustForcingCoProcessor::collectData(real step)
 {
     //////////////////////////////////////////////////////////////////////////////////////////////////
     // temporary solution
@@ -104,7 +104,7 @@ void AdjustForcingCoProcessor::collectData(double step)
 
     if (root) {
         cellsVolume = integrateValues->getCellsVolume();
-        double vx1  = integrateValues->getVx1();
+        real vx1  = integrateValues->getVx1();
         vx1Average  = (vx1 / cellsVolume);
 
         //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
index fbf75d066e626a3cf3d44c481138a9b1007b3107..be8dbc69f957521cff88cfbcc2b7260db05d6cdc 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
@@ -5,6 +5,7 @@
 #include <string>
 
 #include "CoProcessor.h"
+#include "lbm/constants/D3Q27.h"
 
 namespace vf::mpi {class Communicator;}
 class UbScheduler;
@@ -21,35 +22,35 @@ class AdjustForcingCoProcessor : public CoProcessor
 {
 public:
     AdjustForcingCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                             SPtr<IntegrateValuesHelper> integrateValues, double vTarged, std::shared_ptr<vf::mpi::Communicator> comm);
+                             SPtr<IntegrateValuesHelper> integrateValues, real vTarged, std::shared_ptr<vf::mpi::Communicator> comm);
     //!< calls collect PostprocessData
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //!< object that can compute spacial average values in 3D-subdomain.
     SPtr<IntegrateValuesHelper> integrateValues;
     //!< compares velocity in integrateValues with target velocity and adjusts forcing accordingly.
-    void collectData(double step);
+    void collectData(real step);
     std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
-    double vx1Targed; //!< target velocity.
-    double forcing;   //!< forcing at previous update step.
-    double cellsVolume;
-    double vx1Average;
+    real vx1Targed; //!< target velocity.
+    real forcing;   //!< forcing at previous update step.
+    real cellsVolume;
+    real vx1Average;
     bool root;
-    double Kpcrit; // Kp critical
-    double Tcrit;  // the oscillation period
-    double Tn;
-    double Tv;
-    double e;
-    double Ta;
-    double Kp;
-    double Ki;
-    double Kd;
-    double y;
-    double esum;
-    double eold;
+    real Kpcrit; // Kp critical
+    real Tcrit;  // the oscillation period
+    real Tn;
+    real Tv;
+    real e;
+    real Ta;
+    real Kp;
+    real Ki;
+    real Kd;
+    real y;
+    real esum;
+    real eold;
     // std::vector<CalcNodes> cnodes;
     std::string path;
 };
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
index adce3f920ed36850ff711c10c7777a5035de027e..1a5276fcf44098254cee825af503a4752df60cd5 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
@@ -25,7 +25,7 @@ AverageValuesCoProcessor::AverageValuesCoProcessor(SPtr<Grid3D> grid, const std:
 {
     resetStepMeans  = (int)rsMeans->getMinBegin();
     resetStepRMS    = (int)rsRMS->getMinBegin();
-    averageInterval = (double)Avs->getMinStep();
+    averageInterval = (real)Avs->getMinStep();
 
     gridRank     = grid->getRank();
     minInitLevel = this->grid->getCoarsestInitializedLevel();
@@ -54,7 +54,7 @@ AverageValuesCoProcessor::AverageValuesCoProcessor(SPtr<Grid3D> grid, const std:
     // restartStep = 0.0;
 }
 //////////////////////////////////////////////////////////////////////////
-void AverageValuesCoProcessor::process(double step)
+void AverageValuesCoProcessor::process(real step)
 {
     // resetRMS(step);
     if (resetSchedulerRMS->isDue(step))
@@ -76,7 +76,7 @@ void AverageValuesCoProcessor::process(double step)
     UBLOG(logDEBUG3, "AverageValuesCoProcessor::update:" << step);
 }
 
-void AverageValuesCoProcessor::resetDataRMS(double step)
+void AverageValuesCoProcessor::resetDataRMS(real step)
 {
     resetStepRMS = (int)step;
 
@@ -120,7 +120,7 @@ void AverageValuesCoProcessor::resetDataRMS(double step)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void AverageValuesCoProcessor::resetDataMeans(double step)
+void AverageValuesCoProcessor::resetDataMeans(real step)
 {
     resetStepMeans = (int)step;
 
@@ -161,7 +161,7 @@ void AverageValuesCoProcessor::resetDataMeans(double step)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void AverageValuesCoProcessor::collectData(double step)
+void AverageValuesCoProcessor::collectData(real step)
 {
     int istep = int(step);
 
@@ -219,7 +219,7 @@ void AverageValuesCoProcessor::addData(const SPtr<Block3D> block)
     UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
     //	UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -272,20 +272,20 @@ void AverageValuesCoProcessor::addData(const SPtr<Block3D> block)
                                                 float(val<2>(org) - val<2>(nodeOffset) + ix2 * dx),
                                                 float(val<3>(org) - val<3>(nodeOffset) + ix3 * dx)));
 
-                    LBMReal vx = (*av)(AvVx, ix1, ix2, ix3);
-                    LBMReal vy = (*av)(AvVy, ix1, ix2, ix3);
-                    LBMReal vz = (*av)(AvVz, ix1, ix2, ix3);
+                    real vx = (*av)(AvVx, ix1, ix2, ix3);
+                    real vy = (*av)(AvVy, ix1, ix2, ix3);
+                    real vz = (*av)(AvVz, ix1, ix2, ix3);
 
-                    LBMReal vxx = (*av)(AvVxx, ix1, ix2, ix3);
-                    LBMReal vyy = (*av)(AvVyy, ix1, ix2, ix3);
-                    LBMReal vzz = (*av)(AvVzz, ix1, ix2, ix3);
+                    real vxx = (*av)(AvVxx, ix1, ix2, ix3);
+                    real vyy = (*av)(AvVyy, ix1, ix2, ix3);
+                    real vzz = (*av)(AvVzz, ix1, ix2, ix3);
 
-                    LBMReal vxy = (*av)(AvVxy, ix1, ix2, ix3);
-                    LBMReal vxz = (*av)(AvVxz, ix1, ix2, ix3);
-                    LBMReal vyz = (*av)(AvVyz, ix1, ix2, ix3);
+                    real vxy = (*av)(AvVxy, ix1, ix2, ix3);
+                    real vxz = (*av)(AvVxz, ix1, ix2, ix3);
+                    real vyz = (*av)(AvVyz, ix1, ix2, ix3);
 
-                    LBMReal vp    = (*av)(AvP, ix1, ix2, ix3);
-                    LBMReal vprms = (*av)(AvPrms, ix1, ix2, ix3);
+                    real vp    = (*av)(AvP, ix1, ix2, ix3);
+                    real vprms = (*av)(AvPrms, ix1, ix2, ix3);
 
                     data[index++].push_back(vx);
                     data[index++].push_back(vy);
@@ -330,7 +330,7 @@ void AverageValuesCoProcessor::addData(const SPtr<Block3D> block)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void AverageValuesCoProcessor::calculateAverageValues(double timeStep)
+void AverageValuesCoProcessor::calculateAverageValues(real timeStep)
 {
     using namespace D3Q27System;
 
@@ -342,7 +342,7 @@ void AverageValuesCoProcessor::calculateAverageValues(double timeStep)
         calcMacros = &calcIncompMacroscopicValues;
     }
 
-    LBMReal f[27];
+    real f[27];
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) {
         for (SPtr<Block3D> block : blockVector[level]) {
@@ -375,18 +375,18 @@ void AverageValuesCoProcessor::calculateAverageValues(double timeStep)
                                 //////////////////////////////////////////////////////////////////////////
                                 // compute velocity
                                 //////////////////////////////////////////////////////////////////////////
-                                LBMReal vx, vy, vz, rho;
+                                real vx, vy, vz, rho;
                                 calcMacros(f, rho, vx, vy, vz);
-                                double press = D3Q27System::calcPress(f, rho, vx, vy, vz);
+                                real press = D3Q27System::calcPress(f, rho, vx, vy, vz);
 
                                 //////////////////////////////////////////////////////////////////////////
                                 // compute average values
                                 //////////////////////////////////////////////////////////////////////////
 
-                                LBMReal timeStepAfterResetRMS =
-                                    (double)(timeStep - resetStepRMS) / ((double)averageInterval);
-                                LBMReal timeStepAfterResetMeans =
-                                    (double)(timeStep - resetStepMeans) / ((double)averageInterval);
+                                real timeStepAfterResetRMS =
+                                    (real)(timeStep - resetStepRMS) / ((real)averageInterval);
+                                real timeStepAfterResetMeans =
+                                    (real)(timeStep - resetStepMeans) / ((real)averageInterval);
 
                                 // mean velocity
                                 (*av)(AvVx, ix1, ix2, ix3) =
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.h
index 5ba922824167c4e6a686a4bc46b0ccc2813dbae7..b45118adb5b9a18de0eabcc061fa18e11c042bdf 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.h
@@ -29,21 +29,21 @@ public:
     AverageValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer, SPtr<UbScheduler> s,
                              SPtr<UbScheduler> Avs, SPtr<UbScheduler> rsMeans, SPtr<UbScheduler> rsRMS, bool restart);
     //! Make update
-    void process(double step) override;
+    void process(real step) override;
     //! Resets averaged velocity and RMS-values according to ResetSceduler
-    void reset(double step);
+    void reset(real step);
 
 protected:
     //! Prepare data and write in .vtk file
-    void collectData(double step);
+    void collectData(real step);
     //! Reset data
-    void resetDataRMS(double step);
-    void resetDataMeans(double step);
+    void resetDataRMS(real step);
+    void resetDataMeans(real step);
     //! prepare data
     void addData(const SPtr<Block3D> block);
     void clearData();
     //! Computes average and RMS values of macroscopic quantities
-    void calculateAverageValues(double timeStep);
+    void calculateAverageValues(real timeStep);
     ////! write .txt file spatial intergrated averaged value, fluctuation, porous features
     // void collectPlotDataZ(double step);
     ////! create txt file and write head line
@@ -53,14 +53,14 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
     int minInitLevel; // min init level
     int maxInitLevel;
     int gridRank;
     int resetStepRMS;
     int resetStepMeans;
-    double averageInterval;
+    real averageInterval;
     std::string path;
     WbWriter *writer;
     bool restart, compressible;
@@ -83,7 +83,7 @@ private:
         AvPrms = 10
     };
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
index 4e75e6337a44d46586a62a74d2e592b7d0839c57..d50c58d5698a7faa0939fedd613075873d9e5363 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
@@ -14,7 +14,7 @@
 #include "UbScheduler.h"
 
 CalculateForcesCoProcessor::CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                       std::shared_ptr<vf::mpi::Communicator> comm, double v, double a)
+                                                       std::shared_ptr<vf::mpi::Communicator> comm, real v, real a)
     : CoProcessor(grid, s), path(path), comm(comm), v(v), a(a), forceX1global(0), forceX2global(0), forceX3global(0)
 {
     if (comm->getProcessID() == comm->getRoot()) {
@@ -57,7 +57,7 @@ CalculateForcesCoProcessor::CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<U
 //////////////////////////////////////////////////////////////////////////
 CalculateForcesCoProcessor::~CalculateForcesCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void CalculateForcesCoProcessor::process(double step)
+void CalculateForcesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -65,7 +65,7 @@ void CalculateForcesCoProcessor::process(double step)
     UBLOG(logDEBUG3, "D3Q27ForcesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void CalculateForcesCoProcessor::collectData(double step)
+void CalculateForcesCoProcessor::collectData(real step)
 {
     calculateForces();
 
@@ -109,9 +109,9 @@ void CalculateForcesCoProcessor::calculateForces()
 
     for (SPtr<D3Q27Interactor> interactor : interactors) {
         for (BcNodeIndicesMap::value_type t : interactor->getBcNodeIndicesMap()) {
-            double forceX1 = 0.0;
-            double forceX2 = 0.0;
-            double forceX3 = 0.0;
+            real forceX1 = 0.0;
+            real forceX2 = 0.0;
+            real forceX3 = 0.0;
 
             SPtr<Block3D> block                             = t.first;
             std::set<std::vector<int>> &transNodeIndicesSet = t.second;
@@ -151,8 +151,8 @@ void CalculateForcesCoProcessor::calculateForces()
             }
             // if we have got discretization with more level
             // deltaX is LBM deltaX and equal LBM deltaT
-            double deltaX = LBMSystem::getDeltaT(block->getLevel()); // grid->getDeltaT(block);
-            double deltaXquadrat = deltaX * deltaX;
+            real deltaX = LBMSystem::getDeltaT(block->getLevel()); // grid->getDeltaT(block);
+            real deltaXquadrat = deltaX * deltaX;
             forceX1 *= deltaXquadrat;
             forceX2 *= deltaXquadrat;
             forceX3 *= deltaXquadrat;
@@ -164,8 +164,8 @@ void CalculateForcesCoProcessor::calculateForces()
             forceX3global += forceX3;
         }
     }
-    std::vector<double> values;
-    std::vector<double> rvalues;
+    std::vector<real> values;
+    std::vector<real> rvalues;
     values.push_back(forceX1global);
     values.push_back(forceX2global);
     values.push_back(forceX3global);
@@ -191,10 +191,10 @@ UbTupleDouble3 CalculateForcesCoProcessor::getForces(int x1, int x2, int x3, SPt
 
     if (bc) {
         // references to tuple "force"
-        double &forceX1 = val<1>(force);
-        double &forceX2 = val<2>(force);
-        double &forceX3 = val<3>(force);
-        double f, fnbr;
+        real &forceX1 = val<1>(force);
+        real &forceX2 = val<2>(force);
+        real &forceX3 = val<3>(force);
+        real f, fnbr;
 
         for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
             if (bc->hasNoSlipBoundaryFlag(fdir)) {
@@ -217,9 +217,9 @@ UbTupleDouble3 CalculateForcesCoProcessor::getForces(int x1, int x2, int x3, SPt
 //////////////////////////////////////////////////////////////////////////
 void CalculateForcesCoProcessor::calculateCoefficients()
 {
-    double F1 = forceX1global;
-    double F2 = forceX2global;
-    double F3 = forceX3global;
+    real F1 = forceX1global;
+    real F2 = forceX2global;
+    real F3 = forceX3global;
 
     // return 2*F/(rho*v*v*a);
     C1 = 2.0 * F1 / (v * v * a);
@@ -229,7 +229,7 @@ void CalculateForcesCoProcessor::calculateCoefficients()
 //////////////////////////////////////////////////////////////////////////
 void CalculateForcesCoProcessor::addInteractor(SPtr<D3Q27Interactor> interactor) { interactors.push_back(interactor); }
 //////////////////////////////////////////////////////////////////////////
-void CalculateForcesCoProcessor::write(std::ofstream *fileObject, double value, char *separator)
+void CalculateForcesCoProcessor::write(std::ofstream *fileObject, real value, char *separator)
 {
     (*fileObject).width(12);
     //(*fileObject).precision(2);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
index aa6bfd47799ed5d426550c756eccfff706709e9e..d282ae8ed10ae177ae78e4f559018d7ff0959be7 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
@@ -14,6 +14,7 @@
 
 #include "CoProcessor.h"
 #include "UbTuple.h"
+#include "lbm/constants/D3Q27.h"
 
 class ForceCalculator;
 namespace vf::mpi {class Communicator;}
@@ -30,31 +31,31 @@ public:
     //! \param v - velocity of fluid in LB units
     //! \param a - area of object in LB units
     CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm,
-                               double v, double a);
+                               real v, real a);
     ~CalculateForcesCoProcessor() override;
-    void process(double step) override;
+    void process(real step) override;
     void addInteractor(SPtr<D3Q27Interactor> interactor);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void calculateForces();
     UbTupleDouble3 getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions,
                              SPtr<BoundaryConditions> bc);
     void calculateCoefficients();
-    void write(std::ofstream *fileObject, double value, char *separator);
+    void write(std::ofstream *fileObject, real value, char *separator);
 
 private:
     std::string path;
     std::shared_ptr<vf::mpi::Communicator> comm;
     std::vector<SPtr<D3Q27Interactor>> interactors;
-    double forceX1global;
-    double forceX2global;
-    double forceX3global;
-    double v; //!< is the speed of the object relative to the fluid
-    double a; //!< is the reference area
-    double C1;
-    double C2;
-    double C3;
+    real forceX1global;
+    real forceX2global;
+    real forceX3global;
+    real v; //!< is the speed of the object relative to the fluid
+    real a; //!< is the reference area
+    real C1;
+    real C2;
+    real C3;
 };
 
 #endif /* D3Q27ForcesCoProcessor_H */
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
index b2c7466f7cd6e7d5dd0aeb0baa152bfb6ced93ae..93b3854070c0b9f1f589e6d32f1872cc8521ca86 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
@@ -41,7 +41,7 @@ CalculateTorqueCoProcessor::~CalculateTorqueCoProcessor()
 
 }
 //////////////////////////////////////////////////////////////////////////
-void CalculateTorqueCoProcessor::process( double step )
+void CalculateTorqueCoProcessor::process( real step )
 {
    if(scheduler->isDue(step) )
       collectData(step);
@@ -49,7 +49,7 @@ void CalculateTorqueCoProcessor::process( double step )
    UBLOG(logDEBUG3, "D3Q27ForcesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void CalculateTorqueCoProcessor::collectData( double step )
+void CalculateTorqueCoProcessor::collectData( real step )
 {
    calculateForces();
 
@@ -84,20 +84,20 @@ void CalculateTorqueCoProcessor::calculateForces()
 
    for(SPtr<D3Q27Interactor> interactor : interactors)
    {
-      double x1Centre = interactor->getGbObject3D()->getX1Centroid();
-      double x2Centre = interactor->getGbObject3D()->getX2Centroid();
-      double x3Centre = interactor->getGbObject3D()->getX3Centroid();
+      real x1Centre = interactor->getGbObject3D()->getX1Centroid();
+      real x2Centre = interactor->getGbObject3D()->getX2Centroid();
+      real x3Centre = interactor->getGbObject3D()->getX3Centroid();
 
       for(BcNodeIndicesMap::value_type t : interactor->getBcNodeIndicesMap())
       {
-         double torqueX1 = 0.0;
-         double torqueX2 = 0.0;
-         double torqueX3 = 0.0;
+         real torqueX1 = 0.0;
+         real torqueX2 = 0.0;
+         real torqueX3 = 0.0;
 
          SPtr<Block3D> block = t.first;
          std::set< std::vector<int> >& transNodeIndicesSet = t.second;
 
-         double deltaX = grid->getDeltaX(block);
+         real deltaX = grid->getDeltaX(block);
 
          SPtr<ILBMKernel> kernel = block->getKernel();
 
@@ -126,14 +126,14 @@ void CalculateTorqueCoProcessor::calculateForces()
             {
                SPtr<BoundaryConditions> bc = bcArray->getBC(x1,x2,x3);
                UbTupleDouble3 forceVec     = getForces(x1,x2,x3,distributions,bc);
-               double Fx                   = val<1>(forceVec);
-               double Fy                   = val<2>(forceVec);
-               double Fz                   = val<3>(forceVec);
+               real Fx                   = val<1>(forceVec);
+               real Fy                   = val<2>(forceVec);
+               real Fz                   = val<3>(forceVec);
 
                Vector3D worldCoordinates = grid->getNodeCoordinates(block, x1, x2, x3);
-               double rx                 = (worldCoordinates[0] - x1Centre) / deltaX;
-               double ry                 = (worldCoordinates[1] - x2Centre) / deltaX;
-               double rz                 = (worldCoordinates[2] - x3Centre) / deltaX;
+               real rx                 = (worldCoordinates[0] - x1Centre) / deltaX;
+               real ry                 = (worldCoordinates[1] - x2Centre) / deltaX;
+               real rz                 = (worldCoordinates[2] - x3Centre) / deltaX;
 
                torqueX1 += ry * Fz - rz * Fy;
                torqueX2 += rz * Fx - rx * Fz;
@@ -148,8 +148,8 @@ void CalculateTorqueCoProcessor::calculateForces()
          torqueX3global += torqueX3;
       }
    }
-   std::vector<double> values;
-   std::vector<double> rvalues;
+   std::vector<real> values;
+   std::vector<real> rvalues;
    values.push_back(torqueX1global);
    values.push_back(torqueX2global);
    values.push_back(torqueX3global);
@@ -174,16 +174,16 @@ UbTupleDouble3 CalculateTorqueCoProcessor::getForces(int x1, int x2, int x3,  SP
 {
    UbTupleDouble3 force(0.0,0.0,0.0);
 
-   LBMReal fs[D3Q27System::ENDF + 1];
+   real fs[D3Q27System::ENDF + 1];
    distributions->getDistributionInv(fs, x1, x2, x3);
    
    if(bc)
    {
       //references to tuple "force"
-      double& forceX1 = val<1>(force);
-      double& forceX2 = val<2>(force);
-      double& forceX3 = val<3>(force);
-      double f,  fnbr;
+      real& forceX1 = val<1>(force);
+      real& forceX2 = val<2>(force);
+      real& forceX3 = val<3>(force);
+      real f,  fnbr;
 
       for(int fdir=D3Q27System::FSTARTDIR; fdir<=D3Q27System::FENDDIR; fdir++)
       {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
index e488b442b60b2f726747a521e51cad9d4bacdbe9..26686239bdca68a047c4f8c8f4c33f5a09f53bb5 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
@@ -30,19 +30,19 @@ public:
    //! Constructor
    CalculateTorqueCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
 	virtual ~CalculateTorqueCoProcessor();             
-	void process(double step); 
+	void process(real step); 
    void addInteractor(SPtr<D3Q27Interactor> interactor);
 protected:
-	void collectData(double step);
+	void collectData(real step);
    void calculateForces();
    UbTupleDouble3 getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc);
 private:
    std::string path;
    std::shared_ptr<vf::mpi::Communicator> comm;
    std::vector<SPtr<D3Q27Interactor> > interactors;
-   double torqueX1global;
-   double torqueX2global;
-   double torqueX3global;
+   real torqueX1global;
+   real torqueX2global;
+   real torqueX3global;
 };
 
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CoProcessor.h
index 7d8efad7606b57bb24ac11740843b30d3678fcbb..7aae7505b02ed9248a31b2a009cdc75f09ecd73a 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CoProcessor.h
@@ -35,6 +35,7 @@
 #define CoProcessor_H
 
 #include <PointerDefinitions.h>
+#include "lbm/constants/D3Q27.h"
 
 class Grid3D;
 class UbScheduler;
@@ -58,7 +59,7 @@ public:
     virtual ~CoProcessor();
     //! \brief Updates observer
     //! \param step is the actual time step
-    virtual void process(double step) = 0;
+    virtual void process(real step) = 0;
 
 protected:
     SPtr<Grid3D> grid;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
index 6dae1c7049ea3c0d779b31fff2e79104e034790f..0221bf38a599352728dbd42f37b78bc5fa6ff1ee 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
@@ -26,13 +26,13 @@ DecreaseViscosityCoProcessor::DecreaseViscosityCoProcessor(SPtr<Grid3D> grid, SP
 //////////////////////////////////////////////////////////////////////////
 DecreaseViscosityCoProcessor::~DecreaseViscosityCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void DecreaseViscosityCoProcessor::process(double step)
+void DecreaseViscosityCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         setViscosity(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void DecreaseViscosityCoProcessor::setViscosity(double step)
+void DecreaseViscosityCoProcessor::setViscosity(real step)
 {
 
     UBLOG(logDEBUG3, "DecreaseViscosityCoProcessor::update:" << step);
@@ -53,7 +53,7 @@ void DecreaseViscosityCoProcessor::setViscosity(double step)
         int istep      = static_cast<int>(step);
         this->timeStep = istep;
         nueFunc->DefineVar("t", &this->timeStep);
-        double nue = nueFunc->Eval();
+        real nue = nueFunc->Eval();
 
         for (int level = minInitLevel; level <= maxInitLevel; level++) {
             std::vector<SPtr<Block3D>> blockVector;
@@ -61,7 +61,7 @@ void DecreaseViscosityCoProcessor::setViscosity(double step)
             for (SPtr<Block3D> block : blockVector) {
                 SPtr<ILBMKernel> kernel = block->getKernel();
                 if (kernel) {
-                    LBMReal collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
+                    real collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
                     kernel->setCollisionFactor(collFactor);
                 }
             }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
index ca413ba2d5201d3043594f4a4b4803091bb51cc8..aaa8a3c0b8db4d9adb9b28c19ef993444e7106c0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
@@ -31,11 +31,11 @@ public:
     DecreaseViscosityCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, mu::Parser *nueFunc, std::shared_ptr<vf::mpi::Communicator> comm);
     ~DecreaseViscosityCoProcessor() override;
     //! calls collect PostprocessData.
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! resets the collision factor depending on the current timestep.
-    void setViscosity(double step);
+    void setViscosity(real step);
     std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
index 3195ea4dfc6a9be9cf49ef7e04bfe57bce6e70f2..fc181e266e0453a12a149a4c69083497617c61ea 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
@@ -22,7 +22,7 @@ EmergencyExitCoProcessor::EmergencyExitCoProcessor(SPtr<Grid3D> grid, SPtr<UbSch
 //////////////////////////////////////////////////////////////////////////
 EmergencyExitCoProcessor::~EmergencyExitCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void EmergencyExitCoProcessor::process(double step)
+void EmergencyExitCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -30,7 +30,7 @@ void EmergencyExitCoProcessor::process(double step)
     UBLOG(logDEBUG3, "EmergencyExitCoProcessor::update:" << step);
 }
 
-void EmergencyExitCoProcessor::collectData(double step)
+void EmergencyExitCoProcessor::collectData(real step)
 {
     if (readMetafile()) {
         rp->process((int)step);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
index 8894420c979eb6e7879c1788010d7e5d7e807eec..13eaa7832cf5815005d30910332a7d382f047186 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
@@ -25,10 +25,10 @@ public:
                              SPtr<MPIIORestartCoProcessor> rp, std::shared_ptr<vf::mpi::Communicator> comm);
     ~EmergencyExitCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void writeMetafile(int status);
     bool readMetafile();
     void checkMetafile();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
index cb2fd908ff78803e4262ae64e906b8dad3c14dff..918e3afbe8d454ac97707b8c79f927bca324cb52 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
@@ -20,20 +20,20 @@ ForceCalculator::~ForceCalculator() = default;
 Vector3D ForceCalculator::getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions,
                                     SPtr<BoundaryConditions> bc, const Vector3D &boundaryVelocity) const
 {
-    double forceX1 = 0;
-    double forceX2 = 0;
-    double forceX3 = 0;
+    real forceX1 = 0;
+    real forceX2 = 0;
+    real forceX3 = 0;
     if (bc) {
         for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
             if (bc->hasNoSlipBoundaryFlag(fdir) || bc->hasVelocityBoundaryFlag(fdir)) {
                 const int invDir  = D3Q27System::INVDIR[fdir];
-                const double f    = distributions->getDistributionInvForDirection(x1, x2, x3, invDir);
-                const double fnbr = distributions->getDistributionInvForDirection(
+                const real f    = distributions->getDistributionInvForDirection(x1, x2, x3, invDir);
+                const real fnbr = distributions->getDistributionInvForDirection(
                     x1 + D3Q27System::DX1[invDir], x2 + D3Q27System::DX2[invDir], x3 + D3Q27System::DX3[invDir], fdir);
 
-                double correction[3] = { 0.0, 0.0, 0.0 };
+                real correction[3] = { 0.0, 0.0, 0.0 };
                 if (bc->hasVelocityBoundaryFlag(fdir)) {
-                    const double forceTerm = f - fnbr;
+                    const real forceTerm = f - fnbr;
                     correction[0]          = forceTerm * boundaryVelocity[0];
                     correction[1]          = forceTerm * boundaryVelocity[1];
                     correction[2]          = forceTerm * boundaryVelocity[2];
@@ -60,9 +60,9 @@ void ForceCalculator::calculateForces(std::vector<SPtr<D3Q27Interactor>> interac
 
     for (const auto &interactor : interactors) {
         for (const auto &t : interactor->getBcNodeIndicesMap()) {
-            double forceX1 = 0.0;
-            double forceX2 = 0.0;
-            double forceX3 = 0.0;
+            real forceX1 = 0.0;
+            real forceX2 = 0.0;
+            real forceX3 = 0.0;
 
             SPtr<Block3D> block                     = t.first;
             SPtr<ILBMKernel> kernel                 = block->getKernel();
@@ -86,8 +86,8 @@ void ForceCalculator::calculateForces(std::vector<SPtr<D3Q27Interactor>> interac
             }
             // if we have got discretization with more level
             // deltaX is LBM deltaX and equal LBM deltaT
-            double deltaX = LBMSystem::getDeltaT(block->getLevel()); // grid->getDeltaT(block);
-            double deltaXquadrat = deltaX * deltaX;
+            real deltaX = LBMSystem::getDeltaT(block->getLevel()); // grid->getDeltaT(block);
+            real deltaXquadrat = deltaX * deltaX;
             forceX1 *= deltaXquadrat;
             forceX2 *= deltaXquadrat;
             forceX3 *= deltaXquadrat;
@@ -104,12 +104,12 @@ void ForceCalculator::calculateForces(std::vector<SPtr<D3Q27Interactor>> interac
 
 void ForceCalculator::gatherGlobalForces()
 {
-    std::vector<double>
+    std::vector<real>
         values; // intel compiler 17 dasn't support this { forceX1global , forceX2global, forceX3global };
     values.push_back(forceX1global);
     values.push_back(forceX2global);
     values.push_back(forceX3global);
-    std::vector<double> rvalues = comm->gather(values);
+    std::vector<real> rvalues = comm->gather(values);
 
     if (comm->isRoot()) {
         forceX1global = 0.0;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
index 7aeb514abe426020af59a936d5f8b8c184ea496f..03b00f3603c3e8aac25567b7f370e81b61d3ef76 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
@@ -4,7 +4,8 @@
  *  Created on: 25.10.2017
  *  Author: S. Peters
  */
-
+#include "lbm/constants/D3Q27.h"
+ 
 #ifndef ForceCalculator_H
 #define ForceCalculator_H
 
@@ -36,9 +37,9 @@ private:
 
     std::shared_ptr<vf::mpi::Communicator> comm;
 
-    double forceX1global;
-    double forceX2global;
-    double forceX3global;
+    real forceX1global;
+    real forceX2global;
+    real forceX3global;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
index 40a8011ca871965f4b389ce32559b847021d2fe2..741f1f340c4c02e9f5b08854e7205ab2aa5e507b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
@@ -42,7 +42,7 @@ InSituCatalystCoProcessor::InSituCatalystCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
 //////////////////////////////////////////////////////////////////////////
 InSituCatalystCoProcessor::~InSituCatalystCoProcessor() {}
 //////////////////////////////////////////////////////////////////////////
-void InSituCatalystCoProcessor::process(double step)
+void InSituCatalystCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -50,7 +50,7 @@ void InSituCatalystCoProcessor::process(double step)
     UBLOG(logDEBUG3, "InSituCatalystCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void InSituCatalystCoProcessor::collectData(double step)
+void InSituCatalystCoProcessor::collectData(real step)
 {
     unsigned int istep = static_cast<int>(step);
 
@@ -94,13 +94,13 @@ void InSituCatalystCoProcessor::addData(SPtr<Block3D> block)
     UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset   = grid->getNodeOffset(block);
-    double dx                   = grid->getDeltaX(block);
+    real dx                   = grid->getDeltaX(block);
 
     SPtr<LBMKernel> kernel                  = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     int minX1 = 0;
     int minX2 = 0;
@@ -122,7 +122,7 @@ void InSituCatalystCoProcessor::addData(SPtr<Block3D> block)
                 if (!bcArray->isUndefined(ix1, ix2, ix3) && !bcArray->isSolid(ix1, ix2, ix3)) {
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
-                    double press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
+                    real press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
 
                     if (UbMath::isNaN(rho) || UbMath::isInfinity(rho))
                         UB_THROW(UbException(
@@ -211,13 +211,13 @@ void InSituCatalystCoProcessor::addVTKGridData(SPtr<Block3D> block)
     UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset   = grid->getNodeOffset(block);
-    double dx                   = grid->getDeltaX(block);
+    real dx                   = grid->getDeltaX(block);
 
     SPtr<LBMKernel> kernel                  = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // knotennummerierung faengt immer bei 0 an!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
@@ -251,15 +251,15 @@ void InSituCatalystCoProcessor::addVTKGridData(SPtr<Block3D> block)
     SPtr<BoundaryConditions> bcPtr;
     int nr = points->GetNumberOfPoints();
 
-    double x[3];
+    real x[3];
 
     for (size_t ix3 = minX3; ix3 <= maxX3; ix3++) {
         for (size_t ix2 = minX2; ix2 <= maxX2; ix2++) {
             for (size_t ix1 = minX1; ix1 <= maxX1; ix1++) {
                 if (!bcArray->isUndefined(ix1, ix2, ix3) && !bcArray->isSolid(ix1, ix2, ix3)) {
-                    x[0] = double(val<1>(org) - val<1>(nodeOffset) + ix1 * dx);
-                    x[1] = double(val<2>(org) - val<2>(nodeOffset) + ix2 * dx);
-                    x[2] = double(val<3>(org) - val<3>(nodeOffset) + ix3 * dx);
+                    x[0] = real(val<1>(org) - val<1>(nodeOffset) + ix1 * dx);
+                    x[1] = real(val<2>(org) - val<2>(nodeOffset) + ix2 * dx);
+                    x[2] = real(val<3>(org) - val<3>(nodeOffset) + ix3 * dx);
 
                     points->InsertPoint((vtkIdType)nr, x);
 
@@ -267,7 +267,7 @@ void InSituCatalystCoProcessor::addVTKGridData(SPtr<Block3D> block)
 
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
-                    double press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
+                    real press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
 
                     if (UbMath::isNaN(rho) || UbMath::isInfinity(rho))
                         UB_THROW(UbException(
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.h
index 5fa6d3fd13529431c125b799bcbb7ea47dccf1ed..e1c9fb95c7ff420362c3dcbe4b7444902beb6ba1 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.h
@@ -6,6 +6,7 @@
 #include <CoProcessor.h>
 #include <Grid3D.h>
 #include <LBMUnitConverter.h>
+#include "lbm/constants/D3Q27.h"
 
 #include <string>
 
@@ -24,10 +25,10 @@ public:
     InSituCatalystCoProcessor();
     InSituCatalystCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::string script);
     virtual ~InSituCatalystCoProcessor();
-    void process(double step);
+    void process(real step);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void addData(SPtr<Block3D> block);
     void buildVTKGrid();
     void addVTKGridData(SPtr<Block3D> block);
@@ -41,14 +42,14 @@ private:
     vtkSmartPointer<vtkUnstructuredGrid> unstructuredGrid;
     vtkSmartPointer<vtkPoints> points;
     vtkSmartPointer<vtkDoubleArray> arrays[4];
-    std::vector<double> vx1Array;
-    std::vector<double> vx2Array;
-    std::vector<double> vx3Array;
-    std::vector<double> rhoArray;
+    std::vector<real> vx1Array;
+    std::vector<real> vx2Array;
+    std::vector<real> vx3Array;
+    std::vector<real> rhoArray;
     int index;
     int numOfPoints;
-    typedef void (*CalcMacrosFct)(const LBMReal *const & /*feq[27]*/, LBMReal & /*(d)rho*/, LBMReal & /*vx1*/,
-                                  LBMReal & /*vx2*/, LBMReal & /*vx3*/);
+    typedef void (*CalcMacrosFct)(const real *const & /*feq[27]*/, real & /*(d)rho*/, real & /*vx1*/,
+                                  real & /*vx2*/, real & /*vx3*/);
     CalcMacrosFct calcMacros;
 };
 #endif // InSituCatalystCoProcessor_h__
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
index ed258864a4a87b473ca276064abf60ad5910828d..6b8026cedd8331c02b52a06c86c0e5d7821d0aa2 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
@@ -57,7 +57,7 @@ InSituVTKCoProcessor::InSituVTKCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler>
 //////////////////////////////////////////////////////////////////////////
 InSituVTKCoProcessor::~InSituVTKCoProcessor() { comm->CloseConnection(); }
 //////////////////////////////////////////////////////////////////////////
-void InSituVTKCoProcessor::process(double step)
+void InSituVTKCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -65,7 +65,7 @@ void InSituVTKCoProcessor::process(double step)
     UBLOG(logDEBUG3, "InSituVTKCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void InSituVTKCoProcessor::collectData(double step)
+void InSituVTKCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -127,20 +127,20 @@ void InSituVTKCoProcessor::addData(SPtr<Block3D> block)
     UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset   = grid->getNodeOffset(block);
-    double dx                   = grid->getDeltaX(block);
+    real dx                   = grid->getDeltaX(block);
 
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // knotennummerierung faengt immer bei 0 an!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
 
     // Funktionszeiger
-    typedef void (*CalcMacrosFct)(const LBMReal *const & /*feq[27]*/, LBMReal & /*(d)rho*/, LBMReal & /*vx1*/,
-                                  LBMReal & /*vx2*/, LBMReal & /*vx3*/);
+    typedef void (*CalcMacrosFct)(const real *const & /*feq[27]*/, real & /*(d)rho*/, real & /*vx1*/,
+                                  real & /*vx2*/, real & /*vx3*/);
 
     CalcMacrosFct calcMacros = NULL;
 
@@ -175,7 +175,7 @@ void InSituVTKCoProcessor::addData(SPtr<Block3D> block)
     SPtr<BoundaryConditions> bcPtr;
     int nr = points->GetNumberOfPoints();
 
-    double x[3];
+    real x[3];
 
     for (size_t ix3 = minX3; ix3 <= maxX3; ix3++) {
         for (size_t ix2 = minX2; ix2 <= maxX2; ix2++) {
@@ -183,9 +183,9 @@ void InSituVTKCoProcessor::addData(SPtr<Block3D> block)
                 if (!bcArray->isUndefined(ix1, ix2, ix3) && !bcArray->isSolid(ix1, ix2, ix3)) {
                     int index = 0;
 
-                    x[0] = double(val<1>(org) - val<1>(nodeOffset) + ix1 * dx);
-                    x[1] = double(val<2>(org) - val<2>(nodeOffset) + ix2 * dx);
-                    x[2] = double(val<3>(org) - val<3>(nodeOffset) + ix3 * dx);
+                    x[0] = real(val<1>(org) - val<1>(nodeOffset) + ix1 * dx);
+                    x[1] = real(val<2>(org) - val<2>(nodeOffset) + ix2 * dx);
+                    x[2] = real(val<3>(org) - val<3>(nodeOffset) + ix3 * dx);
 
                     points->InsertPoint((vtkIdType)nr, x);
 
@@ -193,7 +193,7 @@ void InSituVTKCoProcessor::addData(SPtr<Block3D> block)
 
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
-                    double press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
+                    real press = D3Q27System::calcPress(f, rho, vx1, vx2, vx3);
 
                     if (UbMath::isNaN(rho) || UbMath::isInfinity(rho))
                         UB_THROW(UbException(
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.h
index 6789509569cabbdc39319f20749d9e0091736158..9456bf298b4e6e4af1e07ae14a46c3e11259cbaa 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.h
@@ -23,10 +23,10 @@ public:
     InSituVTKCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &configFile,
                          SPtr<LBMUnitConverter> conv);
     virtual ~InSituVTKCoProcessor();
-    void process(double step);
+    void process(real step);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void addData(SPtr<Block3D> block);
     void readConfigFile(const std::string &configFile);
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
index 4e711bd7c03b1da262c427230dc1c357966e1681..0d8b7827b48fb2a5e6e16d13538f63209e03e244 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
@@ -10,8 +10,8 @@
 #include "LBMKernel.h"
 
 //////////////////////////////////////////////////////////////////////////
-IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2,
-                                             double minX3, double maxX1, double maxX2, double maxX3)
+IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2,
+                                             real minX3, real maxX1, real maxX2, real maxX3)
     :
 
       grid(grid), comm(comm), sVx1(0.0), sVx2(0.0), sVx3(0.0), sRho(0.0), sCellVolume(0.0), numberOfFluidsNodes(0),
@@ -21,8 +21,8 @@ IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<
     init(-1);
 }
 //////////////////////////////////////////////////////////////////////////
-IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2,
-                                             double minX3, double maxX1, double maxX2, double maxX3, int level)
+IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2,
+                                             real minX3, real maxX1, real maxX2, real maxX3, int level)
     :
 
       grid(grid), comm(comm), sVx1(0.0), sVx2(0.0), sVx3(0.0), sRho(0.0), sCellVolume(0.0), numberOfFluidsNodes(0),
@@ -38,7 +38,7 @@ void IntegrateValuesHelper::init(int level)
 {
     root = comm->isRoot();
 
-    double orgX1, orgX2, orgX3;
+    real orgX1, orgX2, orgX3;
     int gridRank = grid->getRank();
     int minInitLevel, maxInitLevel;
     if (level < 0) {
@@ -49,8 +49,8 @@ void IntegrateValuesHelper::init(int level)
         maxInitLevel = level;
     }
 
-    double numSolids = 0.0;
-    double numFluids = 0.0;
+    real numSolids = 0.0;
+    real numFluids = 0.0;
     for (int level_it = minInitLevel; level_it <= maxInitLevel; level_it++) {
         std::vector<SPtr<Block3D>> blockVector;
         grid->getBlocks(level_it, gridRank, blockVector);
@@ -68,9 +68,9 @@ void IntegrateValuesHelper::init(int level)
             SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
             int ghostLayerWitdh                     = kernel->getGhostLayerWidth();
             SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-            double internX1, internX2, internX3;
+            real internX1, internX2, internX3;
 
-            double dx               = grid->getDeltaX(block);
+            real dx               = grid->getDeltaX(block);
             UbTupleDouble3 orgDelta = grid->getNodeOffset(block);
 
             for (int ix3 = ghostLayerWitdh; ix3 < (int)distributions->getNX3() - ghostLayerWitdh; ix3++) {
@@ -94,8 +94,8 @@ void IntegrateValuesHelper::init(int level)
                 cnodes.push_back(cn);
         }
     }
-    std::vector<double> rvalues;
-    std::vector<double> values;
+    std::vector<real> rvalues;
+    std::vector<real> values;
     values.push_back(numSolids);
     values.push_back(numFluids);
     rvalues = comm->gather(values);
@@ -122,15 +122,15 @@ void IntegrateValuesHelper::calculateAV()
         SPtr<AverageValuesArray3D> averagedValues = kernel->getDataSet()->getAverageValues();
 
         for (UbTupleInt3 node : cn.nodes) {
-            double Avx = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVx);
-            double Avy = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVy);
-            double Avz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVz);
+            real Avx = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVx);
+            real Avy = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVy);
+            real Avz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVz);
 
-            double Avxx = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVxx);
-            double Avyy = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVyy);
-            double Avzz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVzz);
+            real Avxx = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVxx);
+            real Avyy = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVyy);
+            real Avzz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVzz);
 
-            double Avxz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVxz);
+            real Avxz = (*averagedValues)(val<1>(node), val<2>(node), val<3>(node), AvVxz);
             sAvVx1 += std::abs(Avx);
             sAvVx2 += std::abs(Avy);
             sAvVx3 += std::abs(Avz);
@@ -143,8 +143,8 @@ void IntegrateValuesHelper::calculateAV()
             numberOfFluidsNodes++;
         }
     }
-    std::vector<double> values;
-    std::vector<double> rvalues;
+    std::vector<real> values;
+    std::vector<real> rvalues;
     values.push_back(sAvVx1);
     values.push_back(sAvVx2);
     values.push_back(sAvVx3);
@@ -172,20 +172,20 @@ void IntegrateValuesHelper::calculateAV()
 //////////////////////////////////////////////////////////////////////////
 void IntegrateValuesHelper::calculateMQ()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
     clearData();
 
     // Funktionszeiger
-    typedef void (*CalcMacrosFct)(const LBMReal *const & /*feq[27]*/, LBMReal & /*(d)rho*/, LBMReal & /*vx1*/,
-                                  LBMReal & /*vx2*/, LBMReal & /*vx3*/);
+    typedef void (*CalcMacrosFct)(const real *const & /*feq[27]*/, real & /*(d)rho*/, real & /*vx1*/,
+                                  real & /*vx2*/, real & /*vx3*/);
 
     CalcMacrosFct calcMacros = NULL;
 
     for (CalcNodes cn : cnodes) {
         SPtr<ILBMKernel> kernel = cn.block->getKernel();
-        LBMReal dx              = 1.0 / (LBMReal)(1 << cn.block->getLevel());
-        LBMReal cellVolume      = dx * dx * dx;
+        real dx              = 1.0 / (real)(1 << cn.block->getLevel());
+        real cellVolume      = dx * dx * dx;
 
         if (kernel->getCompressible()) {
             calcMacros = &D3Q27System::calcCompMacroscopicValues;
@@ -205,8 +205,8 @@ void IntegrateValuesHelper::calculateMQ()
             sCellVolume += cellVolume;
         }
     }
-    std::vector<double> values(5);
-    std::vector<double> rvalues;
+    std::vector<real> values(5);
+    std::vector<real> rvalues;
     values[0] = sRho;
     values[1] = sVx1;
     values[2] = sVx2;
@@ -247,9 +247,9 @@ void IntegrateValuesHelper::clearData()
     sTSx1x3 = 0.0;
 }
 //////////////////////////////////////////////////////////////////////////
-LBMReal IntegrateValuesHelper::getNumberOfFluidsNodes() { return this->numberOfFluidsNodes; }
+real IntegrateValuesHelper::getNumberOfFluidsNodes() { return this->numberOfFluidsNodes; }
 //////////////////////////////////////////////////////////////////////////
-LBMReal IntegrateValuesHelper::getNumberOfSolidNodes() { return this->numberOfSolidNodes; }
+real IntegrateValuesHelper::getNumberOfSolidNodes() { return this->numberOfSolidNodes; }
 //////////////////////////////////////////////////////////////////////////
 GbCuboid3DPtr IntegrateValuesHelper::getBoundingBox() { return this->boundingBox; }
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
index d6c87dcfd604bc1f1ded813b04e6ee71829c0d27..c804d74628570c4592c6715b7f76cd450c90ecfb 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
@@ -36,33 +36,33 @@ public:
     };
 
 public:
-    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2, double minX3,
-                          double maxX1, double maxX2, double maxX3);
-    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2, double minX3,
-                          double maxX1, double maxX2, double maxX3, int level);
+    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2, real minX3,
+                          real maxX1, real maxX2, real maxX3);
+    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2, real minX3,
+                          real maxX1, real maxX2, real maxX3, int level);
     virtual ~IntegrateValuesHelper();
 
     void calculateMQ();
     void calculateAV();
     void clearData();
 
-    double getRho() { return sRho; }
-    double getVx1() { return sVx1; }
-    double getVx2() { return sVx2; }
-    double getVx3() { return sVx3; }
-    double getCellsVolume() { return sCellVolume; }
+    real getRho() { return sRho; }
+    real getVx1() { return sVx1; }
+    real getVx2() { return sVx2; }
+    real getVx3() { return sVx3; }
+    real getCellsVolume() { return sCellVolume; }
     //  LBMReal getVm() { return sVm; }
     // LBMReal getPress() {return sPress;}
-    double getAvVx1() { return sAvVx1; }
-    double getAvVx2() { return sAvVx2; }
-    double getAvVx3() { return sAvVx3; }
-    double getTSx1() { return sTSx1; }
-    double getTSx2() { return sTSx2; }
-    double getTSx3() { return sTSx3; }
-    double getTSx1x3() { return sTSx1x3; }
+    real getAvVx1() { return sAvVx1; }
+    real getAvVx2() { return sAvVx2; }
+    real getAvVx3() { return sAvVx3; }
+    real getTSx1() { return sTSx1; }
+    real getTSx2() { return sTSx2; }
+    real getTSx3() { return sTSx3; }
+    real getTSx1x3() { return sTSx1x3; }
 
-    LBMReal getNumberOfFluidsNodes();
-    LBMReal getNumberOfSolidNodes();
+    real getNumberOfFluidsNodes();
+    real getNumberOfSolidNodes();
     GbCuboid3DPtr getBoundingBox();
     std::vector<CalcNodes> getCNodes();
 
@@ -72,9 +72,9 @@ private:
 
     bool root;
     SPtr<Grid3D> grid;
-    double sVx1, sVx2, sVx3, sRho, sCellVolume; // sPress, sVm;
-    double numberOfFluidsNodes, numberOfSolidNodes;
-    double sAvVx1, sAvVx2, sAvVx3, sTSx1, sTSx2, sTSx3, sTSx1x3;
+    real sVx1, sVx2, sVx3, sRho, sCellVolume; // sPress, sVm;
+    real numberOfFluidsNodes, numberOfSolidNodes;
+    real sAvVx1, sAvVx2, sAvVx3, sTSx1, sTSx2, sTSx3, sTSx1x3;
     std::vector<CalcNodes> cnodes;
     GbCuboid3DPtr boundingBox;
     std::shared_ptr<vf::mpi::Communicator> comm;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
index 9e5fa087fccf6d1121052ece7673a406984d52c0..98d0188fc25def9ee613e85f98d9623b0dfdf702 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
@@ -23,12 +23,12 @@ LineTimeSeriesCoProcessor::LineTimeSeriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
     numOfProc = comm->getNumberOfProcesses();
     gridRank  = comm->getProcessID();
 
-    double dx = CoProcessor::grid->getDeltaX(level);
+    real dx = CoProcessor::grid->getDeltaX(level);
 
     SPtr<CoordinateTransformation3D> trafo = grid->getCoordinateTransformator();
-    double orgX1                           = trafo->getX1CoordinateOffset();
-    double orgX2                           = trafo->getX2CoordinateOffset();
-    double orgX3                           = trafo->getX3CoordinateOffset();
+    real orgX1                           = trafo->getX1CoordinateOffset();
+    real orgX2                           = trafo->getX2CoordinateOffset();
+    real orgX3                           = trafo->getX3CoordinateOffset();
 
     int x1min = (int)((line->getX1Minimum() - orgX1) / dx);
     int x1max = (int)((line->getX1Maximum() - orgX1) / dx);
@@ -61,7 +61,7 @@ LineTimeSeriesCoProcessor::LineTimeSeriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
     ix3 = x3min % val<3>(blockNx) + 1;
 }
 //////////////////////////////////////////////////////////////////////////
-void LineTimeSeriesCoProcessor::process(double step)
+void LineTimeSeriesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) {
         collectData();
@@ -74,12 +74,12 @@ void LineTimeSeriesCoProcessor::writeLine(const std::string &path)
 {
     std::vector<UbTupleFloat3> nodes(2);
     std::vector<UbTupleInt2> lines(1);
-    val<1>(nodes[0])            = (float)line->getX1Minimum();
+    val<1>(nodes[0])            = (float)line->getX1Minimum(); // nodes holds UbTupleFloat3, so the cast target stays float
     val<2>(nodes[0])            = (float)line->getX2Minimum();
     val<3>(nodes[0])            = (float)line->getX3Minimum();
     val<1>(nodes[1])            = (float)line->getX1Maximum();
     val<2>(nodes[1])            = (float)line->getX2Maximum();
     val<3>(nodes[1])            = (float)line->getX3Maximum();
     val<1>(lines[0])            = 0;
-    val<1>(lines[0])            = 1;
+    val<2>(lines[0])            = 1; // end-node index goes in slot 2; slot 1 was being overwritten
     WbWriterVtkXmlASCII *writer = WbWriterVtkXmlASCII::getInstance();
@@ -88,13 +88,13 @@ void LineTimeSeriesCoProcessor::writeLine(const std::string &path)
 //////////////////////////////////////////////////////////////////////////
 void LineTimeSeriesCoProcessor::collectData()
 {
-    LBMReal f[27];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[27];
+    real vx1, vx2, vx3, rho;
     MPI_Status status;
-    std::vector<double> v1(length, 0);
+    std::vector<double> v1(length, 0); // v1..p stay double: collectData() receives these series as MPI_DOUBLE (NOTE(review): confirm the matching MPI_Send)
     std::vector<double> v2(length, 0);
     std::vector<double> v3(length, 0);
     std::vector<double> p(length, 0);
     for (int x = 0; x < length; x += blocknx) {
         if (dir == X1) {
             blockix1 = x / blocknx;
@@ -137,10 +137,10 @@ void LineTimeSeriesCoProcessor::collectData()
 
     if (root) {
         for (int i = 1; i < numOfProc; i++) {
-            std::vector<double> v1temp(length, 0);
+            std::vector<double> v1temp(length, 0); // receive buffers must stay double: MPI_Recv below fills them with `length` MPI_DOUBLE elements
             std::vector<double> v2temp(length, 0);
             std::vector<double> v3temp(length, 0);
             std::vector<double> ptemp(length, 0);
             MPI_Recv(&v1temp[0], length, MPI_DOUBLE, i, 1, mpi_comm, &status);
             MPI_Recv(&v2temp[0], length, MPI_DOUBLE, i, 2, mpi_comm, &status);
             MPI_Recv(&v3temp[0], length, MPI_DOUBLE, i, 3, mpi_comm, &status);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
index 16061b0b259b9118a82f7f46abbb919250b5dfea..c510a9b46b265344242ab8e3c75560d370a53921 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
@@ -30,7 +30,7 @@ public:
                               int level, std::shared_ptr<vf::mpi::Communicator> comm);
     ~LineTimeSeriesCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
     void writeLine(const std::string &path);
 
 protected:
@@ -42,7 +42,7 @@ private:
     bool root;
     SPtr<GbLine3D> line;
     // function pointer
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
     int blocknx;
     int blockix1;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
index a16f32c7d9e0d83dff90a55bb139d4115285a196..e238ec8b0f4f342fd24c39ed1e60ab2ad1d137fc 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
@@ -203,8 +203,8 @@ void MPIIOCoProcessor::writeBlocks(int step)
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    double start {0.};
+    double start {0.}; // NOTE(review): kept double - presumably MPI_Wtime() timestamps as in readBlocks(); confirm
     double finish {0.};
     MPI_Offset write_offset = (MPI_Offset)(size * sizeof(int));
 
     if (comm->isRoot()) {
@@ -242,8 +242,8 @@ void MPIIOCoProcessor::readBlocks(int step)
                            << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     
-    double start {0.};
+    double start {0.}; // keep double: MPI_Wtime() returns double, a single-precision `real` would truncate the timestamp
     double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
index edee5255ebdb14ed23cd3f53e4738a3fd8d58186..68759639d10d0032114bfc30ca0ed5d650f35a3f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
@@ -19,7 +19,7 @@ public:
     ~MPIIOCoProcessor() override;
 
     //! Each timestep writes the grid into the files
-    void process(double step) override = 0;
+    void process(real step) override = 0;
 
     //! Writes the blocks of the grid into the file cpBlocks.bin
     void writeBlocks(int step);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
index 34c247345d2d3cf1a9c527ab7a7b15e23f7ab5da..fd08df50a3f0cb9b85fd3255465170a4ef1681a9 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
@@ -40,7 +40,7 @@ MPIIOMigrationBECoProcessor::MPIIOMigrationBECoProcessor(SPtr<Grid3D> grid, SPtr
 //////////////////////////////////////////////////////////////////////////
 MPIIOMigrationBECoProcessor::~MPIIOMigrationBECoProcessor() { MPI_Type_free(&sendBlockIntType); }
 
-void MPIIOMigrationBECoProcessor::process(double step)
+void MPIIOMigrationBECoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) {
         if (comm->isRoot())
@@ -115,9 +115,9 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     int firstGlobalID {0};
-    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
-    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
-    std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
+    std::vector<real> doubleValuesArrayF; // real-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<real> doubleValuesArrayH1; // real-values (arrays of f's) in all blocks  H1distribution
+    std::vector<real> doubleValuesArrayH2; // real-values (arrays of f's) in all blocks  H2distribution
 
     if (comm->isRoot()) 
     {
@@ -132,9 +132,9 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
     int doubleCountInBlock = 0;
     int ic                 = 0;
     SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF = 0, D3Q27EsoTwist3DSplittedVectorPtrH1 = 0, D3Q27EsoTwist3DSplittedVectorPtrH2 = 0;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
     
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -197,55 +197,55 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
                     dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
                     dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
                 if (phaseField3DPtr1)
                     arrPresence.isPhaseField1Present = true;
                 else
                     arrPresence.isPhaseField1Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
                 if (phaseField3DPtr2)
                     arrPresence.isPhaseField2Present = true;
                 else
                     arrPresence.isPhaseField2Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
                 if (pressureFieldPtr)
                     arrPresence.isPressureFieldPresent = true;
                 else
@@ -295,8 +295,8 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // timestamps come from MPI_Wtime(), which returns double
+    double finish {0.}; // kept double so elapsed time is not truncated if 'real' is single precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -315,7 +315,7 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
 
     MPI_File_write_at(file_handler, (MPI_Offset)0, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
     MPI_File_write_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
@@ -333,7 +333,7 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
         if (rc != MPI_SUCCESS)
             throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
         MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
         MPI_File_sync(file_handler);
@@ -348,7 +348,7 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
         if (rc != MPI_SUCCESS)
             throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
         MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
         MPI_File_sync(file_handler);
@@ -427,12 +427,12 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     }
 
     int firstGlobalID {0};
-    std::vector<double> doubleValuesArray; // double-values of the data array in all blocks
+    std::vector<real> doubleValuesArray; // real-values of the data array in all blocks
     dataSetParam dataSetParamStr;
     bool firstBlock        = true;
     int doubleCountInBlock = 0;
     int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___Array;
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___Array;
 
     if (comm->isRoot()) 
     {
@@ -498,8 +498,8 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     MPI_Type_commit(&dataSetDoubleType);
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // timestamps come from MPI_Wtime(), which returns double
+    double finish {0.}; // kept double so elapsed time is not truncated if 'real' is single precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -511,7 +511,7 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
 
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
@@ -546,12 +546,12 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     }
 
     int firstGlobalID {0};
-    std::vector<double> doubleValuesArray; // double-values of the data array in all blocks
+    std::vector<real> doubleValuesArray; // real-values of the data array in all blocks
     dataSetParam dataSetParamStr;
     bool firstBlock        = true;
     int doubleCountInBlock = 0;
     int ic                 = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___Array;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___Array;
 
     if (comm->isRoot()) 
     {
@@ -615,8 +615,8 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     MPI_Type_commit(&dataSetDoubleType);
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // timestamps come from MPI_Wtime(), which returns double
+    double finish {0.}; // kept double so elapsed time is not truncated if 'real' is single precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -628,7 +628,7 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
 
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
@@ -706,16 +706,16 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
                     bouCond->velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
                     bouCond->densityBoundaryFlags   = bcArr->bcvector[bc]->getDensityBoundary();
                     bouCond->wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
-                    bouCond->bcVelocityX1           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX1();
-                    bouCond->bcVelocityX2           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX2();
-                    bouCond->bcVelocityX3           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX3();
-                    bouCond->bcDensity              = (float)bcArr->bcvector[bc]->getBoundaryDensity();
-                    bouCond->bcPhaseField           = (float)bcArr->bcvector[bc]->getBoundaryPhaseField();
-                    bouCond->nx1                    = (float)bcArr->bcvector[bc]->nx1;
-                    bouCond->nx2                    = (float)bcArr->bcvector[bc]->nx2;
-                    bouCond->nx3                    = (float)bcArr->bcvector[bc]->nx3;
+                    bouCond->bcVelocityX1           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX1();
+                    bouCond->bcVelocityX2           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX2();
+                    bouCond->bcVelocityX3           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX3();
+                    bouCond->bcDensity              = (real)bcArr->bcvector[bc]->getBoundaryDensity();
+                    bouCond->bcPhaseField           = (real)bcArr->bcvector[bc]->getBoundaryPhaseField();
+                    bouCond->nx1                    = (real)bcArr->bcvector[bc]->nx1;
+                    bouCond->nx2                    = (real)bcArr->bcvector[bc]->nx2;
+                    bouCond->nx3                    = (real)bcArr->bcvector[bc]->nx3;
                     for (int iq = 0; iq < 26; iq++)
-                        bouCond->q[iq] = (float)bcArr->bcvector[bc]->getQ(iq);
+                        bouCond->q[iq] = (real)bcArr->bcvector[bc]->getQ(iq);
                     bouCond->algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
                 }
 
@@ -757,8 +757,8 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // timestamps come from MPI_Wtime(), which returns double
+    double finish {0.}; // kept double so elapsed time is not truncated if 'real' is single precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -865,7 +865,7 @@ void MPIIOMigrationBECoProcessor::restart(int step)
 
 void MPIIOMigrationBECoProcessor::readBlocks(int step) { MPIIOCoProcessor::readBlocks(step); }
 
-void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<double> &pV, std::vector<double> *rawDataReceive)
+void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<real> &pV, std::vector<real> *rawDataReceive)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -878,7 +878,7 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
     int *blocksCounterSend = new int[size];
     int *blocksCounterRec  = new int[size];
 
-    std::vector<double> *rawDataSend = new std::vector<double>[size];
+    std::vector<real> *rawDataSend = new std::vector<real>[size];
     for (int r = 0; r < size; r++) 
     {
         rawDataSend[r].resize(0);
@@ -900,13 +900,13 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
         if (tempRank == rank) // no need to send data, the process already has it
         {
             blocksCounterRec[tempRank]++;
-            rawDataReceive[tempRank].push_back(double(indexB + ind));
+            rawDataReceive[tempRank].push_back(real(indexB + ind));
             rawDataReceive[tempRank].insert(rawDataReceive[tempRank].end(), pV.begin() + ind * size_t(doubleCountInBlock),
                                             pV.begin() + ind * size_t(doubleCountInBlock) + size_t(doubleCountInBlock));
         } else // we must send data to other processes
         {
             blocksCounterSend[tempRank]++;
-            rawDataSend[tempRank].push_back(double(indexB + ind));
+            rawDataSend[tempRank].push_back(real(indexB + ind));
             rawDataSend[tempRank].insert(rawDataSend[tempRank].end(), pV.begin() + ind * size_t(doubleCountInBlock),
                                          pV.begin() + ind * size_t(doubleCountInBlock) + size_t(doubleCountInBlock));
         }
@@ -1040,8 +1040,8 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
     int indexE = indexB + int(myBlocksCount); // the latest "my" block
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // timestamps come from MPI_Wtime(), which returns double
+    double finish {0.}; // kept double so elapsed time is not truncated if 'real' is single precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -1058,15 +1058,15 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
     size_t doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
         dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
         dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-    std::vector<double> doubleValuesArrayF(size_t(myBlocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
-    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
-    std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
+    std::vector<real> doubleValuesArrayF(size_t(myBlocksCount * doubleCountInBlock)); // real-values in all blocks  Fdistributions
+    std::vector<real> doubleValuesArrayH1; // real-values in all blocks  H1distributions
+    std::vector<real> doubleValuesArrayH2; // real-values in all blocks  H2distributions
 
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
     //--------------------------------- F ---------------------------------------------------------
-    MPI_Offset read_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double));
+    MPI_Offset read_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(indexB * doubleCountInBlock * sizeof(real));
     MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayF[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_close(&file_handler);
@@ -1083,7 +1083,7 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         multiPhase1 = true;
         doubleValuesArrayH1.resize(myBlocksCount * doubleCountInBlock);
 
-        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double)) ;
+        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(real)) ;
         MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
     }
     MPI_File_close(&file_handler);
@@ -1099,7 +1099,7 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         multiPhase2 = true;
         doubleValuesArrayH2.resize(myBlocksCount * doubleCountInBlock);
 
-        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double));
+        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(real));
         MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH2[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
     }
     MPI_File_close(&file_handler);
@@ -1114,13 +1114,13 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    std::vector<double>* rawDataReceiveF = new std::vector<double>[size];
+    std::vector<real>* rawDataReceiveF = new std::vector<real>[size];
     for (int r = 0; r < size; r++)
         rawDataReceiveF[r].resize(0);
     blocksExchange(MESSAGE_TAG, indexB, indexE, int(doubleCountInBlock), doubleValuesArrayF, rawDataReceiveF);
     
 
-    std::vector<double>* rawDataReceiveH1 = new std::vector<double>[size];
+    std::vector<real>* rawDataReceiveH1 = new std::vector<real>[size];
     if (multiPhase1)
     {
         for (int r = 0; r < size; r++)
@@ -1128,7 +1128,7 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         blocksExchange(MESSAGE_TAG, indexB, indexE, int(doubleCountInBlock), doubleValuesArrayH1, rawDataReceiveH1);
     }
 
-    std::vector<double>* rawDataReceiveH2 = new std::vector<double>[size];
+    std::vector<real>* rawDataReceiveH2 = new std::vector<real>[size];
     if (multiPhase2)
     {
         for (int r = 0; r < size; r++)
@@ -1146,9 +1146,9 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
     
     //-------------------------------------- restore blocks ---------------------------------
     int blockID;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
-    std::vector<double> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+    std::vector<real> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<real> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    std::vector<real> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
 
     size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
     size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
@@ -1185,11 +1185,11 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
             index += vectorSize3;
 
             SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
@@ -1199,11 +1199,11 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
             SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
             if (multiPhase1)
             {
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
                 dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
@@ -1214,11 +1214,11 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
             SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
             if (multiPhase2)
             {
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                        new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                        new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
                 dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
@@ -1231,9 +1231,9 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
             this->lbmKernel->setBlock(block);
             this->lbmKernel->setNX(std::array<int, 3>{ {dataSetParamStr1.nx1, dataSetParamStr1.nx2, dataSetParamStr1.nx3}});
             SPtr<LBMKernel> kernel = this->lbmKernel->clone();
-            LBMReal collFactor = LBMSystem::calcCollisionFactor(this->nue, block->getLevel());
-            LBMReal collFactorL = LBMSystem::calcCollisionFactor(this->nuL, block->getLevel());
-            LBMReal collFactorG = LBMSystem::calcCollisionFactor(this->nuG, block->getLevel());
+            real collFactor = LBMSystem::calcCollisionFactor(this->nue, block->getLevel());
+            real collFactorL = LBMSystem::calcCollisionFactor(this->nuL, block->getLevel());
+            real collFactorG = LBMSystem::calcCollisionFactor(this->nuG, block->getLevel());
             kernel->setCollisionFactor(collFactor);
             kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
             kernel->setDeltaT(LBMSystem::getDeltaT(block->getLevel()));
@@ -1319,8 +1319,8 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // timestamps come from MPI_Wtime(), which returns double
+    double finish {0.}; // kept double so elapsed time is not truncated if 'real' is single precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -1348,12 +1348,12 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
     MPI_File_read_at(file_handler, (MPI_Offset)0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     size_t doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(myBlocksCount * doubleCountInBlock); // double-values in all blocks
+    std::vector<real> doubleValuesArray(myBlocksCount * doubleCountInBlock); // real-values in all blocks
 
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    MPI_Offset read_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(indexB) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_Offset read_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(indexB) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(real));
     MPI_File_read_at(file_handler, read_offset, &doubleValuesArray[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_close(&file_handler);
@@ -1367,7 +1367,7 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    std::vector<double> *rawDataReceive = new std::vector<double>[size];
+    std::vector<real> *rawDataReceive = new std::vector<real>[size];
     for (int r = 0; r < size; r++)
         rawDataReceive[r].resize(0);
 
@@ -1384,7 +1384,7 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
 
     //----------------------------- restore data ---------------------------------
     int blockID;
-    std::vector<double> vectorsOfValues;
+    std::vector<real> vectorsOfValues;
     size_t index;
     size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
@@ -1401,53 +1401,53 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
             index += nextVectorSize;
 
             // fill arrays
-            SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___4DArray;
-            SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___3DArray;
+            SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___4DArray;
+            SPtr<CbArray3D<real, IndexerX3X2X1>> ___3DArray;
 
             switch (arrType) 
             {
                 case AverageDensity:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
                     break;
                 case AverageVelocity:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
                     break;
                 case AverageFluktuations:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
                     break;
                 case AverageTriple:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
                     break;
                 case ShearStressVal:
-                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                    ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
                     break;
                 case RelaxationFactor:
-                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
                     break;
                 case PhaseField1:
-                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setPhaseField(___3DArray);
                     break;
                 case PhaseField2:
-                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
                     break;
                 case PressureField:
-                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setPressureField(___3DArray);
                     break;
@@ -1480,8 +1480,8 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // timestamps come from MPI_Wtime(), which returns double
+    double finish {0.}; // kept double so elapsed time is not truncated if 'real' is single precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -1725,9 +1725,9 @@ void MPIIOMigrationBECoProcessor::setLBMKernel(SPtr<LBMKernel> kernel) { this->l
 //////////////////////////////////////////////////////////////////////////
 void MPIIOMigrationBECoProcessor::setBCProcessor(SPtr<BCProcessor> bcProcessor) { this->bcProcessor = bcProcessor; }
 //////////////////////////////////////////////////////////////////////////
-void MPIIOMigrationBECoProcessor::setNu(double nu) { this->nue = nu; }
+void MPIIOMigrationBECoProcessor::setNu(real nu) { this->nue = nu; }
 
-void MPIIOMigrationBECoProcessor::setNuLG(double cfL, double cfG) { this->nuL = cfL;  this->nuG = cfG; }
+void MPIIOMigrationBECoProcessor::setNuLG(real cfL, real cfG) { this->nuL = cfL;  this->nuG = cfG; }
 
-void MPIIOMigrationBECoProcessor::setDensityRatio(double dr) { this->densityRatio = dr; }
+void MPIIOMigrationBECoProcessor::setDensityRatio(real dr) { this->densityRatio = dr; }
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
index c60800ccd18e5ac523c5c85ea47219a96f8a69c5..d29ecca36d194cd0498b3347b9e59eefced8a475 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
@@ -36,7 +36,7 @@ public:
                                 std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIOMigrationBECoProcessor() override;
     //! Each timestep writes the grid into the files
-    void process(double step) override;
+    void process(real step) override;
     //! Reads the grid from the files before grid reconstruction
     void restart(int step);
     //! Writes the blocks of the grid into the file cpBlocks.bin
@@ -73,12 +73,12 @@ public:
     void setBCProcessor(SPtr<BCProcessor> bcProcessor);
     //! The function truncates the data files
     void clearAllFiles(int step);
-    void setNu(double nu);
-    void setNuLG(double cfL, double cfG);
-    void setDensityRatio(double dr);
+    void setNu(real nu);
+    void setNuLG(real cfL, real cfG);
+    void setDensityRatio(real dr);
 
-    void blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<double> &pV,
-                        std::vector<double> *rawDataReceive);
+    void blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<real> &pV,
+                        std::vector<real> *rawDataReceive);
 
 private:
     // MPI_Datatype gridParamType, block3dType;
@@ -92,10 +92,10 @@ private:
     SPtr<LBMKernel> lbmKernel;
     SPtr<BCProcessor> bcProcessor;
     SPtr<Grid3DVisitor> metisVisitor;
-    double nue;
-    double nuL;
-    double nuG;
-    double densityRatio;
+    real nue;
+    real nuL;
+    real nuG;
+    real densityRatio;
 
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
index 285d6c28ae92b3bad7fb6b1171f3a09a637e0729..4e3a84aa8a47051aee123181fc63741266dbacc0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
@@ -69,7 +69,7 @@ MPIIOMigrationCoProcessor::~MPIIOMigrationCoProcessor()
 }
 
 //////////////////////////////////////////////////////////////////////////
-void MPIIOMigrationCoProcessor::process(double step)
+void MPIIOMigrationCoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) 
     {
@@ -133,9 +133,9 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     DataSetMigration *dataSetArray = new DataSetMigration[blocksCount];
-    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
-    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
-    std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
+    std::vector<real> doubleValuesArrayF; // real-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<real> doubleValuesArrayH1; // real-values (arrays of f's) in all blocks  H1distribution
+    std::vector<real> doubleValuesArrayH2; // real-values (arrays of f's) in all blocks  H2distribution
 
     if (comm->isRoot()) 
     {
@@ -151,9 +151,9 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
     size_t doubleCountInBlock = 0;
     int ic                    = 0;
     SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF = 0, D3Q27EsoTwist3DSplittedVectorPtrH1 = 0, D3Q27EsoTwist3DSplittedVectorPtrH2 = 0;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
 
     SPtr<LBMKernel> kernel;
 
@@ -230,55 +230,55 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
                     dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
                     dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
                 if (phaseField3DPtr1)
                     arrPresence.isPhaseField1Present = true;
                 else
                     arrPresence.isPhaseField1Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
                 if (phaseField3DPtr2)
                     arrPresence.isPhaseField2Present = true;
                 else
                     arrPresence.isPhaseField2Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
                 if (pressureFieldPtr)
                     arrPresence.isPressureFieldPresent = true;
                 else
@@ -328,8 +328,8 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float 'real' would truncate the timestamp
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -352,7 +352,7 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
     MPI_File_write_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
     
     MPI_Offset write_offset;
-    size_t sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(double);
+    size_t sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(real);
 
     for (int nb = 0; nb < blocksCount; nb++) 
     {
@@ -372,7 +372,7 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
         if (rc != MPI_SUCCESS)
             throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-        sizeofOneDataSet = doubleCountInBlock * sizeof(double);
+        sizeofOneDataSet = doubleCountInBlock * sizeof(real);
 
         for (int nb = 0; nb < blocksCount; nb++) 
         {
@@ -392,7 +392,7 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
         if (rc != MPI_SUCCESS)
             throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-        sizeofOneDataSet = doubleCountInBlock * sizeof(double);
+        sizeofOneDataSet = doubleCountInBlock * sizeof(real);
 
         for (int nb = 0; nb < blocksCount; nb++) 
         {
@@ -471,7 +471,7 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     }
 
     DataSetSmallMigration *dataSetSmallArray = new DataSetSmallMigration[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values of the AverageDensityArray in all blocks
+    std::vector<real> doubleValuesArray; // real-values of the AverageDensityArray in all blocks
     dataSetParam dataSetParamStr;
 
     if (comm->isRoot()) 
@@ -483,7 +483,7 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     bool firstBlock           = true;
     size_t doubleCountInBlock = 0;
     int ic                    = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___Array;
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___Array;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -543,8 +543,8 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float 'real' would truncate the timestamp
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -560,7 +560,7 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     MPI_Offset write_offset;
-    size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double);
+    size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(real);
 
     for (int nb = 0; nb < blocksCount; nb++) 
     {
@@ -601,7 +601,7 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     }
 
     DataSetSmallMigration *dataSetSmallArray = new DataSetSmallMigration[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+    std::vector<real> doubleValuesArray; // real-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
     if (comm->isRoot()) 
@@ -613,7 +613,7 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     bool firstBlock           = true;
     size_t doubleCountInBlock = 0;
     int ic                    = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___Array;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___Array;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -670,8 +670,8 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float 'real' would truncate the timestamp
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -692,7 +692,7 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
-    size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double);
+    size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(real);
 
     MPI_Offset write_offset;
     for (int nb = 0; nb < blocksCount; nb++) 
@@ -778,16 +778,16 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
                     bouCond->velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
                     bouCond->densityBoundaryFlags   = bcArr->bcvector[bc]->getDensityBoundary();
                     bouCond->wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
-                    bouCond->bcVelocityX1           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX1();
-                    bouCond->bcVelocityX2           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX2();
-                    bouCond->bcVelocityX3           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX3();
-                    bouCond->bcDensity              = (float)bcArr->bcvector[bc]->getBoundaryDensity();
-                    bouCond->bcPhaseField           = (float)bcArr->bcvector[bc]->getBoundaryPhaseField();
-                    bouCond->nx1                    = (float)bcArr->bcvector[bc]->nx1;
-                    bouCond->nx2                    = (float)bcArr->bcvector[bc]->nx2;
-                    bouCond->nx3                    = (float)bcArr->bcvector[bc]->nx3;
+                    bouCond->bcVelocityX1           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX1();
+                    bouCond->bcVelocityX2           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX2();
+                    bouCond->bcVelocityX3           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX3();
+                    bouCond->bcDensity              = (real)bcArr->bcvector[bc]->getBoundaryDensity();
+                    bouCond->bcPhaseField           = (real)bcArr->bcvector[bc]->getBoundaryPhaseField();
+                    bouCond->nx1                    = (real)bcArr->bcvector[bc]->nx1;
+                    bouCond->nx2                    = (real)bcArr->bcvector[bc]->nx2;
+                    bouCond->nx3                    = (real)bcArr->bcvector[bc]->nx3;
                     for (int iq = 0; iq < 26; iq++)
-                        bouCond->q[iq] = (float)bcArr->bcvector[bc]->getQ(iq);
+                        bouCond->q[iq] = (real)bcArr->bcvector[bc]->getQ(iq);
                     bouCond->algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
                 }
 
@@ -829,8 +829,8 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float 'real' would truncate the timestamp
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -942,8 +942,8 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float 'real' would truncate the timestamp
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -977,9 +977,9 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
     size_t doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
         dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
         dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-    std::vector<double> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
-    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
-    std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
+    std::vector<real> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // real-values in all blocks  Fdistributions
+    std::vector<real> doubleValuesArrayH1; // real-values in all blocks  H1distributions
+    std::vector<real> doubleValuesArrayH2; // real-values in all blocks  H2distributions
 
     // define MPI_types depending on the block-specific information
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
@@ -987,7 +987,7 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
 
     size_t ic = 0;
     MPI_Offset read_offset;
-    size_t sizeofOneDataSet = size_t(sizeof(DataSetMigration) + doubleCountInBlock * sizeof(double));
+    size_t sizeofOneDataSet = size_t(sizeof(DataSetMigration) + doubleCountInBlock * sizeof(real));
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -1016,7 +1016,7 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         multiPhase1 = true;
         doubleValuesArrayH1.resize(blocksCount * doubleCountInBlock);
 
-        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(double));
+        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(real));
 
         for (int level = minInitLevel; level <= maxInitLevel; level++)
         {
@@ -1043,7 +1043,7 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         multiPhase2 = true;
         doubleValuesArrayH2.resize(blocksCount * doubleCountInBlock);
 
-        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(double));
+        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(real));
 
         for (int level = minInitLevel; level <= maxInitLevel; level++)
         {
@@ -1069,9 +1069,9 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
     }
 
     size_t index = 0;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
-    std::vector<double> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+    std::vector<real> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<real> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    std::vector<real> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
 
     size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
     size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
@@ -1101,22 +1101,22 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         index += vectorSize3;
  
         SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+            new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+            new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
             vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
         
         //----------------------------------------- H1 ----------------------------------------------------
        SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
        if (multiPhase1)
         {
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                 vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
@@ -1127,11 +1127,11 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
         if (multiPhase2)
         {
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
@@ -1228,8 +1228,8 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // MPI_Wtime() returns double; a float 'real' would truncate the timestamp
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -1257,7 +1257,7 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
 
     DataSetSmallMigration *dataSetSmallArray = new DataSetSmallMigration[blocksCount];
     size_t doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
+    std::vector<real> doubleValuesArray(blocksCount * doubleCountInBlock); // real-values in all blocks
 
     // define MPI_types depending on the block-specific information
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
@@ -1265,7 +1265,7 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
 
     size_t ic = 0;
     MPI_Offset read_offset;
-    size_t sizeofOneDataSet = size_t(sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double));
+    size_t sizeofOneDataSet = size_t(sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(real));
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
@@ -1293,9 +1293,9 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
     //----------------------------- restore data ---------------------------------
     size_t index = 0;
     size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___4DArray;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___3DArray;
+    std::vector<real> vectorsOfValues;
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___4DArray;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___3DArray;
 
     for (std::size_t n = 0; n < blocksCount; n++)
     {
@@ -1308,47 +1308,47 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
         switch (arrType) 
         {
             case AverageDensity:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
                 break;
             case AverageVelocity:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
                 break;
             case AverageFluktuations:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
                 break;
             case AverageTriple:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
                 break;
             case ShearStressVal:
-                ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
                 break;
             case RelaxationFactor:
-                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
                 break;
             case PhaseField1:
-                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setPhaseField(___3DArray);
                 break;
             case PhaseField2:
-                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
                 break;
             case PressureField:
-                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setPressureField(___3DArray);
                 break;
@@ -1380,8 +1380,8 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // wall-clock timestamps: MPI_Wtime() returns double, a narrower `real` (e.g. float) would truncate them
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
index b822b783edd3628f947aadf20d6dc6109e9e3c31..4b8c6231176923618b1a394f125ff32b0e450d54 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
@@ -34,7 +34,7 @@ public:
     MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIOMigrationCoProcessor() override;
     //! Each timestep writes the grid into the files
-    void process(double step) override;
+    void process(real step) override;
     //! Reads the grid from the files before grid reconstruction
     void restart(int step);
     //! Writes the blocks of the grid into the file cpBlocks.bin
@@ -71,7 +71,7 @@ public:
     void setBCProcessor(SPtr<BCProcessor> bcProcessor);
     //! The function truncates the data files
     void clearAllFiles(int step);
-    // void setNu(double nu);
+    // void setNu(real nu);
 
 private:
     // MPI_Datatype gridParamType, block3dType;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
index ea45bb14110a071724f816b3c7840ce0dfbd7327..f55d2e08a847e0892810ea12da8d97454c1416f7 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
@@ -77,7 +77,7 @@ MPIIORestartCoProcessor::~MPIIORestartCoProcessor()
 }
 
 //////////////////////////////////////////////////////////////////////////
-void MPIIORestartCoProcessor::process(double step)
+void MPIIORestartCoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) 
     {
@@ -138,9 +138,9 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     DataSetRestart *dataSetArray = new DataSetRestart[blocksCount];
-    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
-    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
-    std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
+    std::vector<real> doubleValuesArrayF; // real-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<real> doubleValuesArrayH1; // real-values (arrays of f's) in all blocks  H1distribution
+    std::vector<real> doubleValuesArrayH2; // real-values (arrays of f's) in all blocks  H2distribution
 
     if (comm->isRoot()) 
     {
@@ -156,9 +156,9 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     int ic                 = 0;
 
     SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF, D3Q27EsoTwist3DSplittedVectorPtrH1, D3Q27EsoTwist3DSplittedVectorPtrH2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF, localDistributionsH1, localDistributionsH2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF, nonLocalDistributionsH1, nonLocalDistributionsH2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF, zeroDistributionsH1, zeroDistributionsH2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF, localDistributionsH1, localDistributionsH2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF, nonLocalDistributionsH1, nonLocalDistributionsH2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF, zeroDistributionsH1, zeroDistributionsH2;
  
     SPtr<LBMKernel> kernel;
 
@@ -238,55 +238,55 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
                      dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
                      dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = kernel->getDataSet()->getAverageDensity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> averageDensityArray = kernel->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = kernel->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = kernel->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = kernel->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageFluctArray3DPtr = kernel->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = kernel->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> AverageTripleArray3DPtr = kernel->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = kernel->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<real, IndexerX4X3X2X1>> ShearStressValArray3DPtr = kernel->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = kernel->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> relaxationFactor3DPtr = kernel->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = kernel->getDataSet()->getPhaseField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr1 = kernel->getDataSet()->getPhaseField();
                 if (phaseField3DPtr1)
                     arrPresence.isPhaseField1Present = true;
                 else
                     arrPresence.isPhaseField1Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = kernel->getDataSet()->getPhaseField2();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> phaseField3DPtr2 = kernel->getDataSet()->getPhaseField2();
                 if (phaseField3DPtr2)
                     arrPresence.isPhaseField2Present = true;
                 else
                     arrPresence.isPhaseField2Present = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
+                SPtr<CbArray3D<real, IndexerX3X2X1>> pressureFieldPtr = block->getKernel()->getDataSet()->getPressureField();
                 if (pressureFieldPtr)
                     arrPresence.isPressureFieldPresent = true;
                 else
@@ -345,21 +345,21 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     {
         if (rank == 0) 
         {
-            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(real));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         } 
         else 
         {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // wall-clock timestamps: MPI_Wtime() returns double, a narrower `real` (e.g. float) would truncate them
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -472,33 +472,6 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     if (arrPresence.isPressureFieldPresent)
         write3DArray(step, PressureField, std::string("/cpPressureField.bin"));
 
-    /*if (arrPresence.isAverageDensityArrayPresent)
-        writeAverageDensityArray(step);
-
-    if (arrPresence.isAverageVelocityArrayPresent)
-        writeAverageVelocityArray(step);
-
-    if (arrPresence.isAverageFluktuationsArrayPresent)
-        writeAverageFluktuationsArray(step);
-
-    if (arrPresence.isAverageTripleArrayPresent)
-        writeAverageTripleArray(step);
-
-    if (arrPresence.isShearStressValArrayPresent)
-        writeShearStressValArray(step);
-
-    if (arrPresence.isRelaxationFactorPresent)
-        writeRelaxationFactor(step);
-
-    if (arrPresence.isPhaseField1Present)
-        writePhaseField(step, 1);
-
-    if (arrPresence.isPhaseField2Present)
-        writePhaseField(step, 2);
-
-    if (arrPresence.isPressureFieldPresent)
-        writePressureField(step);*/
-
 }
 
 void MPIIORestartCoProcessor::write4DArray(int step, Arrays arrayType, std::string fname)
@@ -519,7 +492,7 @@ void MPIIORestartCoProcessor::write4DArray(int step, Arrays arrayType, std::stri
     }
 
     DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values of the AverageDensityArray in all blocks
+    std::vector<real> doubleValuesArray; // real-values of the AverageDensityArray in all blocks
     dataSetParam dataSetParamStr;
 
     if (comm->isRoot())
@@ -531,7 +504,7 @@ void MPIIORestartCoProcessor::write4DArray(int step, Arrays arrayType, std::stri
     bool firstBlock = true;
     int doubleCountInBlock = 0;
     int ic = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___Array;
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___Array;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++)
     {
@@ -603,20 +576,20 @@ void MPIIORestartCoProcessor::write4DArray(int step, Arrays arrayType, std::stri
     {
         if (rank == 0)
         {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         }
         else
         {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    double start{ 0. };
-    double finish{ 0. };
+    double start{ 0. };  // wall-clock timestamps: MPI_Wtime() returns double, a narrower `real` (e.g. float) would truncate them
+    double finish{ 0. };
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -676,7 +649,7 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
     }
 
     DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+    std::vector<real> doubleValuesArray; // real-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
     if (comm->isRoot())
@@ -688,7 +661,7 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
     bool firstBlock = true;
     size_t doubleCountInBlock = 0;
     int ic = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___Array;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___Array;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++)
     {
@@ -757,21 +730,21 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
     {
         if (rank == 0)
         {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
     }
         else
         {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
 }
 
 
-    double start{ 0. };
-    double finish{ 0. };
+    double start{ 0. };  // wall-clock timestamps: MPI_Wtime() returns double, a narrower `real` (e.g. float) would truncate them
+    double finish{ 0. };
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -815,13 +788,22 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
     delete[] dataSetSmallArray;
 }
 
-/*void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
+void MPIIORestartCoProcessor::writeBoundaryConds(int step)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
+    if (comm->isRoot()) 
+    {
+        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start collect data rank = " << rank);
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+    }
+
+    int blocksCount          = 0; // quantity of blocks in the grid, max 2147483648 blocks!
+    size_t count_boundCond   = 0; // how many BoundaryConditions in all blocks
+    int count_indexContainer = 0; // how many indexContainer-values in all blocks
+    size_t byteCount         = 0; // how many bytes writes this process in the file
 
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
@@ -832,87 +814,128 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
 
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values of the AverageDensityArray in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray;
+    BCAddRestart *bcAddArray = new BCAddRestart[blocksCount];
+    std::vector<BoundaryCondition> bcVector;
+    std::vector<int> bcindexmatrixV;
+    std::vector<int> indexContainerV;
+    bool bcindexmatrixCountNotInit = true;
+    int ic = 0;
+    SPtr<BCArray3D> bcArr;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) 
     {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
+        for (SPtr<Block3D> block : blocksVector[level]) // all the blocks of the current level
         {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2 = block->getX2();
-            dataSetSmallArray[ic].x3 = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
+            bcArr = block->getKernel()->getBCProcessor()->getBCArray();
 
-            averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
+            bcAddArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            bcAddArray[ic].x2                   = block->getX2();
+            bcAddArray[ic].x3                   = block->getX3();
+            bcAddArray[ic].level                = block->getLevel();
+            bcAddArray[ic].boundCond_count      = 0; // how many BoundaryConditions in this block
+            bcAddArray[ic].indexContainer_count = 0; // how many indexContainer-values in this block
 
-            if (firstBlock) // when first (any) valid block...
+            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++) 
             {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(averageDensityArray->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(averageDensityArray->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(averageDensityArray->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(averageDensityArray->getNX4());
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                // Build the record on the stack: the previous `new BoundaryCondition()` was copied into bcVector and
+                // never deleted, leaking one object per entry. Zero every byte up front so a NULL entry is still
+                // stored as an all-zero record and anything not assigned below keeps the value 0.
+                BoundaryCondition bouCond;
+                memset(&bouCond, 0, sizeof(BoundaryCondition));
+                if (bcArr->bcvector[bc] != NULL)
+                {
+                    bouCond.noslipBoundaryFlags    = bcArr->bcvector[bc]->getNoSlipBoundary();
+                    bouCond.slipBoundaryFlags      = bcArr->bcvector[bc]->getSlipBoundary();
+                    bouCond.velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
+                    bouCond.densityBoundaryFlags   = bcArr->bcvector[bc]->getDensityBoundary();
+                    bouCond.wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
+                    bouCond.bcVelocityX1           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX1();
+                    bouCond.bcVelocityX2           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX2();
+                    bouCond.bcVelocityX3           = (real)bcArr->bcvector[bc]->getBoundaryVelocityX3();
+                    bouCond.bcDensity              = (real)bcArr->bcvector[bc]->getBoundaryDensity();
+                    bouCond.bcPhaseField           = (real)bcArr->bcvector[bc]->getBoundaryPhaseField();
+                    bouCond.nx1                    = (real)bcArr->bcvector[bc]->nx1;
+                    bouCond.nx2                    = (real)bcArr->bcvector[bc]->nx2;
+                    bouCond.nx3                    = (real)bcArr->bcvector[bc]->nx3;
+                    for (int iq = 0; iq < 26; iq++)
+                        bouCond.q[iq] = (real)bcArr->bcvector[bc]->getQ(iq);
+                    bouCond.algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
+                }
 
-                firstBlock = false;
+                bcVector.push_back(bouCond);
+                bcAddArray[ic].boundCond_count++;
+                count_boundCond++;
+            }
+
+            // the quantity of elements in the bcindexmatrix array (CbArray3D<int, IndexerX3X2X1>) in bcArray(BCArray3D)
+            // is always equal, this will be the size of the "write-read-block" in MPI_write_.../MPI_read-functions when
+            // writing/reading BoundConds
+            if (bcindexmatrixCountNotInit) 
+            {
+                boundCondParamStr.nx1                = static_cast<int>(bcArr->bcindexmatrix.getNX1());
+                boundCondParamStr.nx2                = static_cast<int>(bcArr->bcindexmatrix.getNX2());
+                boundCondParamStr.nx3                = static_cast<int>(bcArr->bcindexmatrix.getNX3());
+                boundCondParamStr.bcindexmatrixCount = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
+                bcindexmatrixCountNotInit            = false;
             }
+            bcindexmatrixV.insert(bcindexmatrixV.end(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), averageDensityArray->getDataVector().begin(), averageDensityArray->getDataVector().end());
+            indexContainerV.insert(indexContainerV.end(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
+            bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
+            count_indexContainer += bcAddArray[ic].indexContainer_count;
 
             ic++;
         }
     }
 
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
+    MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
+    MPI_Type_commit(&bcindexmatrixType);
 
-    if (comm->isRoot()) 
+    // how many "big blocks" of BLOCK_SIZE size can be formed
+    int bcBlockCount = (int)(count_boundCond / BLOCK_SIZE);
+    if (bcBlockCount * BLOCK_SIZE < (int)count_boundCond)
+        bcBlockCount += 1;
+    for (int i = (int)count_boundCond; i < bcBlockCount * BLOCK_SIZE; i++) 
     {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        BoundaryCondition bouCond; // all-zero padding record on the stack; the former `new` was never deleted (leak)
+        memset(&bouCond, 0, sizeof(BoundaryCondition));
+        bcVector.push_back(bouCond);
     }
 
+    byteCount = bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) + blocksCount * sizeof(BCAddRestart) +
+                sizeof(int) * (blocksCount * boundCondParamStr.bcindexmatrixCount + count_indexContainer);
+
     // write to the file
     // all processes calculate their offsets (quantity of bytes that the process is going to write)
     // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
+    MPI_Offset write_offset  = (MPI_Offset)(size * (3 * sizeof(int) + sizeof(boundCondParam)));
     size_t next_write_offset = 0;
 
     if (size > 1) 
     {
         if (rank == 0) 
         {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + byteCount;
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         } 
         else 
         {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + byteCount;
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
+    if (comm->isRoot()) 
+    {
+        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start MPI IO rank = " << rank);
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+    }
+
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // wall-clock timestamps: MPI_Wtime() returns double, a narrower `real` (e.g. float) would truncate them
+    double finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -925,2388 +948,329 @@ void MPIIORestartCoProcessor::write3DArray(int step, Arrays arrayType, std::stri
 #endif
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageDensityArray.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
+    MPI_Offset write_offset1 = (MPI_Offset)(rank * (3 * sizeof(int) + sizeof(boundCondParam)));
+
     // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, write_offset1, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+    // each process writes the quantity of "big blocks" of BLOCK_SIZE of boundary conditions
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + sizeof(int)), &bcBlockCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+    // each process writes the quantity of indexContainer elements in all blocks
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 2 * sizeof(int)), &count_indexContainer, 1, MPI_INT,  MPI_STATUS_IGNORE);
+    // each process writes the quantity of bcindexmatrix elements in every block
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 3 * sizeof(int)), &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
+
+    // each process writes data identifying the blocks
+    MPI_File_write_at(file_handler, write_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
+    // each process writes boundary conditions
+    if (bcVector.size() > 0)
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart)), &bcVector[0],
+                          bcBlockCount, boundCondType1000, MPI_STATUS_IGNORE);
+    // each process writes bcindexmatrix values
+    if (bcindexmatrixV.size() > 0)
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition)),
+                          &bcindexmatrixV[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
+    // each process writes indexContainer values
+    if (indexContainerV.size() > 0)
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) +
+                      blocksCount * boundCondParamStr.bcindexmatrixCount * sizeof(int)), &indexContainerV[0], count_indexContainer, MPI_INT, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
+    MPI_Type_free(&bcindexmatrixType);
 
     if (comm->isRoot()) 
     {
         finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray time: " << finish - start << " s");
+        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds time: " << finish - start << " s");
     }
 
-    delete[] dataSetSmallArray;
+    delete[] bcAddArray;
 }
 
-void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
+//------------------------------------------- READ -----------------------------------------------
+void MPIIORestartCoProcessor::restart(int step) // loads the check point written for time step `step`
 {
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (comm->isRoot()) // progress messages are logged by the root process only
+        UBLOG(logINFO, "MPIIORestartCoProcessor restart step: " << step);
+    if (comm->isRoot())
+        UBLOG(logINFO, "Load check point - start");
 
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
+    readBlocks(step); // NOTE(review): presumably the blocks must exist before the two reads below - confirm
+    readDataSet(step);
+    readBoundaryConds(step);
 
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
+    grid->setTimeStep(step); // the grid's time step is set only after all three read calls have returned
 
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
+    if (comm->isRoot())
+        UBLOG(logINFO, "Load check point - end");
+}
+
+void MPIIORestartCoProcessor::readBlocks(int step) { MPIIOCoProcessor::readBlocks(step); } // forwards `step` unchanged to the MPIIOCoProcessor implementation
+
+void MPIIORestartCoProcessor::readDataSet(int step)
+{
+    int rank, size;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
 
     if (comm->isRoot()) 
     {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray start collect data rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start MPI IO rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2 = block->getX2();
-            dataSetSmallArray[ic].x3 = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
+    
+    real start {0.};
+    real finish {0.};
+    if (comm->isRoot())
+        start = MPI_Wtime();
 
-            AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+    MPI_File file_handler;
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
+    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(AverageVelocityArray3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(AverageVelocityArray3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(AverageVelocityArray3DPtr->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(AverageVelocityArray3DPtr->getNX4());
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    // calculate the read offset
+    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
+    size_t next_read_offset = 0;
+    bool multiPhase1 = false;
+    bool multiPhase2 = false;
 
-                firstBlock = false;
-            }
+    // read count of blocks
+    int blocksCount = 0;
+    dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), AverageVelocityArray3DPtr->getDataVector().begin(), AverageVelocityArray3DPtr->getDataVector().end());
+    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, read_offset, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
-            ic++;
-        }
-    }
+    DataSetRestart *dataSetArray = new DataSetRestart[blocksCount];
+    real doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
+        dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
+        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+    std::vector<real> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // real-values in all blocks  Fdistributions
+    std::vector<real> doubleValuesArrayH1; // real-values in all blocks  H1distributions
+    std::vector<real> doubleValuesArrayH2; // real-values in all blocks  H2distributions
 
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
+    //   define MPI_types depending on the block-specific information
+    MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageVelocityArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-
-    MPI_Type_free(&dataSetDoubleType);
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(AverageFluctArray3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(AverageFluctArray3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(AverageFluctArray3DPtr->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(AverageFluctArray3DPtr->getNX4());
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&(dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), AverageFluctArray3DPtr->getDataVector().begin(),
-                                         AverageFluctArray3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageFluktuationsArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageTripleArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(AverageTripleArray3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(AverageTripleArray3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(AverageTripleArray3DPtr->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(AverageTripleArray3DPtr->getNX4());
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), AverageTripleArray3DPtr->getDataVector().begin(), AverageTripleArray3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageTripleArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageTripleArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageTripleArray time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writeShearStressValArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(ShearStressValArray3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(ShearStressValArray3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(ShearStressValArray3DPtr->getNX3());
-                dataSetParamStr.nx[3] = static_cast<int>(ShearStressValArray3DPtr->getNX4());
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), ShearStressValArray3DPtr->getDataVector().begin(),
-                                         ShearStressValArray3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpShearStressValArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> RelaxationFactor3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            RelaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(RelaxationFactor3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(RelaxationFactor3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(RelaxationFactor3DPtr->getNX3());
-                dataSetParamStr.nx[3] = 1;
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), RelaxationFactor3DPtr->getDataVector().begin(),
-                                         RelaxationFactor3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpRelaxationFactor.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writePhaseField(int step, int fieldN)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock        = true;
-    int doubleCountInBlock = 0;
-    int ic                 = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> PhaseField3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2 = block->getX2();
-            dataSetSmallArray[ic].x3 = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            if(fieldN == 1)
-                PhaseField3DPtr = block->getKernel()->getDataSet()->getPhaseField();
-            else
-                PhaseField3DPtr = block->getKernel()->getDataSet()->getPhaseField2();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(PhaseField3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(PhaseField3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(PhaseField3DPtr->getNX3());
-                dataSetParamStr.nx[3] = 1;
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-                std::cout << "writePhaseField"<<fieldN<< " = " << dataSetParamStr.nx[0] << " " << dataSetParamStr.nx[1] << " " << dataSetParamStr.nx[2] << std::endl;
-                firstBlock = false;
-            }
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), PhaseField3DPtr->getDataVector().begin(), PhaseField3DPtr->getDataVector().end());
-
-            ic++;
-        }
-    }
-        
-    // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename;
-    if(fieldN == 1) filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField1.bin";
-    else filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField2.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::writePressureField(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    int blocksCount = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++)
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
-    dataSetParam dataSetParamStr;
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePressureField start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    bool firstBlock = true;
-    int doubleCountInBlock = 0;
-    int ic = 0;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> PressureField3DPtr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++)
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
-        {
-            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2 = block->getX2();
-            dataSetSmallArray[ic].x3 = block->getX3();
-            dataSetSmallArray[ic].level = block->getLevel();
-
-            PressureField3DPtr = block->getKernel()->getDataSet()->getPressureField();
-
-            if (firstBlock) // when first (any) valid block...
-            {
-                dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0] = static_cast<int>(PressureField3DPtr->getNX1());
-                dataSetParamStr.nx[1] = static_cast<int>(PressureField3DPtr->getNX2());
-                dataSetParamStr.nx[2] = static_cast<int>(PressureField3DPtr->getNX3());
-                dataSetParamStr.nx[3] = 1;
-                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-
-                firstBlock = false;
-            }
-
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), PressureField3DPtr->getDataVector().begin(),
-                    PressureField3DPtr->getDataVector().end());
- 
-            ic++;
-        }
-    }
-    //doubleValuesArrayRW.assign(doubleValuesArray.begin(), doubleValuesArray.end());
-    //std::cout << "doubleValuesArrayRW = " << doubleValuesArrayRW.size() << std::endl;
-   // register new MPI-types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePressureField start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset = (MPI_Offset)(size * sizeof(int));
-    size_t next_write_offset = 0;
-
-    if (size > 1)
-    {
-        if (rank == 0)
-        {
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        }
-        else
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-
-    double start{ 0. };
-    double finish{ 0. };
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPressureField.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes common parameters of a dataSet
-    MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-        dataSetSmallType, MPI_STATUS_IGNORE);
-    // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-            &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot())
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writePressureField time: " << finish - start << " s");
-    }
-
-    delete[] dataSetSmallArray;
-}*/
-
-void MPIIORestartCoProcessor::writeBoundaryConds(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    int blocksCount          = 0; // quantity of blocks in the grid, max 2147483648 blocks!
-    size_t count_boundCond   = 0; // how many BoundaryConditions in all blocks
-    int count_indexContainer = 0; // how many indexContainer-values in all blocks
-    size_t byteCount         = 0; // how many bytes writes this process in the file
-
-    std::vector<SPtr<Block3D>> blocksVector[25];
-    int minInitLevel = this->grid->getCoarsestInitializedLevel();
-    int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        grid->getBlocks(level, rank, blocksVector[level]);
-        blocksCount += static_cast<int>(blocksVector[level].size());
-    }
-
-    BCAddRestart *bcAddArray = new BCAddRestart[blocksCount];
-    std::vector<BoundaryCondition> bcVector;
-    std::vector<int> bcindexmatrixV;
-    std::vector<int> indexContainerV;
-    bool bcindexmatrixCountNotInit = true;
-    int ic = 0;
-    SPtr<BCArray3D> bcArr;
-
-    for (int level = minInitLevel; level <= maxInitLevel; level++) 
-    {
-        for (SPtr<Block3D> block : blocksVector[level]) // all the blocks of the current level
-        {
-            bcArr = block->getKernel()->getBCProcessor()->getBCArray();
-
-            bcAddArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            bcAddArray[ic].x2                   = block->getX2();
-            bcAddArray[ic].x3                   = block->getX3();
-            bcAddArray[ic].level                = block->getLevel();
-            bcAddArray[ic].boundCond_count      = 0; // how many BoundaryConditions in this block
-            bcAddArray[ic].indexContainer_count = 0; // how many indexContainer-values in this block
-
-            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++) 
-            {
-                BoundaryCondition *bouCond = new BoundaryCondition();
-                if (bcArr->bcvector[bc] == NULL) 
-                {
-                    memset(bouCond, 0, sizeof(BoundaryCondition));
-                } 
-                else 
-                {
-                    bouCond->noslipBoundaryFlags    = bcArr->bcvector[bc]->getNoSlipBoundary();
-                    bouCond->slipBoundaryFlags      = bcArr->bcvector[bc]->getSlipBoundary();
-                    bouCond->velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
-                    bouCond->densityBoundaryFlags   = bcArr->bcvector[bc]->getDensityBoundary();
-                    bouCond->wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
-                    bouCond->bcVelocityX1           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX1();
-                    bouCond->bcVelocityX2           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX2();
-                    bouCond->bcVelocityX3           = (float)bcArr->bcvector[bc]->getBoundaryVelocityX3();
-                    bouCond->bcDensity              = (float)bcArr->bcvector[bc]->getBoundaryDensity();
-                    bouCond->bcPhaseField           = (float)bcArr->bcvector[bc]->getBoundaryPhaseField();
-                    bouCond->nx1                    = (float)bcArr->bcvector[bc]->nx1;
-                    bouCond->nx2                    = (float)bcArr->bcvector[bc]->nx2;
-                    bouCond->nx3                    = (float)bcArr->bcvector[bc]->nx3;
-                    for (int iq = 0; iq < 26; iq++)
-                        bouCond->q[iq] = (float)bcArr->bcvector[bc]->getQ(iq);
-                    bouCond->algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
-                }
-
-                bcVector.push_back(*bouCond);
-                bcAddArray[ic].boundCond_count++;
-                count_boundCond++;
-            }
-
-            // the quantity of elements in the bcindexmatrix array (CbArray3D<int, IndexerX3X2X1>) in bcArray(BCArray3D)
-            // is always equal, this will be the size of the "write-read-block" in MPI_write_.../MPI_read-functions when
-            // writing/reading BoundConds
-            if (bcindexmatrixCountNotInit) 
-            {
-                boundCondParamStr.nx1                = static_cast<int>(bcArr->bcindexmatrix.getNX1());
-                boundCondParamStr.nx2                = static_cast<int>(bcArr->bcindexmatrix.getNX2());
-                boundCondParamStr.nx3                = static_cast<int>(bcArr->bcindexmatrix.getNX3());
-                boundCondParamStr.bcindexmatrixCount = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
-                bcindexmatrixCountNotInit            = false;
-            }
-            bcindexmatrixV.insert(bcindexmatrixV.end(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
-
-            indexContainerV.insert(indexContainerV.end(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
-            bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
-            count_indexContainer += bcAddArray[ic].indexContainer_count;
-
-            ic++;
-        }
-    }
-
-    MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
-    MPI_Type_commit(&bcindexmatrixType);
-
-    // how many "big blocks" of BLOCK_SIZE size can by formed
-    int bcBlockCount = (int)(count_boundCond / BLOCK_SIZE);
-    if (bcBlockCount * BLOCK_SIZE < (int)count_boundCond)
-        bcBlockCount += 1;
-    for (int i = (int)count_boundCond; i < bcBlockCount * BLOCK_SIZE; i++) 
-    {
-        BoundaryCondition *bouCond = new BoundaryCondition();
-        memset(bouCond, 0, sizeof(BoundaryCondition));
-        bcVector.push_back(*bouCond);
-    }
-
-    byteCount = bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) + blocksCount * sizeof(BCAddRestart) +
-                sizeof(int) * (blocksCount * boundCondParamStr.bcindexmatrixCount + count_indexContainer);
-
-    // write to the file
-    // all processes calculate their offsets (quantity of bytes that the process is going to write)
-    // and notify the next process (with the rank = rank + 1)
-    MPI_Offset write_offset  = (MPI_Offset)(size * (3 * sizeof(int) + sizeof(boundCondParam)));
-    size_t next_write_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_write_offset = write_offset + byteCount;
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + byteCount;
-            if (rank < size - 1)
-                MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN_LUSTRE
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "striping_factor", "40");
-    MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    MPI_Offset write_offset1 = (MPI_Offset)(rank * (3 * sizeof(int) + sizeof(boundCondParam)));
-
-    // each process writes the quantity of it's blocks
-    MPI_File_write_at(file_handler, write_offset1, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes the quantity of "big blocks" of BLOCK_SIZE of boundary conditions
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + sizeof(int)), &bcBlockCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    // each process writes the quantity of indexContainer elements in all blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 2 * sizeof(int)), &count_indexContainer, 1, MPI_INT,  MPI_STATUS_IGNORE);
-    // each process writes the quantity of bcindexmatrix elements in every block
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 3 * sizeof(int)), &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
-
-    // each process writes data identifying the blocks
-    MPI_File_write_at(file_handler, write_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
-    // each process writes boundary conditions
-    if (bcVector.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart)), &bcVector[0],
-                          bcBlockCount, boundCondType1000, MPI_STATUS_IGNORE);
-    // each process writes bcindexmatrix values
-    if (bcindexmatrixV.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition)),
-                          &bcindexmatrixV[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
-    // each process writes indexContainer values
-    if (indexContainerV.size() > 0)
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) +
-                      blocksCount * boundCondParamStr.bcindexmatrixCount * sizeof(int)), &indexContainerV[0], count_indexContainer, MPI_INT, MPI_STATUS_IGNORE);
-
-    MPI_File_sync(file_handler);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&bcindexmatrixType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds time: " << finish - start << " s");
-    }
-
-    delete[] bcAddArray;
-}
-
-//------------------------------------------- READ -----------------------------------------------
-void MPIIORestartCoProcessor::restart(int step)
-{
-    if (comm->isRoot())
-        UBLOG(logINFO, "MPIIORestartCoProcessor restart step: " << step);
-    if (comm->isRoot())
-        UBLOG(logINFO, "Load check point - start");
-
-    readBlocks(step);
-    readDataSet(step);
-    readBoundaryConds(step);
-
-    grid->setTimeStep(step);
-
-    if (comm->isRoot())
-        UBLOG(logINFO, "Load check point - end");
-}
-
-void MPIIORestartCoProcessor::readBlocks(int step) { MPIIOCoProcessor::readBlocks(step); }
-
-void MPIIORestartCoProcessor::readDataSet(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-    bool multiPhase1 = false;
-    bool multiPhase2 = false;
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, read_offset, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetRestart *dataSetArray = new DataSetRestart[blocksCount];
-    double doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
-        dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
-        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-    std::vector<double> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
-    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
-    std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
-
-    //   define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam)), dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam) + blocksCount * sizeof(DataSetRestart)),
-                     &doubleValuesArrayF[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-
- //-------------------------------------- H1 -----------------------------
-    MPI_Offset fsize;
-    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
-    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-    MPI_File_get_size(file_handler, &fsize);
-    if (fsize > 0)
-    {
-        multiPhase1 = true;
-        doubleValuesArrayH1.resize(blocksCount * doubleCountInBlock);
-        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    }
-    MPI_File_close(&file_handler);
-
-    //-------------------------------------- H2 -----------------------------
-    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
-    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    MPI_File_get_size(file_handler, &fsize);
-    if (fsize > 0)
-    {
-        multiPhase2 = true;
-        doubleValuesArrayH2.resize(blocksCount * doubleCountInBlock);
-        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    }
-    MPI_File_close(&file_handler);
-    //-------------------------------------------------------------------
-
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    size_t index = 0;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
-    std::vector<double> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
-    size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
-    size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
-    size_t vectorSize3 = dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValuesF1.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize1);
-        if (multiPhase1)
-            vectorsOfValuesH11.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize1);
-        if (multiPhase2)
-            vectorsOfValuesH21.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize1);
-        index += vectorSize1;
-
-        vectorsOfValuesF2.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize2);
-        if (multiPhase1)
-            vectorsOfValuesH12.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize2);
-        if (multiPhase2)
-            vectorsOfValuesH22.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize2);
-        index += vectorSize2;
-
-        vectorsOfValuesF3.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize3);
-        if (multiPhase1)
-            vectorsOfValuesH13.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize3);
-        if (multiPhase2)
-            vectorsOfValuesH23.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize3);
-        index += vectorSize3;
-
-        SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                    vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr1.nx2);
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr1.nx3);
-
-        SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
-        if (multiPhase1)
-        {
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX2(dataSetParamStr1.nx2);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX3(dataSetParamStr1.nx3);
-        }
-
-        SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
-        if (multiPhase2)
-        {
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                    vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX2(dataSetParamStr1.nx2);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX3(dataSetParamStr1.nx3);
-        }
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetArray[n].x1, dataSetArray[n].x2, dataSetArray[n].x3, dataSetArray[n].level);
-   
-        this->lbmKernel->setBlock(block);
-        this->lbmKernel->setNX(std::array<int, 3>{{dataSetParamStr1.nx1, dataSetParamStr1.nx2, dataSetParamStr1.nx3}});
-        SPtr<LBMKernel> kernel = this->lbmKernel->clone();
-        kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
-        kernel->setCollisionFactor(dataSetArray[n].collFactor);
-        kernel->setDeltaT(dataSetArray[n].deltaT);
-        kernel->setCompressible(dataSetArray[n].compressible);
-        kernel->setWithForcing(dataSetArray[n].withForcing);
-        kernel->setCollisionFactorMultiphase(dataSetArray[n].collFactorL, dataSetArray[n].collFactorG);
-        kernel->setDensityRatio(dataSetArray[n].densityRatio);
-
-        SPtr<DataSet3D> dataSetPtr = SPtr<DataSet3D>(new DataSet3D());
-        dataSetPtr->setFdistributions(mFdistributions);
-        if (multiPhase1)
-            dataSetPtr->setHdistributions(mH1distributions);
-        if (multiPhase2)
-            dataSetPtr->setH2distributions(mH2distributions);
-        kernel->setDataSet(dataSetPtr);
-        block->setKernel(kernel);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetArray;
-
-    //-------------------------------------------------------------
-
-    DSArraysPresence arrPresence;
-    MPI_File file_handler1;
-    std::string filename1 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpArrays.bin";
-    rc = MPI_File_open(MPI_COMM_WORLD, filename1.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler1);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename1);
-    MPI_File_read_at(file_handler1, (MPI_Offset)0, &arrPresence, 1, arrayPresenceType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler1);
-
-    /*if (arrPresence.isAverageDensityArrayPresent)
-        readAverageDensityArray(step);
-
-    if (arrPresence.isAverageVelocityArrayPresent)
-        readAverageVelocityArray(step);
-
-    if (arrPresence.isAverageFluktuationsArrayPresent)
-        readAverageFluktuationsArray(step);
-
-    if (arrPresence.isAverageTripleArrayPresent)
-        readAverageTripleArray(step);
-
-    if (arrPresence.isShearStressValArrayPresent)
-        readShearStressValArray(step);
-
-    if (arrPresence.isRelaxationFactorPresent)
-        readRelaxationFactor(step);
-
-    if (arrPresence.isPhaseField1Present)
-        readPhaseField(step, 1);
-
-    if (arrPresence.isPhaseField2Present)
-        readPhaseField(step, 2);
-
-    if (arrPresence.isPressureFieldPresent)
-        readPressureField(step);*/
-
-    if (arrPresence.isAverageDensityArrayPresent)
-        readArray(step, AverageDensity, std::string("/cpAverageDensityArray.bin"));
-
-    if (arrPresence.isAverageVelocityArrayPresent)
-        readArray(step, AverageVelocity, std::string("/cpAverageVelocityArray.bin"));
-
-    if (arrPresence.isAverageFluktuationsArrayPresent)
-        readArray(step, AverageFluktuations, std::string("/cpAverageFluktuationsArray.bin"));
-
-    if (arrPresence.isAverageTripleArrayPresent)
-        readArray(step, AverageTriple, std::string("/cpAverageTripleArray.bin"));
-
-    if (arrPresence.isShearStressValArrayPresent)
-        readArray(step, ShearStressVal, std::string("/cpShearStressValArray.bin"));
-
-    if (arrPresence.isRelaxationFactorPresent)
-        readArray(step, RelaxationFactor, std::string("/cpRelaxationFactor.bin"));
-
-    if (arrPresence.isPhaseField1Present)
-        readArray(step, PhaseField1, std::string("/cpPhaseField1.bin"));
-
-    if (arrPresence.isPhaseField2Present)
-        readArray(step, PhaseField2, std::string("/cpPhaseField2.bin"));
-
-    if (arrPresence.isPressureFieldPresent)
-        readArray(step, PressureField, std::string("/cpPressureField.bin"));
-
-}
-
-void MPIIORestartCoProcessor::readArray(int step, Arrays arrType, std::string fname)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray start fname = " << fname);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    double start{ 0. };
-    double finish{ 0. };
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + fname;
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    size_t blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    memset(&dataSetParamStr, 0, sizeof(dataSetParam));
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1)
-    {
-        if (rank == 0)
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        }
-        else
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, (int)blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(
-            file_handler,
-            (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-            &doubleValuesArray[0], (int)blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot())
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    //----------------------------- restore data ---------------------------------
-    SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___4DArray;
-    SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___3DArray;
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (std::size_t n = 0; n < blocksCount; n++)
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-
-       // fill arrays
-       switch (arrType)
-        {
-        case AverageDensity:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
-            break;
-        case AverageVelocity:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
-            break;
-        case AverageFluktuations:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
-            break;
-        case AverageTriple:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
-            break;
-        case ShearStressVal:
-            ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-            block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
-            break;
-        case RelaxationFactor:
-            ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-            block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
-            break;
-        case PhaseField1:
-            ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-            block->getKernel()->getDataSet()->setPhaseField(___3DArray);
-            break;
-        case PhaseField2:
-            ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-            block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
-            break;
-        case PressureField:
-            ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-            block->getKernel()->getDataSet()->setPressureField(___3DArray);
-            break;
-        default:
-            UB_THROW(UbException(UB_EXARGS, "MPIIORestartCoProcessor::readArray : array type does not exist!"));
-            break;
-        }
-    }
-
-    if (comm->isRoot())
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-/*void MPIIORestartCoProcessor::readAverageDensityArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageDensityArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    memset(&dataSetParamStr, 0, sizeof(dataSetParam));
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill mAverageDensity arrays
-        SPtr<AverageValuesArray3D> mAverageDensity;
-        mAverageDensity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
-            dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setAverageDensity(mAverageDensity);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readAverageVelocityArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageVelocityArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill mAverageVelocity array
-        SPtr<AverageValuesArray3D> mAverageVelocity;
-        mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, dataSetParamStr.nx[0], 
-            dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setAverageVelocity(mAverageVelocity);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readAverageFluktuationsArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename =
-        path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageFluktuationsArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill AverageFluktuations array
-        SPtr<AverageValuesArray3D> mAverageFluktuations;
-        mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
-                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setAverageFluctuations(mAverageFluktuations);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readAverageTripleArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageTripleArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
-    }
-
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill AverageTriplecorrelations array
-        SPtr<AverageValuesArray3D> mAverageTriplecorrelations;
-        mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
-                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setAverageTriplecorrelations(mAverageTriplecorrelations);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readShearStressValArray(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpShearStressValArray.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
+    if (size > 1) 
     {
         if (rank == 0) 
         {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(real));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         } 
         else 
         {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam)), dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam) + blocksCount * sizeof(DataSetRestart)),
+                     &doubleValuesArrayF[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
-    if (comm->isRoot()) 
-    {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) 
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill ShearStressValuesArray array
-        SPtr<ShearStressValuesArray3D> mShearStressValues;
-        mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
-                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
 
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setShearStressValues(mShearStressValues);
-    }
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-
-    delete[] dataSetSmallArray;
-}
-
-void MPIIORestartCoProcessor::readRelaxationFactor(int step)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
-
-    MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpRelaxationFactor.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+ //-------------------------------------- H1 -----------------------------
+    MPI_Offset fsize;
+    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
-
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
-
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
-
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
-
-    if (size > 1) 
+    MPI_File_get_size(file_handler, &fsize);
+    if (fsize > 0)
     {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
+        multiPhase1 = true;
+        doubleValuesArrayH1.resize(blocksCount * doubleCountInBlock);
+        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     }
+    MPI_File_close(&file_handler);
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    //-------------------------------------- H2 -----------------------------
+    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+    MPI_File_get_size(file_handler, &fsize);
+    if (fsize > 0)
+    {
+        multiPhase2 = true;
+        doubleValuesArrayH2.resize(blocksCount * doubleCountInBlock);
+        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    }
     MPI_File_close(&file_handler);
+    //-------------------------------------------------------------------
+
     MPI_Type_free(&dataSetDoubleType);
 
     if (comm->isRoot()) 
     {
         finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor start of restore of data, rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet time: " << finish - start << " s");
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start of restore of data, rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
-
+    
     size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<real> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<real> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    std::vector<real> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+    size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
+    size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
+    size_t vectorSize3 = dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+
     for (int n = 0; n < blocksCount; n++) 
     {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
-
-        // fill RelaxationFactor array
-        SPtr<RelaxationFactorArray3D> mRelaxationFactor;
-        mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
-
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setRelaxationFactor(mRelaxationFactor);
-    }
+        vectorsOfValuesF1.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize1);
+        if (multiPhase1)
+            vectorsOfValuesH11.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize1);
+        if (multiPhase2)
+            vectorsOfValuesH21.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize1);
+        index += vectorSize1;
 
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
+        vectorsOfValuesF2.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize2);
+        if (multiPhase1)
+            vectorsOfValuesH12.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize2);
+        if (multiPhase2)
+            vectorsOfValuesH22.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize2);
+        index += vectorSize2;
 
-    delete[] dataSetSmallArray;
-}
+        vectorsOfValuesF3.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize3);
+        if (multiPhase1)
+            vectorsOfValuesH13.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize3);
+        if (multiPhase2)
+            vectorsOfValuesH23.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize3);
+        index += vectorSize3;
 
-void MPIIORestartCoProcessor::readPhaseField(int step, int fieldN)
-{
-    int rank, size;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &size);
+        SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                    vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
-    if (comm->isRoot()) 
-    {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
-    
-    double start {0.};
-    double finish {0.};
-    if (comm->isRoot())
-        start = MPI_Wtime();
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr1.nx2);
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr1.nx3);
 
-    MPI_File file_handler;
-    std::string filename;
-    if(fieldN == 1) filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField1.bin";
-    else filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField2.bin";
-    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-    if (rc != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+        SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
+        if (multiPhase1)
+        {
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
-    // read count of blocks
-    int blocksCount = 0;
-    dataSetParam dataSetParamStr;
-    MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX2(dataSetParamStr1.nx2);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX3(dataSetParamStr1.nx3);
+        }
 
-    DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
+        SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
+        if (multiPhase2)
+        {
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<real, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                    vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
-    // define MPI_types depending on the block-specific information
-    MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-    MPI_Type_commit(&dataSetDoubleType);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX2(dataSetParamStr1.nx2);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX3(dataSetParamStr1.nx3);
+        }
 
-    // calculate the read offset
-    MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
-    size_t next_read_offset = 0;
+        // find the necessary block and fill it
+        SPtr<Block3D> block = grid->getBlock(dataSetArray[n].x1, dataSetArray[n].x2, dataSetArray[n].x3, dataSetArray[n].level);
+   
+        this->lbmKernel->setBlock(block);
+        this->lbmKernel->setNX(std::array<int, 3>{{dataSetParamStr1.nx1, dataSetParamStr1.nx2, dataSetParamStr1.nx3}});
+        SPtr<LBMKernel> kernel = this->lbmKernel->clone();
+        kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
+        kernel->setCollisionFactor(dataSetArray[n].collFactor);
+        kernel->setDeltaT(dataSetArray[n].deltaT);
+        kernel->setCompressible(dataSetArray[n].compressible);
+        kernel->setWithForcing(dataSetArray[n].withForcing);
+        kernel->setCollisionFactorMultiphase(dataSetArray[n].collFactorL, dataSetArray[n].collFactorG);
+        kernel->setDensityRatio(dataSetArray[n].densityRatio);
 
-    if (size > 1) 
-    {
-        if (rank == 0) 
-        {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } 
-        else 
-        {
-            MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
-            if (rank < size - 1)
-                MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-        }
+        SPtr<DataSet3D> dataSetPtr = SPtr<DataSet3D>(new DataSet3D());
+        dataSetPtr->setFdistributions(mFdistributions);
+        if (multiPhase1)
+            dataSetPtr->setHdistributions(mH1distributions);
+        if (multiPhase2)
+            dataSetPtr->setH2distributions(mH2distributions);
+        kernel->setDataSet(dataSetPtr);
+        block->setKernel(kernel);
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
-    if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-                         &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-    MPI_File_close(&file_handler);
-    MPI_Type_free(&dataSetDoubleType);
-
     if (comm->isRoot()) 
     {
-        finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField start of restore of data, rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet end of restore of data, rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    size_t index = 0;
-    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    std::cout << "readPhaseField"<< fieldN<<" = " << dataSetParamStr.nx[0] << " " << dataSetParamStr.nx[1] << " " << dataSetParamStr.nx[2] << std::endl;
+    delete[] dataSetArray;
 
-    for (int n = 0; n < blocksCount; n++)
-    {
-        vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
-        index += nextVectorSize;
+    //-------------------------------------------------------------
 
-        // fill PhaseField array
-        SPtr<PhaseFieldArray3D> mPhaseField;
-        mPhaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+    DSArraysPresence arrPresence;
+    MPI_File file_handler1;
+    std::string filename1 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpArrays.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename1.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler1);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename1);
+    MPI_File_read_at(file_handler1, (MPI_Offset)0, &arrPresence, 1, arrayPresenceType, MPI_STATUS_IGNORE);
+    MPI_File_close(&file_handler1);
 
-        // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        if(fieldN == 1)
-            block->getKernel()->getDataSet()->setPhaseField(mPhaseField);
-        else
-            block->getKernel()->getDataSet()->setPhaseField2(mPhaseField);
-       int nx1 = static_cast<int>(block->getKernel()->getDataSet()->getPhaseField()->getNX1());
-       int nx2 = static_cast<int>(block->getKernel()->getDataSet()->getPhaseField()->getNX2());
-       int nx3 = static_cast<int>(block->getKernel()->getDataSet()->getPhaseField()->getNX3());
-        dataSetParamStr.nx[3] = 1;
-        doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-        std::cout << "writePhaseField" << fieldN << " = " << nx1 << " " << nx2 << " " << nx3 << std::endl;
+    if (arrPresence.isAverageDensityArrayPresent)
+        readArray(step, AverageDensity, std::string("/cpAverageDensityArray.bin"));
 
-    }
+    if (arrPresence.isAverageVelocityArrayPresent)
+        readArray(step, AverageVelocity, std::string("/cpAverageVelocityArray.bin"));
 
-    if (comm->isRoot()) 
-    { 
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
-    }
+    if (arrPresence.isAverageFluktuationsArrayPresent)
+        readArray(step, AverageFluktuations, std::string("/cpAverageFluktuationsArray.bin"));
+
+    if (arrPresence.isAverageTripleArrayPresent)
+        readArray(step, AverageTriple, std::string("/cpAverageTripleArray.bin"));
+
+    if (arrPresence.isShearStressValArrayPresent)
+        readArray(step, ShearStressVal, std::string("/cpShearStressValArray.bin"));
+
+    if (arrPresence.isRelaxationFactorPresent)
+        readArray(step, RelaxationFactor, std::string("/cpRelaxationFactor.bin"));
+
+    if (arrPresence.isPhaseField1Present)
+        readArray(step, PhaseField1, std::string("/cpPhaseField1.bin"));
+
+    if (arrPresence.isPhaseField2Present)
+        readArray(step, PhaseField2, std::string("/cpPhaseField2.bin"));
+
+    if (arrPresence.isPressureFieldPresent)
+        readArray(step, PressureField, std::string("/cpPressureField.bin"));
 
-    delete[] dataSetSmallArray;
 }
 
-void MPIIORestartCoProcessor::readPressureField(int step)
+void MPIIORestartCoProcessor::readArray(int step, Arrays arrType, std::string fname)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -3314,30 +1278,32 @@ void MPIIORestartCoProcessor::readPressureField(int step)
 
     if (comm->isRoot())
     {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPressureField start MPI IO rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray start fname = " << fname);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    double start{ 0. };
-    double finish{ 0. };
+    real start{ 0. };
+    real finish{ 0. };
     if (comm->isRoot())
         start = MPI_Wtime();
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPressureField.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + fname;
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
     // read count of blocks
-    int blocksCount = 0;
+    size_t blocksCount = 0;
     dataSetParam dataSetParamStr;
+    memset(&dataSetParamStr, 0, sizeof(dataSetParam));
+
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
     MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallRestart* dataSetSmallArray = new DataSetSmallRestart[blocksCount];
     int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
+    std::vector<real> doubleValuesArray(blocksCount * doubleCountInBlock); // real-values in all blocks
 
     // define MPI_types depending on the block-specific information
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
@@ -3351,60 +1317,112 @@ void MPIIORestartCoProcessor::readPressureField(int step)
     {
         if (rank == 0)
         {
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
         }
         else
         {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(real));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, (int)blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
-            &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+        MPI_File_read_at(
+            file_handler,
+            (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+            &doubleValuesArray[0], (int)blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
     if (comm->isRoot())
     {
         finish = MPI_Wtime();
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPressureField time: " << finish - start << " s");
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPressureField start of restore of data, rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray time: " << finish - start << " s");
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray start of restore of data, rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
+    //----------------------------- restore data ---------------------------------
+    SPtr<CbArray4D<real, IndexerX4X3X2X1>> ___4DArray;
+    SPtr<CbArray3D<real, IndexerX3X2X1>> ___3DArray;
+
     size_t index = 0;
     size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-
-    for (int n = 0; n < blocksCount; n++)
+    std::vector<real> vectorsOfValues;
+    for (std::size_t n = 0; n < blocksCount; n++)
     {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
-    
-        // fill Pressure array
-        SPtr<PressureFieldArray3D> mPressureField;
-        mPressureField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
 
         // find the nesessary block and fill it
         SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setPressureField(mPressureField);
+
+       // fill arrays
+       switch (arrType)
+        {
+        case AverageDensity:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
+            break;
+        case AverageVelocity:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
+            break;
+        case AverageFluktuations:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
+            break;
+        case AverageTriple:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
+            break;
+        case ShearStressVal:
+            ___4DArray = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<real, IndexerX4X3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+            block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
+            break;
+        case RelaxationFactor:
+            ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+            block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
+            break;
+        case PhaseField1:
+            ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+            block->getKernel()->getDataSet()->setPhaseField(___3DArray);
+            break;
+        case PhaseField2:
+            ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+            block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
+            break;
+        case PressureField:
+            ___3DArray = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(
+                vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+            block->getKernel()->getDataSet()->setPressureField(___3DArray);
+            break;
+        default:
+            UB_THROW(UbException(UB_EXARGS, "MPIIORestartCoProcessor::readArray : array type does not exist!"));
+            break;
+        }
     }
 
     if (comm->isRoot())
     {
-        UBLOG(logINFO, "MPIIORestartCoProcessor::readPressureField end of restore of data, rank = " << rank);
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readArray end of restore of data, rank = " << rank);
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
-}*/
+}
 
 void MPIIORestartCoProcessor::readBoundaryConds(int step)
 {
@@ -3418,8 +1436,8 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
         UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     
-    double start {0.};
-    double finish {0.};
+    real start {0.};
+    real finish {0.};
     if (comm->isRoot())
         start = MPI_Wtime();
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
index 1a1e1fb4d45066a93826fe7a819b056e10544036..a4c1b32efbafbdb467eee7facede5f8d834e1a93 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
@@ -35,7 +35,7 @@ public:
     MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIORestartCoProcessor() override;
     //! Each timestep writes the grid into the files
-    void process(double step) override;
+    void process(real step) override;
     //! Reads the grid from the files before grid reconstruction
     void restart(int step);
     //! Writes the blocks of the grid into the file cpBlocks.bin
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
index 53e98e9e107e0cc91fccf6e59afae18ea9a0e931..f1762b5fdcfa8a2d4b20ee95665a2a3329e85196 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
@@ -22,7 +22,7 @@ MicrophoneArrayCoProcessor::MicrophoneArrayCoProcessor(SPtr<Grid3D> grid, SPtr<U
 
 MicrophoneArrayCoProcessor::~MicrophoneArrayCoProcessor() = default;
 
-void MicrophoneArrayCoProcessor::process(double step)
+void MicrophoneArrayCoProcessor::process(real step)
 {
     if (microphones.size() > 0) {
         collectData(step);
@@ -89,20 +89,20 @@ bool MicrophoneArrayCoProcessor::addMicrophone(Vector3D coords)
     return false;
 }
 
-void MicrophoneArrayCoProcessor::collectData(double step)
+void MicrophoneArrayCoProcessor::collectData(real step)
 {
     for (std::size_t i = 0; i < microphones.size(); i++) {
-        LBMReal f[D3Q27System::ENDF + 1];
+        real f[D3Q27System::ENDF + 1];
         microphones[i]->distridution->getDistribution(f, val<1>(microphones[i]->nodeIndexes),
                                                       val<2>(microphones[i]->nodeIndexes),
                                                       val<3>(microphones[i]->nodeIndexes));
-        LBMReal vx1, vx2, vx3, rho;
+        real vx1, vx2, vx3, rho;
         calcMacros(f, rho, vx1, vx2, vx3);
         *strVector[i] << step << ';' << rho << '\n';
     }
 }
 
-void MicrophoneArrayCoProcessor::writeFile(double /*step*/)
+void MicrophoneArrayCoProcessor::writeFile(real /*step*/)
 {
     for (std::size_t i = 0; i < microphones.size(); i++) {
         std::string fname = path + "/mic/mic_" + UbSystem::toString(microphones[i]->id) + ".csv";
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
index a10f30440c8539677511af6f7ac40fbe257d4eaf..140ac5a48405adb96b64941144a13fa6790a9e8c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
@@ -27,14 +27,14 @@ public:
     ~MicrophoneArrayCoProcessor() override;
 
     //! calls collectData.
-    void process(double step) override;
+    void process(real step) override;
 
     //! add microphone
     bool addMicrophone(Vector3D coords);
 
 protected:
-    void collectData(double step);
-    void writeFile(double step);
+    void collectData(real step);
+    void writeFile(real step);
 
 private:
     std::string path;
@@ -52,7 +52,7 @@ private:
     int count;
     int micID;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
index 633ffd26f3ed77c58ac83200fdf18cb6f0385979..af8cf408369454127a1fd246cf19f2e9fecefc96 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
@@ -44,16 +44,16 @@ NUPSCounterCoProcessor::NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbSchedul
     if (comm->getProcessID() == comm->getRoot()) {
         timer.resetAndStart();
 
-        double nop          = comm->getNumberOfProcesses();
+        real nop          = comm->getNumberOfProcesses();
         int minInitLevel    = grid->getCoarsestInitializedLevel();
         int maxInitLevel    = grid->getFinestInitializedLevel();
         UbTupleInt3 blocknx = grid->getBlockNX();
-        double nod          = (double)(val<1>(blocknx)) * (double)(val<2>(blocknx)) * (double)(val<3>(blocknx));
+        real nod          = (real)(val<1>(blocknx)) * (real)(val<2>(blocknx)) * (real)(val<3>(blocknx));
         nup                 = 0;
 
         for (int level = minInitLevel; level <= maxInitLevel; level++) {
             int nob = grid->getNumberOfBlocks(level);
-            nup_t += (double)(1 << level) * nob * nod;
+            nup_t += (real)(1 << level) * nob * nod;
         }
         nup = nup_t / nop;
     }
@@ -61,19 +61,19 @@ NUPSCounterCoProcessor::NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbSchedul
 //////////////////////////////////////////////////////////////////////////
 NUPSCounterCoProcessor::~NUPSCounterCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void NUPSCounterCoProcessor::process(double step)
+void NUPSCounterCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void NUPSCounterCoProcessor::collectData(double step)
+void NUPSCounterCoProcessor::collectData(real step)
 {
     if (comm->getProcessID() == comm->getRoot()) {
-        double time   = timer.stop();
-        double nups_t = nup_t * (step - nupsStep) / time;
-        double nups   = nup * (step - nupsStep) / time;
-        double tnups  = nups / (double)numOfThreads;
+        real time   = timer.stop();
+        real nups_t = nup_t * (step - nupsStep) / time;
+        real nups   = nup * (step - nupsStep) / time;
+        real tnups  = nups / (real)numOfThreads;
         UBLOG(logINFO, "Calculation step = " << step);
         UBLOG(logINFO, "Total performance = " << nups_t << " NUPS");
         UBLOG(logINFO, "Performance per process = " << nups << " NUPS");
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
index ce6b16996824be9e614e131c6e05fad0d1a507fd..b178c97fe25647c7bec60883811a3263abc046bc 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
@@ -57,19 +57,19 @@ public:
     NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, int numOfThreads, std::shared_ptr<vf::mpi::Communicator> comm);
     ~NUPSCounterCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for calculation of NUPS
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     UbTimer timer;
     int numOfThreads;
-    double numberOfNodes;
-    double numberOfBlocks;
-    double nup;
-    double nup_t;
-    double nupsStep;
+    real numberOfNodes;
+    real numberOfBlocks;
+    real nup;
+    real nup_t;
+    real nupsStep;
     std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
index ae385117c311eabfe2c5b98c8c2c45f4cd7473cd..4197c5cfe7e9d8f0f9da618ff58f4b421ae3d4fa 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
@@ -23,7 +23,7 @@ PressureCoefficientCoProcessor::PressureCoefficientCoProcessor(SPtr<Grid3D> grid
 //////////////////////////////////////////////////////////////////////////
 PressureCoefficientCoProcessor::~PressureCoefficientCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void PressureCoefficientCoProcessor::process(double step)
+void PressureCoefficientCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -31,7 +31,7 @@ void PressureCoefficientCoProcessor::process(double step)
     UBLOG(logDEBUG3, "D3Q27ForcesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void PressureCoefficientCoProcessor::collectData(double step)
+void PressureCoefficientCoProcessor::collectData(real step)
 {
     calculateRho();
 
@@ -42,10 +42,10 @@ void PressureCoefficientCoProcessor::collectData(double step)
 //////////////////////////////////////////////////////////////////////////
 void PressureCoefficientCoProcessor::calculateRho()
 {
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
-    std::vector<double> values;
-    std::vector<double> rvalues;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
+    std::vector<real> values;
+    std::vector<real> rvalues;
 
     for (SPtr<D3Q27Interactor> interactor : interactors) {
         typedef std::map<SPtr<Block3D>, std::set<std::vector<int>>> TransNodeIndicesMap;
@@ -60,7 +60,7 @@ void PressureCoefficientCoProcessor::calculateRho()
             UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
             //         UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
             UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-            double dx                 = grid->getDeltaX(block);
+            real dx                 = grid->getDeltaX(block);
 
             if (kernel->getCompressible()) {
                 calcMacros = &D3Q27System::calcCompMacroscopicValues;
@@ -89,9 +89,9 @@ void PressureCoefficientCoProcessor::calculateRho()
                         x1, x2,
                         x3)) // es kann sein, dass der node von einem anderen interactor z.B. als solid gemarkt wurde!!!
                 {
-                    double cx1 = val<1>(org) - val<1>(nodeOffset) + x1 * dx;
-                    double cx2 = val<2>(org) - val<2>(nodeOffset) + x2 * dx;
-                    double cx3 = val<3>(org) - val<3>(nodeOffset) + x3 * dx;
+                    real cx1 = val<1>(org) - val<1>(nodeOffset) + x1 * dx;
+                    real cx2 = val<2>(org) - val<2>(nodeOffset) + x2 * dx;
+                    real cx3 = val<3>(org) - val<3>(nodeOffset) + x3 * dx;
                     if (plane->isPointInGbObject3D(cx1, cx2, cx3)) {
                         distributions->getDistribution(f, x1, x2, x3);
                         calcMacros(f, rho, vx1, vx2, vx3);
@@ -172,7 +172,7 @@ void PressureCoefficientCoProcessor::writeValues(int step)
                 throw UbException(UB_EXARGS, "couldn't open file " + fname);
         }
 
-        out.write((char *)&outValues[0], outValues.size() * sizeof(double));
+        out.write((char *)&outValues[0], outValues.size() * sizeof(real));
 
         out.close();
 
@@ -193,7 +193,7 @@ void PressureCoefficientCoProcessor::readValues(int step)
         int length = (int)in.tellg();
         in.seekg(0, in.beg);
 
-        outValues.resize(length / sizeof(double));
+        outValues.resize(length / sizeof(real));
 
         in.read((char *)&outValues[0], length);
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
index 42927a7315d620e60c3af5c4285a89c18609cee7..26b8117aea007671bc1d6b17104f015cd62ddda3 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
@@ -22,13 +22,13 @@ public:
                                    const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~PressureCoefficientCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
     void addInteractor(SPtr<D3Q27Interactor> interactor);
     void readValues(int step);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void calculateRho();
     void writeValues(int step);
 
@@ -38,15 +38,15 @@ private:
     std::shared_ptr<vf::mpi::Communicator> comm;
     std::vector<SPtr<D3Q27Interactor>> interactors;
     int numberOfSteps;
-    double maxStep;
+    real maxStep;
 
     std::vector<UbTupleFloat3> nodes;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
 
-    std::vector<double> outValues;
+    std::vector<real> outValues;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
index 74cd5a09c71b717f138090892b51b12a721f60ab..a486da249e5c2ce2eeaaf53fa4601d39bda689b0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
@@ -17,8 +17,8 @@
 
 PressureDifferenceCoProcessor::PressureDifferenceCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                              const std::string &path, SPtr<IntegrateValuesHelper> h1,
-                                                             SPtr<IntegrateValuesHelper> h2, LBMReal rhoReal,
-                                                             LBMReal uReal, LBMReal uLB, std::shared_ptr<vf::mpi::Communicator> comm)
+                                                             SPtr<IntegrateValuesHelper> h2, real rhoReal,
+                                                             real uReal, real uLB, std::shared_ptr<vf::mpi::Communicator> comm)
 
     : CoProcessor(grid, s), path(path), h1(h1), h2(h2), comm(comm)
 {
@@ -71,13 +71,13 @@ PressureDifferenceCoProcessor::PressureDifferenceCoProcessor(SPtr<Grid3D> grid,
 //////////////////////////////////////////////////////////////////////////
 PressureDifferenceCoProcessor::~PressureDifferenceCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void PressureDifferenceCoProcessor::process(double step)
+void PressureDifferenceCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void PressureDifferenceCoProcessor::collectData(double step)
+void PressureDifferenceCoProcessor::collectData(real step) // NOTE(review): step is truncated to int further down; if real can be single precision, step counts above 2^24 are no longer exact - confirm
 {
     h1->calculateMQ();
     h2->calculateMQ();
@@ -85,13 +85,13 @@ void PressureDifferenceCoProcessor::collectData(double step)
     if (comm->getProcessID() == comm->getRoot()) {
         int istep = static_cast<int>(step);
         std::ofstream ostr;
-        double nn1  = h1->getNumberOfFluidsNodes();
-        double nn2  = h2->getNumberOfFluidsNodes();
-        double rho1 = h1->getRho();
-        double rho2 = h2->getRho();
-        double p1_1 = (rho1 / nn1) * factor1;
-        double p1_2 = (rho2 / nn2) * factor1;
-        double dp1  = p1_1 - p1_2;
+        real nn1  = h1->getNumberOfFluidsNodes();
+        real nn2  = h2->getNumberOfFluidsNodes();
+        real rho1 = h1->getRho();
+        real rho2 = h2->getRho();
+        real p1_1 = (rho1 / nn1) * factor1;
+        real p1_2 = (rho2 / nn2) * factor1;
+        real dp1  = p1_1 - p1_2;
 
         // double press1 = h1->getPress();
         // double press2 = h2->getPress();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
index 6de68a977904d5cc25ee37395eff4c9e66748eb4..09523552289297b78fb59b66e86e7ba84e1ed00b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
@@ -24,22 +24,22 @@ class PressureDifferenceCoProcessor : public CoProcessor
 {
 public:
     PressureDifferenceCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                  SPtr<IntegrateValuesHelper> h1, SPtr<IntegrateValuesHelper> h2, LBMReal rhoReal,
-                                  LBMReal uReal, LBMReal uLB,
+                                  SPtr<IntegrateValuesHelper> h1, SPtr<IntegrateValuesHelper> h2, real rhoReal,
+                                  real uReal, real uLB,
                                   /*const SPtr<LBMUnitConverter> conv,*/ std::shared_ptr<vf::mpi::Communicator> comm);
     ~PressureDifferenceCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     SPtr<IntegrateValuesHelper> h1, h2;
     std::string path;
     SPtr<LBMUnitConverter> conv;
-    void collectData(double step);
+    void collectData(real step);
     std::shared_ptr<vf::mpi::Communicator> comm;
-    LBMReal factor1; //= (1/3)*rhoReal*(uReal/uLB)^2 for calculation pReal = rhoLB * (1/3)*rhoReal*(uReal/uLB)^2,
+    real factor1; //= (1/3)*rhoReal*(uReal/uLB)^2 for calculation pReal = rhoLB * (1/3)*rhoReal*(uReal/uLB)^2,
                      //rhoReal and uReal in SI
-    LBMReal factor2; //= rhoReal*(uReal/uLB)^2       for calculation pReal = press * rhoReal*(uReal/uLB)^2, rhoReal and
+    real factor2; //= rhoReal*(uReal/uLB)^2       for calculation pReal = press * rhoReal*(uReal/uLB)^2, rhoReal and
                      //uReal in SI
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
index 1fbdb6f7f40a9b126cfa174d8cef7d7516ff884a..4e62a1c6bbb4c9f9a74968170c5821cc0f46fd23 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
@@ -32,7 +32,7 @@ void QCriterionCoProcessor::init()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void QCriterionCoProcessor::process(double step)
+void QCriterionCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -40,7 +40,7 @@ void QCriterionCoProcessor::process(double step)
     UBLOG(logDEBUG3, "QCriterionCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void QCriterionCoProcessor::collectData(double step)
+void QCriterionCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -92,7 +92,7 @@ void QCriterionCoProcessor::addData(const SPtr<Block3D> block)
     UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
     //	UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -135,36 +135,36 @@ void QCriterionCoProcessor::addData(const SPtr<Block3D> block)
 
                     /////////////////////////////
                     // Geschwindigkeitsvektoren
-                    LBMReal vE[3];
-                    LBMReal vW[3];
-                    LBMReal vN[3];
-                    LBMReal vS[3];
-                    LBMReal vT[3];
-                    LBMReal vB[3];
+                    real vE[3];
+                    real vW[3];
+                    real vN[3];
+                    real vS[3];
+                    real vT[3];
+                    real vB[3];
                     // hole geschwindigkeiten an nachbarknoten
                     getNeighborVelocities(1, 0, 0, ix1, ix2, ix3, block, vE, vW);
                     getNeighborVelocities(0, 1, 0, ix1, ix2, ix3, block, vN, vS);
                     getNeighborVelocities(0, 0, 1, ix1, ix2, ix3, block, vT, vB);
                     //////////////////////////////////
                     // derivatives
-                    LBMReal duxdy = (vN[xdir] - vS[xdir]) * 0.5;
-                    LBMReal duydx = (vE[ydir] - vW[ydir]) * 0.5;
-                    LBMReal duxdz = (vT[xdir] - vB[xdir]) * 0.5;
-                    LBMReal duzdx = (vE[zdir] - vW[zdir]) * 0.5;
-                    LBMReal duydz = (vT[ydir] - vB[ydir]) * 0.5;
-                    LBMReal duzdy = (vN[zdir] - vS[zdir]) * 0.5;
-
-                    LBMReal duxdx = (vE[xdir] - vW[xdir]) * 0.5;
-                    LBMReal duydy = (vN[ydir] - vS[ydir]) * 0.5;
-                    LBMReal duzdz = (vT[zdir] - vB[zdir]) * 0.5;
-
-                    LBMReal scaleFactor =
-                        (double)(1
+                    real duxdy = (vN[xdir] - vS[xdir]) * 0.5;
+                    real duydx = (vE[ydir] - vW[ydir]) * 0.5;
+                    real duxdz = (vT[xdir] - vB[xdir]) * 0.5;
+                    real duzdx = (vE[zdir] - vW[zdir]) * 0.5;
+                    real duydz = (vT[ydir] - vB[ydir]) * 0.5;
+                    real duzdy = (vN[zdir] - vS[zdir]) * 0.5;
+
+                    real duxdx = (vE[xdir] - vW[xdir]) * 0.5;
+                    real duydy = (vN[ydir] - vS[ydir]) * 0.5;
+                    real duzdz = (vT[zdir] - vB[zdir]) * 0.5;
+
+                    real scaleFactor =
+                        (real)(1
                                  << (currentLevel -
                                      minInitLevel)); // pow(2.0,(double)(currentLevel-minInitLevel));//finer grid ->
                                                      // current level higher. coarsest grid: currentLevel=minInitLevel=0
                     // Q=-0.5*(S_ij S_ij - Omega_ij Omega_ij) => regions where vorticity is larger than strain rate
-                    LBMReal q = -(duxdy * duydx + duxdz * duzdx + duydz * duzdy + duxdx * duxdx + duydy * duydy +
+                    real q = -(duxdy * duydx + duxdz * duzdx + duydz * duzdy + duxdx * duxdx + duydy * duydy +
                                   duzdz * duzdz) *
                                 scaleFactor;
 
@@ -201,7 +201,7 @@ void QCriterionCoProcessor::addData(const SPtr<Block3D> block)
 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz, int ix1, int ix2, int ix3,
-                                                  const SPtr<Block3D> block, LBMReal *vE, LBMReal *vW)
+                                                  const SPtr<Block3D> block, real *vE, real *vW)
 {
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
@@ -234,9 +234,9 @@ void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz,
     if ((ix1 == 0 && offx == 1) || (ix2 == 0 && offy == 1) || (ix3 == 0 && offz == 1)) {
         int RankNeighborW;
         Vector3D orgNodeRW = grid->getNodeCoordinates(block, ix1, ix2, ix3);
-        double xp000       = orgNodeRW[0];
-        double yp000       = orgNodeRW[1];
-        double zp000       = orgNodeRW[2];
+        real xp000       = orgNodeRW[0];
+        real yp000       = orgNodeRW[1];
+        real zp000       = orgNodeRW[2];
 
         int currentLevel         = block->getLevel();
         UbTupleInt3 blockIndexes = grid->getBlockIndexes(xp000, yp000, zp000, currentLevel);
@@ -282,12 +282,12 @@ void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz,
             SPtr<ILBMKernel> kernelW                 = blockNeighW->getKernel();
             SPtr<BCArray3D> bcArrayW                 = kernelW->getBCProcessor()->getBCArray();
             SPtr<DistributionArray3D> distributionsW = kernelW->getDataSet()->getFdistributions();
-            LBMReal fW2[27];
-            LBMReal fW[27];
-            LBMReal f0[27];
-            LBMReal fE[27];
-            LBMReal v0[3];
-            LBMReal vW2[3];
+            real fW2[27];
+            real fW[27];
+            real f0[27];
+            real fE[27];
+            real v0[3];
+            real vW2[3];
             // distributionsW->getDistribution(fW2, std::max(ix1+2*offx,1), std::max(ix2+2*offy,1),
             // std::max(ix3+2*offz,1)); distributionsW->getDistribution(fW, std::max(ix1+offx,1), std::max(ix2+offy,1),
             // std::max(ix3+offz,1)); distributionsW->getDistribution(f0, std::max(ix1    ,1), std::max(ix2    ,1),
@@ -314,7 +314,7 @@ void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz,
             SPtr<ILBMKernel> kernelW                 = blockNeighW->getKernel();
             SPtr<BCArray3D> bcArrayW                 = kernelW->getBCProcessor()->getBCArray();
             SPtr<DistributionArray3D> distributionsW = kernelW->getDataSet()->getFdistributions();
-            LBMReal fW[27];
+            real fW[27];
 
             if (offx == 1) {
                 distributionsW->getDistribution(fW, (distributions->getNX1()) - 1, ix2,
@@ -330,20 +330,20 @@ void QCriterionCoProcessor::getNeighborVelocities(int offx, int offy, int offz,
 
     } else {
         // data available in current block:
-        LBMReal fW[27];
+        real fW[27];
         distributions->getDistribution(fW, ix1 - offx, ix2 - offy, ix3 - offz);
         computeVelocity(fW, vW, compressible);
     }
     if (checkInterpolation) {
         // in plus-direction data is available in current block because of ghost layers
-        LBMReal fE[27];
+        real fE[27];
         distributions->getDistribution(fE, ix1 + offx, ix2 + offy, ix3 + offz); // E:= plus 1
         computeVelocity(fE, vE, compressible);
     }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-void QCriterionCoProcessor::computeVelocity(LBMReal *f, LBMReal *v, bool compressible)
+void QCriterionCoProcessor::computeVelocity(real *f, real *v, bool compressible)
 {
     //////////////////////////////////////////////////////////////////////////
     // compute x,y,z-velocity components from distribution
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
index 55f0df5a2e8aaaf933babb70d6b9c5246424c34c..38cd47fb890ffc79bb3f43ecc17bbe42885fa114 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
@@ -31,27 +31,27 @@ public:
     QCriterionCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer, SPtr<UbScheduler> s,
                           std::shared_ptr<vf::mpi::Communicator> comm);
     //! Make update if timestep is write-timestep specified in SPtr<UbScheduler> s
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Prepare data and write in .vtk file
-    void collectData(double step);
+    void collectData(real step);
     //! Q is computed for all points in a block. Data for writing is added to data and cell vectors.
     void addData(const SPtr<Block3D> block);
     //! After writing to .vtk-file, all vectors are reset
     void clearData();
     //! Computes macroscopic velocities
-    void computeVelocity(LBMReal *f, LBMReal *v, bool compressible);
+    void computeVelocity(real *f, real *v, bool compressible);
     //! Computes average and RMS values of macroscopic quantities
     void getNeighborVelocities(int offx, int offy, int offz, int ix1, int ix2, int ix3, const SPtr<Block3D> block,
-                               LBMReal *vE, LBMReal *vW);
+                               real *vE, real *vW);
 
 private:
     void init();
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames; // only one entry for QKrit-CoProcessor: Q
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
     int minInitLevel; // go through all levels for block vector of current process from minInitLevel to maxInitLevel
     int maxInitLevel;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
index 64ecc177ff38403f346a519e8d0a5515a12713e4..cd1f9c54cb50585b572a61cdc7d8c884386b864c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
@@ -38,7 +38,7 @@ ShearStressCoProcessor::ShearStressCoProcessor(SPtr<Grid3D> grid, const std::str
 //////////////////////////////////////////////////////////////////////////
 ShearStressCoProcessor::~ShearStressCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::process(double step)
+void ShearStressCoProcessor::process(real step)
 {
     if (step == 0) {
         initDistance();
@@ -49,7 +49,7 @@ void ShearStressCoProcessor::process(double step)
     UBLOG(logDEBUG3, "D3Q27ShearStressCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::collectData(double step)
+void ShearStressCoProcessor::collectData(real step)
 {
     using namespace std;
 
@@ -122,12 +122,13 @@ void ShearStressCoProcessor::clearData()
     data.clear();
 }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::calculateShearStress(double timeStep)
+void ShearStressCoProcessor::calculateShearStress(real timeStep)
 {
+    using namespace vf::lbm::dir;
     using namespace D3Q27System;
 
-    LBMReal f[27];
-    LBMReal vx, vy, vz, sxx, syy, szz, sxy, syz, sxz;
+    real f[27];
+    real vx, vy, vz, sxx, syy, szz, sxy, syz, sxz;
 
     for (SPtr<D3Q27Interactor> interactor : interactors) {
         typedef std::map<SPtr<Block3D>, std::set<std::vector<int>>> TransNodeIndicesMap;
@@ -141,7 +142,7 @@ void ShearStressCoProcessor::calculateShearStress(double timeStep)
             SPtr<ShearStressValuesArray3D> ssv      = kernel->getDataSet()->getShearStressValues();
 
             int ghostLayer     = kernel->getGhostLayerWidth();
-            LBMReal collFactor = kernel->getCollisionFactor();
+            real collFactor = kernel->getCollisionFactor();
 
             int minX1 = ghostLayer;
             int maxX1 = (int)bcArray->getNX1() - 1 - ghostLayer;
@@ -160,8 +161,8 @@ void ShearStressCoProcessor::calculateShearStress(double timeStep)
                     continue;
 
                 if (bcArray->isFluid(ix1, ix2, ix3)) {
-                    double q        = (*ssv)(normalq, ix1, ix2, ix3);
-                    double numPoint = (*ssv)(numberOfPoint, ix1, ix2, ix3);
+                    real q        = (*ssv)(normalq, ix1, ix2, ix3);
+                    real numPoint = (*ssv)(numberOfPoint, ix1, ix2, ix3);
                     if (q == 0 || numPoint != 3)
                         continue;
                     // if (q==0)continue;
@@ -193,11 +194,11 @@ void ShearStressCoProcessor::calculateShearStress(double timeStep)
                           (((f[DIR_PPP] + f[DIR_MMM]) - (f[DIR_PMP] + f[DIR_MPM])) + ((f[DIR_PMM] + f[DIR_MPP]) - (f[DIR_MMP] + f[DIR_PPM])) +
                            (-(f[DIR_0PM] + f[DIR_0MP]) + (f[DIR_0PP] + f[DIR_0MM])) - vy * vz);
 
-                    LBMReal dxxMyy = 3.0 / 2.0 * collFactor / (collFactor - 1.0) *
+                    real dxxMyy = 3.0 / 2.0 * collFactor / (collFactor - 1.0) *
                                      (((f[DIR_P0P] + f[DIR_M0M]) + (f[DIR_P0M] + f[DIR_M0P])) - ((f[DIR_0PM] + f[DIR_0MP]) + (f[DIR_0PP] + f[DIR_0MM])) +
                                       ((f[DIR_P00] + f[DIR_M00]) - (f[DIR_0P0] + f[DIR_0M0])) - vx * vx + vy * vy);
 
-                    LBMReal dxxMzz = 3.0 / 2.0 * collFactor / (collFactor - 1.0) *
+                    real dxxMzz = 3.0 / 2.0 * collFactor / (collFactor - 1.0) *
                                      ((((f[DIR_PP0] + f[DIR_MM0]) + (f[DIR_PM0] + f[DIR_MP0])) - ((f[DIR_0PM] + f[DIR_0MP]) + (f[DIR_0PP] + f[DIR_0MM]))) +
                                       ((f[DIR_P00] + f[DIR_M00]) - (f[DIR_00P] + f[DIR_00M])) - vx * vx + vz * vz);
 
@@ -249,7 +250,7 @@ void ShearStressCoProcessor::addData()
             UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
             //         UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
             UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-            double dx                 = grid->getDeltaX(block);
+            real dx                 = grid->getDeltaX(block);
 
             SPtr<ILBMKernel> kernel                 = block->getKernel();
             SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
@@ -257,7 +258,7 @@ void ShearStressCoProcessor::addData()
             SPtr<ShearStressValuesArray3D> ssv      = kernel->getDataSet()->getShearStressValues();
 
             int ghostLayer     = kernel->getGhostLayerWidth();
-            LBMReal collFactor = kernel->getCollisionFactor();
+            real collFactor = kernel->getCollisionFactor();
 
             int minX1 = ghostLayer;
             int maxX1 = (int)bcArray->getNX1() - 1 - ghostLayer;
@@ -281,8 +282,8 @@ void ShearStressCoProcessor::addData()
                     continue;
 
                 if (bcArray->isFluid(ix1, ix2, ix3)) {
-                    double q        = (*ssv)(normalq, ix1, ix2, ix3);
-                    double numPoint = (*ssv)(numberOfPoint, ix1, ix2, ix3);
+                    real q        = (*ssv)(normalq, ix1, ix2, ix3);
+                    real numPoint = (*ssv)(numberOfPoint, ix1, ix2, ix3);
                     if (q == 0 || numPoint != 3)
                         continue;
                     // if (q==0)continue;
@@ -293,7 +294,7 @@ void ShearStressCoProcessor::addData()
                                                 float(val<3>(org) - val<3>(nodeOffset) + ix3 * dx)));
 
                     //////get normal and distance//////
-                    double A, B, C;
+                    real A, B, C;
                     A = (*ssv)(normalX1, ix1, ix2, ix3);
                     B = (*ssv)(normalX2, ix1, ix2, ix3);
                     C = (*ssv)(normalX3, ix1, ix2, ix3);
@@ -306,35 +307,35 @@ void ShearStressCoProcessor::addData()
                     // vtySonja = (*av)(ix1,ix2,ix3,AvVy)-normals[1]*temp;
                     // vtzSonja = (*av)(ix1,ix2,ix3,AvVz)-normals[2]*temp;
 
-                    double vtx = (B * B * (*ssv)(AvVx, ix1, ix2, ix3) + C * C * (*ssv)(AvVx, ix1, ix2, ix3) -
-                                  A * B * (*ssv)(AvVy, ix1, ix2, ix3) - A * C * (*ssv)(AvVy, ix1, ix2, ix3)) /
+                    real vtx = (B * B * (*ssv)(AvVx, ix1, ix2, ix3) + C * C * (*ssv)(AvVx, ix1, ix2, ix3) - // x-component of v_t = v - n (n . v) / |n|^2 with n = (A, B, C)
+                                  A * B * (*ssv)(AvVy, ix1, ix2, ix3) - A * C * (*ssv)(AvVz, ix1, ix2, ix3)) / // the A*C term pairs with the z-velocity, mirroring vty and vtz
                                  (A * A + B * B + C * C);
-                    double vty = (-(A * B * (*ssv)(AvVx, ix1, ix2, ix3)) + A * A * (*ssv)(AvVy, ix1, ix2, ix3) +
+                    real vty = (-(A * B * (*ssv)(AvVx, ix1, ix2, ix3)) + A * A * (*ssv)(AvVy, ix1, ix2, ix3) +
                                   C * C * (*ssv)(AvVy, ix1, ix2, ix3) - B * C * (*ssv)(AvVz, ix1, ix2, ix3)) /
                                  (A * A + B * B + C * C);
-                    double vtz = (-(A * C * (*ssv)(AvVx, ix1, ix2, ix3)) - B * C * (*ssv)(AvVy, ix1, ix2, ix3) +
+                    real vtz = (-(A * C * (*ssv)(AvVx, ix1, ix2, ix3)) - B * C * (*ssv)(AvVy, ix1, ix2, ix3) +
                                   A * A * (*ssv)(AvVz, ix1, ix2, ix3) + B * B * (*ssv)(AvVz, ix1, ix2, ix3)) /
                                  (A * A + B * B + C * C);
 
-                    double normVt = sqrt(vtx * vtx + vty * vty + vtz * vtz) + 1e-100;
-                    double nvtx   = vtx / normVt;
-                    double nvty   = vty / normVt;
-                    double nvtz   = vtz / normVt;
+                    real normVt = sqrt(vtx * vtx + vty * vty + vtz * vtz); // no additive 1e-100 guard: it would round to zero if real is single precision
+                    real nvtx   = normVt > 0 ? vtx / normVt : real(0); // a zero tangential velocity has no direction; yield 0 instead of 0/0
+                    real nvty   = normVt > 0 ? vty / normVt : real(0);
+                    real nvtz   = normVt > 0 ? vtz / normVt : real(0);
 
-                    double sx   = 0.5 * ((*ssv)(AvSxx, ix1, ix2, ix3) * nvtx + (*ssv)(AvSxy, ix1, ix2, ix3) * nvty +
+                    real sx   = 0.5 * ((*ssv)(AvSxx, ix1, ix2, ix3) * nvtx + (*ssv)(AvSxy, ix1, ix2, ix3) * nvty +
                                        (*ssv)(AvSxz, ix1, ix2, ix3) * nvtz);
-                    double sy   = 0.5 * ((*ssv)(AvSxy, ix1, ix2, ix3) * nvtx + (*ssv)(AvSyy, ix1, ix2, ix3) * nvty +
+                    real sy   = 0.5 * ((*ssv)(AvSxy, ix1, ix2, ix3) * nvtx + (*ssv)(AvSyy, ix1, ix2, ix3) * nvty +
                                        (*ssv)(AvSyz, ix1, ix2, ix3) * nvtz);
-                    double sz   = 0.5 * ((*ssv)(AvSxz, ix1, ix2, ix3) * nvtx + (*ssv)(AvSyz, ix1, ix2, ix3) * nvty +
+                    real sz   = 0.5 * ((*ssv)(AvSxz, ix1, ix2, ix3) * nvtx + (*ssv)(AvSyz, ix1, ix2, ix3) * nvty +
                                        (*ssv)(AvSzz, ix1, ix2, ix3) * nvtz);
-                    double sabs = sqrt(sx * sx + sy * sy + sz * sz);
+                    real sabs = sqrt(sx * sx + sy * sy + sz * sz);
 
-                    double viscosity = (1.0 / 3.0) * (1.0 / collFactor - 0.5);
-                    double rho       = 1.0;
-                    double utau      = sqrt(viscosity / rho * sabs);
+                    real viscosity = (1.0 / 3.0) * (1.0 / collFactor - 0.5);
+                    real rho       = 1.0;
+                    real utau      = sqrt(viscosity / rho * sabs);
 
                     // double q=(*av)(ix1,ix2,ix3,normalq) ;
-                    double yPlus = (utau * q) / viscosity;
+                    real yPlus = (utau * q) / viscosity;
 
                     data[index++].push_back(yPlus);
                     data[index++].push_back(utau);
@@ -344,7 +345,7 @@ void ShearStressCoProcessor::addData()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::reset(double step)
+void ShearStressCoProcessor::reset(real step)
 {
     if (Resetscheduler->isDue(step))
         resetData(step);
@@ -352,7 +353,7 @@ void ShearStressCoProcessor::reset(double step)
     UBLOG(logDEBUG3, "resetCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::resetData(double /*step*/)
+void ShearStressCoProcessor::resetData(real /*step*/)
 {
     for (int level = minInitLevel; level <= maxInitLevel; level++) {
         for (const auto &block : blockVector[level]) {
@@ -404,14 +405,16 @@ void ShearStressCoProcessor::resetData(double /*step*/)
 //////////////////////////////////////////////////////////////////////////
 void ShearStressCoProcessor::addInteractor(SPtr<D3Q27Interactor> interactor) { interactors.push_back(interactor); }
 //////////////////////////////////////////////////////////////////////////
-void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> grid, SPtr<Block3D> block, double &A,
-                                       double &B, double &C, double &D, double &ii)
+void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> grid, SPtr<Block3D> block, real &A,
+                                       real &B, real &C, real &D, real &ii)
 {
-    double x1plane = 0.0, y1plane = 0.0, z1plane = 0.0;
-    double x2plane = 0.0, y2plane = 0.0, z2plane = 0.0;
-    double x3plane = 0.0, y3plane = 0.0, z3plane = 0.0;
+    using namespace vf::lbm::dir;
+
+    real x1plane = 0.0, y1plane = 0.0, z1plane = 0.0;
+    real x2plane = 0.0, y2plane = 0.0, z2plane = 0.0;
+    real x3plane = 0.0, y3plane = 0.0, z3plane = 0.0;
     SPtr<BoundaryConditions> bcPtr;
-    double dx                               = grid->getDeltaX(block);
+    real dx                               = grid->getDeltaX(block);
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
@@ -562,32 +565,32 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 "ix2=" + UbSystem::toString(ix2) + "ix3=" + UbSystem::toString(ix3) +
                                                 "GlobalID=" + UbSystem::toString(block->getGlobalID()) +
                                                 "dx=" + UbSystem::toString(dx) +
-                                                "T=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_00P)) +
-                                                "B=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_00M)) +
-                                                "E=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P00)) +
-                                                "W=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M00)) +
-                                                "N=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0P0)) +
-                                                "S=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0M0)) +
-                                                "NE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PP0)) +
-                                                "SW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MM0)) +
-                                                "SE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PM0)) +
-                                                "NW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MP0)) +
-                                                "TE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P0P)) +
-                                                "BW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M0M)) +
-                                                "BE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P0M)) +
-                                                "TW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M0P)) +
-                                                "TN=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0PP)) +
-                                                "BS=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0MM)) +
-                                                "BN=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0PM)) +
-                                                "TS=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0MP)) +
-                                                "TNE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PPP)) +
-                                                "TNW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MPP)) +
-                                                "TSE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PMP)) +
-                                                "TSW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MMP)) +
-                                                "BNE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PPM)) +
-                                                "BNW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MPM)) +
-                                                "BSE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PMM)) +
-                                                "BSW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MMM) * dx)));
+                                                "T=" + UbSystem::toString(bcPtr->getQ(DIR_00P)) +
+                                                "B=" + UbSystem::toString(bcPtr->getQ(DIR_00M)) +
+                                                "E=" + UbSystem::toString(bcPtr->getQ(DIR_P00)) +
+                                                "W=" + UbSystem::toString(bcPtr->getQ(DIR_M00)) +
+                                                "N=" + UbSystem::toString(bcPtr->getQ(DIR_0P0)) +
+                                                "S=" + UbSystem::toString(bcPtr->getQ(DIR_0M0)) +
+                                                "NE=" + UbSystem::toString(bcPtr->getQ(DIR_PP0)) +
+                                                "SW=" + UbSystem::toString(bcPtr->getQ(DIR_MM0)) +
+                                                "SE=" + UbSystem::toString(bcPtr->getQ(DIR_PM0)) +
+                                                "NW=" + UbSystem::toString(bcPtr->getQ(DIR_MP0)) +
+                                                "TE=" + UbSystem::toString(bcPtr->getQ(DIR_P0P)) +
+                                                "BW=" + UbSystem::toString(bcPtr->getQ(DIR_M0M)) +
+                                                "BE=" + UbSystem::toString(bcPtr->getQ(DIR_P0M)) +
+                                                "TW=" + UbSystem::toString(bcPtr->getQ(DIR_M0P)) +
+                                                "TN=" + UbSystem::toString(bcPtr->getQ(DIR_0PP)) +
+                                                "BS=" + UbSystem::toString(bcPtr->getQ(DIR_0MM)) +
+                                                "BN=" + UbSystem::toString(bcPtr->getQ(DIR_0PM)) +
+                                                "TS=" + UbSystem::toString(bcPtr->getQ(DIR_0MP)) +
+                                                "TNE=" + UbSystem::toString(bcPtr->getQ(DIR_PPP)) +
+                                                "TNW=" + UbSystem::toString(bcPtr->getQ(DIR_MPP)) +
+                                                "TSE=" + UbSystem::toString(bcPtr->getQ(DIR_PMP)) +
+                                                "TSW=" + UbSystem::toString(bcPtr->getQ(DIR_MMP)) +
+                                                "BNE=" + UbSystem::toString(bcPtr->getQ(DIR_PPM)) +
+                                                "BNW=" + UbSystem::toString(bcPtr->getQ(DIR_MPM)) +
+                                                "BSE=" + UbSystem::toString(bcPtr->getQ(DIR_PMM)) +
+                                                "BSW=" + UbSystem::toString(bcPtr->getQ(DIR_MMM) * dx)));
         }
     }
 
@@ -597,18 +600,18 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                 for (int k = z; k <= z + 1; k++) {
                     Vector3D pointplane1 = grid->getNodeCoordinates(block, i, j, k);
 
-                    double iph = pointplane1[0];
-                    double jph = pointplane1[1];
-                    double kph = pointplane1[2];
+                    real iph = pointplane1[0];
+                    real jph = pointplane1[1];
+                    real kph = pointplane1[2];
 
                     if (!bcArray->isSolid(i, j, k)) {
                         SPtr<BoundaryConditions> bcPtrIn = bcArray->getBC(i, j, k);
                         if (bcPtrIn) {
                             for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
                                 if (ii <= 2) {
-                                    LBMReal q = bcPtrIn->getQ(fdir);
+                                    real q = bcPtrIn->getQ(fdir);
                                     if (q != 999.00000) {
-                                        if (fdir == D3Q27System::DIR_P00) {
+                                        if (fdir == DIR_P00) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (i + q <= x + 1) {
                                                 if (ii == 0) {
@@ -634,7 +637,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 }
                                             }
                                         }
-                                        if (fdir == D3Q27System::DIR_M00) {
+                                        if (fdir == DIR_M00) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (i - q >= x) {
                                                 if (ii == 0) {
@@ -660,7 +663,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 }
                                             }
                                         }
-                                        if (fdir == D3Q27System::DIR_0P0) {
+                                        if (fdir == DIR_0P0) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (j + q <= y + 1) {
                                                 if (ii == 0) {
@@ -686,7 +689,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 }
                                             }
                                         }
-                                        if (fdir == D3Q27System::DIR_0M0) {
+                                        if (fdir == DIR_0M0) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (j - q >= y) {
                                                 if (ii == 0) {
@@ -713,7 +716,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                             }
                                         }
 
-                                        if (fdir == D3Q27System::DIR_00P) {
+                                        if (fdir == DIR_00P) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (k + q <= z + 1) {
                                                 if (ii == 0) {
@@ -739,7 +742,7 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                                                 }
                                             }
                                         }
-                                        if (fdir == D3Q27System::DIR_00M) {
+                                        if (fdir == DIR_00M) {
                                             // if(!bcArray->isSolid(i, j, k))continue;
                                             if (k - q >= z) {
                                                 if (ii == 0) {
@@ -788,32 +791,32 @@ void ShearStressCoProcessor::findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> g
                     UB_EXARGS, "ii is=" + UbSystem::toString(ii) + "  ix1=" + UbSystem::toString(ix1) +
                                    " ix2=" + UbSystem::toString(ix2) + " ix3=" + UbSystem::toString(ix3) +
                                    " Block3D::GlobalID=" + UbSystem::toString(block->getGlobalID()) + " dx=" +
-                                   UbSystem::toString(dx) + " T=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_00P)) +
-                                   " B=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_00M)) +
-                                   " E=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P00)) +
-                                   " W=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M00)) +
-                                   " N=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0P0)) +
-                                   " S=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0M0)) +
-                                   " NE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PP0)) +
-                                   " SW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MM0)) +
-                                   " SE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PM0)) +
-                                   " NW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MP0)) +
-                                   " TE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P0P)) +
-                                   " BW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M0M)) +
-                                   " BE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_P0M)) +
-                                   " TW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_M0P)) +
-                                   " TN=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0PP)) +
-                                   " BS=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0MM)) +
-                                   " BN=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0PM)) +
-                                   " TS=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_0MP)) +
-                                   " TNE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PPP)) +
-                                   " TNW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MPP)) +
-                                   " TSE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PMP)) +
-                                   " TSW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MMP)) +
-                                   " BNE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PPM)) +
-                                   " BNW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MPM)) +
-                                   " BSE=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_PMM)) +
-                                   " BSW=" + UbSystem::toString(bcPtr->getQ(D3Q27System::DIR_MMM))));
+                                   UbSystem::toString(dx) + " T=" + UbSystem::toString(bcPtr->getQ(DIR_00P)) +
+                                   " B=" + UbSystem::toString(bcPtr->getQ(DIR_00M)) +
+                                   " E=" + UbSystem::toString(bcPtr->getQ(DIR_P00)) +
+                                   " W=" + UbSystem::toString(bcPtr->getQ(DIR_M00)) +
+                                   " N=" + UbSystem::toString(bcPtr->getQ(DIR_0P0)) +
+                                   " S=" + UbSystem::toString(bcPtr->getQ(DIR_0M0)) +
+                                   " NE=" + UbSystem::toString(bcPtr->getQ(DIR_PP0)) +
+                                   " SW=" + UbSystem::toString(bcPtr->getQ(DIR_MM0)) +
+                                   " SE=" + UbSystem::toString(bcPtr->getQ(DIR_PM0)) +
+                                   " NW=" + UbSystem::toString(bcPtr->getQ(DIR_MP0)) +
+                                   " TE=" + UbSystem::toString(bcPtr->getQ(DIR_P0P)) +
+                                   " BW=" + UbSystem::toString(bcPtr->getQ(DIR_M0M)) +
+                                   " BE=" + UbSystem::toString(bcPtr->getQ(DIR_P0M)) +
+                                   " TW=" + UbSystem::toString(bcPtr->getQ(DIR_M0P)) +
+                                   " TN=" + UbSystem::toString(bcPtr->getQ(DIR_0PP)) +
+                                   " BS=" + UbSystem::toString(bcPtr->getQ(DIR_0MM)) +
+                                   " BN=" + UbSystem::toString(bcPtr->getQ(DIR_0PM)) +
+                                   " TS=" + UbSystem::toString(bcPtr->getQ(DIR_0MP)) +
+                                   " TNE=" + UbSystem::toString(bcPtr->getQ(DIR_PPP)) +
+                                   " TNW=" + UbSystem::toString(bcPtr->getQ(DIR_MPP)) +
+                                   " TSE=" + UbSystem::toString(bcPtr->getQ(DIR_PMP)) +
+                                   " TSW=" + UbSystem::toString(bcPtr->getQ(DIR_MMP)) +
+                                   " BNE=" + UbSystem::toString(bcPtr->getQ(DIR_PPM)) +
+                                   " BNW=" + UbSystem::toString(bcPtr->getQ(DIR_MPM)) +
+                                   " BSE=" + UbSystem::toString(bcPtr->getQ(DIR_PMM)) +
+                                   " BSW=" + UbSystem::toString(bcPtr->getQ(DIR_MMM))));
             }
         }
     }
@@ -835,6 +838,8 @@ bool ShearStressCoProcessor::checkUndefindedNodes(SPtr<BCArray3D> bcArray, int i
 //////////////////////////////////////////////////////////////////////////////////////
 void ShearStressCoProcessor::initDistance()
 {
+    using namespace vf::lbm::dir;
+
     for (const auto &interactor : interactors) {
         //      typedef std::map<SPtr<Block3D>, std::set< std::vector<int> > > TransNodeIndicesMap;
         for (const auto &t : interactor->getBcNodeIndicesMap()) {
@@ -852,7 +857,7 @@ void ShearStressCoProcessor::initDistance()
             SPtr<ShearStressValuesArray3D> ssv      = kernel->getDataSet()->getShearStressValues();
 
             int ghostLayer = kernel->getGhostLayerWidth();
-            //         LBMReal collFactor = kernel->getCollisionFactor();
+            //         real collFactor = kernel->getCollisionFactor();
 
             int minX1 = ghostLayer;
             int maxX1 = (int)bcArray->getNX1() - 1 - ghostLayer;
@@ -876,22 +881,22 @@ void ShearStressCoProcessor::initDistance()
                         continue;
                     int numberOfCorner = 0;
 
-                    if (bc->getQ(D3Q27System::DIR_00P) != 999.000) {
+                    if (bc->getQ(DIR_00P) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_00M) != 999.000) {
+                    if (bc->getQ(DIR_00M) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_P00) != 999.000) {
+                    if (bc->getQ(DIR_P00) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_M00) != 999.000) {
+                    if (bc->getQ(DIR_M00) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_0P0) != 999.000) {
+                    if (bc->getQ(DIR_0P0) != 999.000) {
                         numberOfCorner++;
                     }
-                    if (bc->getQ(D3Q27System::DIR_0M0) != 999.000) {
+                    if (bc->getQ(DIR_0M0) != 999.000) {
                         numberOfCorner++;
                     }
                     // if(bc->hasVelocityBoundary()||bc->hasDensityBoundary())continue;
@@ -901,17 +906,17 @@ void ShearStressCoProcessor::initDistance()
                         continue;
 
                     //////get normal and distance//////
-                    double A, B, C, D, ii = 0.0;
+                    real A, B, C, D, ii = 0.0;
                     findPlane(ix1, ix2, ix3, grid, block, A, B, C, D, ii);
                     Vector3D pointplane1 = grid->getNodeCoordinates(block, ix1, ix2, ix3);
-                    double ix1ph         = pointplane1[0];
-                    double ix2ph         = pointplane1[1];
-                    double ix3ph         = pointplane1[2];
-                    double normalDis;
+                    real ix1ph         = pointplane1[0];
+                    real ix2ph         = pointplane1[1];
+                    real ix3ph         = pointplane1[2];
+                    real normalDis;
                     if (ii != 3) {
                         UB_THROW(UbException(UB_EXARGS, "not enough points to create plane" + UbSystem::toString(ii)));
                     } else {
-                        double s = A * ix1ph + B * ix2ph + C * ix3ph +
+                        real s = A * ix1ph + B * ix2ph + C * ix3ph +
                                    D; // The sign of s = Ax + By + Cz + D determines which side the point (x,y,z) lies
                                       // with respect to the plane. If s > 0 then the point lies on the same side as the
                                       // normal (A,B,C). If s < 0 then it lies on the opposite side, if s = 0 then the
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.h
index 995589b9e8b84334ea108cddac8e49bbbfa1c535..73fd42d6485321a26e11b2cf0b4b2a521a0881fd 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.h
@@ -31,33 +31,33 @@ public:
                            SPtr<UbScheduler> rs);
     ~ShearStressCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
     void addInteractor(SPtr<D3Q27Interactor> interactor);
 
 protected:
     //! Computes average and shear stress values of macroscopic quantities
-    void calculateShearStress(double timeStep);
+    void calculateShearStress(real timeStep);
     //! Prepare data and write in .vtk file
-    void collectData(double step);
+    void collectData(real step);
     //! Reset data
-    void resetData(double step);
+    void resetData(real step);
     //! prepare data
     void addData();
     void clearData();
-    void reset(double step);
-    void findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> grid, SPtr<Block3D> block, double &A, double &B, double &C,
-                   double &D, double &ii);
+    void reset(real step);
+    void findPlane(int ix1, int ix2, int ix3, SPtr<Grid3D> grid, SPtr<Block3D> block, real &A, real &B, real &C,
+                   real &D, real &ii);
     bool checkUndefindedNodes(SPtr<BCArray3D> bcArray, int ix1, int ix2, int ix3);
     void initDistance();
 
 private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     std::vector<SPtr<D3Q27Interactor>> interactors;
-    std::vector<double> normals;
+    std::vector<real> normals;
     int gridRank;
     WbWriter *writer;
     SPtr<UbScheduler> Resetscheduler; // additional scheduler to restart averaging after a given interval
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
index d2874876ee9b36b9a17a6c4dcf88c4c7d0e948cb..8fa95c121ee61f419d778a636cacbb129ecdfe9e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
@@ -27,8 +27,8 @@ TimeAveragedValuesCoProcessor::TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid,
 TimeAveragedValuesCoProcessor::TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path,
                                                              WbWriter *const writer, SPtr<UbScheduler> s,
                                                              std::shared_ptr<vf::mpi::Communicator> comm, int options,
-                                                             std::vector<int> levels, std::vector<double> &levelCoords,
-                                                             std::vector<double> &bounds, bool timeAveraging)
+                                                             std::vector<int> levels, std::vector<real> &levelCoords,
+                                                             std::vector<real> &bounds, bool timeAveraging)
     : CoProcessor(grid, s), path(path), writer(writer), comm(comm), options(options), levels(levels),
       levelCoords(levelCoords), bounds(bounds), timeAveraging(timeAveraging)
 {
@@ -59,8 +59,8 @@ void TimeAveragedValuesCoProcessor::init()
         calcMacros = &calcIncompMacroscopicValues;
     }
 
-    double begin        = scheduler->getMinBegin();
-    double gridTimeStep = grid->getTimeStep();
+    real begin        = scheduler->getMinBegin();
+    real gridTimeStep = grid->getTimeStep();
 
     if (gridTimeStep == begin || gridTimeStep == 0) {
         initData();
@@ -116,7 +116,7 @@ void TimeAveragedValuesCoProcessor::initData()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::process(double step)
+void TimeAveragedValuesCoProcessor::process(real step)
 {
     if (step == minStep) {
         initData();
@@ -149,7 +149,7 @@ void TimeAveragedValuesCoProcessor::process(double step)
     UBLOG(logDEBUG3, "AverageValuesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::collectData(double step)
+void TimeAveragedValuesCoProcessor::collectData(real step)
 {
     int istep = int(step);
 
@@ -195,7 +195,7 @@ void TimeAveragedValuesCoProcessor::addData(const SPtr<Block3D> block)
     UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
     //   UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
     int level                 = block->getLevel();
 
     // Diese Daten werden geschrieben:
@@ -267,8 +267,8 @@ void TimeAveragedValuesCoProcessor::addData(const SPtr<Block3D> block)
     maxX2 -= 2;
     maxX3 -= 2;
 
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // D3Q27BoundaryConditionPtr bcPtr;
 
@@ -352,7 +352,7 @@ void TimeAveragedValuesCoProcessor::addData(const SPtr<Block3D> block)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::calculateAverageValues(double timeSteps)
+void TimeAveragedValuesCoProcessor::calculateAverageValues(real timeSteps)
 {
     for (int level = minInitLevel; level <= maxInitLevel; level++) {
         int i;
@@ -384,7 +384,7 @@ void TimeAveragedValuesCoProcessor::calculateAverageValues(double timeSteps)
                 maxX2 -= 2;
                 maxX3 -= 2;
 
-                LBMReal rho {0.}, ux {0.}, uy {0.}, uz {0.}, uxx {0.}, uzz {0.}, uyy {0.}, uxy {0.}, uxz {0.}, uyz {0.}, rhof {0.};
+                real rho {0.}, ux {0.}, uy {0.}, uz {0.}, uxx {0.}, uzz {0.}, uyy {0.}, uxy {0.}, uxz {0.}, uyz {0.}, rhof {0.};
 
                 for (int ix3 = minX3; ix3 <= maxX3; ix3++) {
                     for (int ix2 = minX2; ix2 <= maxX2; ix2++) {
@@ -463,14 +463,14 @@ void TimeAveragedValuesCoProcessor::calculateAverageValues(double timeSteps)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::calculateSubtotal(double step)
+void TimeAveragedValuesCoProcessor::calculateSubtotal(real step)
 {
     if (scheduler->isDue(step)) {
 
         // DEBUG/////////////////////
         // UBLOG(logINFO, "calculateSubtotal::step = " << step);
         ////////////////////////////
-        LBMReal f[27];
+        real f[27];
 
         //#ifdef _OPENMP
         //#pragma omp parallel private (f)
@@ -518,7 +518,7 @@ void TimeAveragedValuesCoProcessor::calculateSubtotal(double step)
                                         //////////////////////////////////////////////////////////////////////////
                                         // compute velocity
                                         //////////////////////////////////////////////////////////////////////////
-                                        LBMReal vx, vy, vz, rho;
+                                        real vx, vy, vz, rho;
                                         calcMacros(f, rho, vx, vy, vz);
                                         // double press = D3Q27System::calcPress(f, rho, vx, vy, vz);
 
@@ -574,7 +574,7 @@ void TimeAveragedValuesCoProcessor::calculateSubtotal(double step)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeAveragedValuesCoProcessor::planarAverage(double step)
+void TimeAveragedValuesCoProcessor::planarAverage(real step)
 {
     std::ofstream ostr;
 
@@ -625,11 +625,11 @@ void TimeAveragedValuesCoProcessor::planarAverage(double step)
 
     for (int i = 0; i < size; i++) {
         int level    = levels[i];
-        double dx    = grid->getDeltaX(level);
-        double start = levelCoords[k];
-        double stop  = levelCoords[k + 1];
+        real dx    = grid->getDeltaX(level);
+        real start = levelCoords[k];
+        real stop  = levelCoords[k + 1];
 
-        for (double j = start; j < stop; j += dx) {
+        for (real j = start; j < stop; j += dx) {
             IntegrateValuesHelper intValHelp(grid, comm, bounds[0], bounds[1], j, bounds[3], bounds[4], j + dx, level);
 
             std::vector<IntegrateValuesHelper::CalcNodes> cnodes = intValHelp.getCNodes();
@@ -640,46 +640,46 @@ void TimeAveragedValuesCoProcessor::planarAverage(double step)
             calculateAverageValuesForPlane(cnodes);
 
             if (root) {
-                double numberOfFluidsNodes = intValHelp.getNumberOfFluidsNodes();
+                real numberOfFluidsNodes = intValHelp.getNumberOfFluidsNodes();
                 if (numberOfFluidsNodes > 0) {
                     ostr << j + 0.5 * dx << std::setprecision(15);
 
                     // mean density
                     if ((options & Density) == Density) {
-                        double rho  = saRho / numberOfFluidsNodes;
-                        double rhoF = saRhoF / numberOfFluidsNodes;
+                        real rho  = saRho / numberOfFluidsNodes;
+                        real rhoF = saRhoF / numberOfFluidsNodes;
                         ostr << ";" << rho << ";" << rhoF;
                     }
 
                     // mean velocity
                     if ((options & Velocity) == Velocity) {
-                        double Vx = saVx / numberOfFluidsNodes;
-                        double Vy = saVy / numberOfFluidsNodes;
-                        double Vz = saVz / numberOfFluidsNodes;
+                        real Vx = saVx / numberOfFluidsNodes;
+                        real Vy = saVy / numberOfFluidsNodes;
+                        real Vz = saVz / numberOfFluidsNodes;
                         ostr << ";" << Vx << ";" << Vy << ";" << Vz;
                     }
                     // fluctuations
                     if ((options & Fluctuations) == Fluctuations) {
-                        double Vxx = saVxx / numberOfFluidsNodes;
-                        double Vyy = saVyy / numberOfFluidsNodes;
-                        double Vzz = saVzz / numberOfFluidsNodes;
-                        double Vxy = saVxy / numberOfFluidsNodes;
-                        double Vxz = saVxz / numberOfFluidsNodes;
-                        double Vyz = saVyz / numberOfFluidsNodes;
+                        real Vxx = saVxx / numberOfFluidsNodes;
+                        real Vyy = saVyy / numberOfFluidsNodes;
+                        real Vzz = saVzz / numberOfFluidsNodes;
+                        real Vxy = saVxy / numberOfFluidsNodes;
+                        real Vxz = saVxz / numberOfFluidsNodes;
+                        real Vyz = saVyz / numberOfFluidsNodes;
                         ostr << ";" << Vxx << ";" << Vyy << ";" << Vzz << ";" << Vxy << ";" << Vxz << ";" << Vyz;
                     }
                     // triple-correlations
                     if ((options & Triplecorrelations) == Triplecorrelations) {
-                        double Vxxx = saVxxx / numberOfFluidsNodes;
-                        double Vxxy = saVxxy / numberOfFluidsNodes;
-                        double Vxxz = saVxxz / numberOfFluidsNodes;
-                        double Vyyy = saVyyy / numberOfFluidsNodes;
-                        double Vyyx = saVyyx / numberOfFluidsNodes;
-                        double Vyyz = saVyyz / numberOfFluidsNodes;
-                        double Vzzz = saVzzz / numberOfFluidsNodes;
-                        double Vzzx = saVzzx / numberOfFluidsNodes;
-                        double Vzzy = saVzzy / numberOfFluidsNodes;
-                        double Vxyz = saVxyz / numberOfFluidsNodes;
+                        real Vxxx = saVxxx / numberOfFluidsNodes;
+                        real Vxxy = saVxxy / numberOfFluidsNodes;
+                        real Vxxz = saVxxz / numberOfFluidsNodes;
+                        real Vyyy = saVyyy / numberOfFluidsNodes;
+                        real Vyyx = saVyyx / numberOfFluidsNodes;
+                        real Vyyz = saVyyz / numberOfFluidsNodes;
+                        real Vzzz = saVzzz / numberOfFluidsNodes;
+                        real Vzzx = saVzzx / numberOfFluidsNodes;
+                        real Vzzy = saVzzy / numberOfFluidsNodes;
+                        real Vxyz = saVxyz / numberOfFluidsNodes;
                         ostr << ";" << Vxxx << ";" << Vxxy << ";" << Vxxz << ";" << Vyyy << ";" << Vyyx << ";" << Vyyz
                              << ";" << Vzzz << ";" << Vzzx << ";" << Vzzy << ";" << Vxyz;
                     }
@@ -766,30 +766,30 @@ void TimeAveragedValuesCoProcessor::calculateAverageValuesForPlane(
     saRho  = 0;
     saRhoF = 0;
 
-    double lsaVx = 0;
-    double lsaVy = 0;
-    double lsaVz = 0;
-
-    double lsaVxx = 0;
-    double lsaVyy = 0;
-    double lsaVzz = 0;
-    double lsaVxy = 0;
-    double lsaVxz = 0;
-    double lsaVyz = 0;
-
-    double lsaVxxx = 0;
-    double lsaVxxy = 0;
-    double lsaVxxz = 0;
-    double lsaVyyy = 0;
-    double lsaVyyx = 0;
-    double lsaVyyz = 0;
-    double lsaVzzz = 0;
-    double lsaVzzx = 0;
-    double lsaVzzy = 0;
-    double lsaVxyz = 0;
-
-    double lsaRho  = 0;
-    double lsaRhoF = 0;
+    real lsaVx = 0;
+    real lsaVy = 0;
+    real lsaVz = 0;
+
+    real lsaVxx = 0;
+    real lsaVyy = 0;
+    real lsaVzz = 0;
+    real lsaVxy = 0;
+    real lsaVxz = 0;
+    real lsaVyz = 0;
+
+    real lsaVxxx = 0;
+    real lsaVxxy = 0;
+    real lsaVxxz = 0;
+    real lsaVyyy = 0;
+    real lsaVyyx = 0;
+    real lsaVyyz = 0;
+    real lsaVzzz = 0;
+    real lsaVzzx = 0;
+    real lsaVzzy = 0;
+    real lsaVxyz = 0;
+
+    real lsaRho  = 0;
+    real lsaRhoF = 0;
 
     for (IntegrateValuesHelper::CalcNodes cn : cnodes) {
         SPtr<ILBMKernel> kernel                               = cn.block->getKernel();
@@ -799,30 +799,30 @@ void TimeAveragedValuesCoProcessor::calculateAverageValuesForPlane(
         SPtr<AverageValuesArray3D> averagedTriplecorrelations = kernel->getDataSet()->getAverageTriplecorrelations();
 
         for (UbTupleInt3 node : cn.nodes) {
-            double aRho  = (*averagedDensity)(Rho, val<1>(node), val<2>(node), val<3>(node));
-            double aRhoF = (*averagedDensity)(RhoF, val<1>(node), val<2>(node), val<3>(node));
-
-            double aVx = (*averagedVelocity)(Vx, val<1>(node), val<2>(node), val<3>(node));
-            double aVy = (*averagedVelocity)(Vy, val<1>(node), val<2>(node), val<3>(node));
-            double aVz = (*averagedVelocity)(Vz, val<1>(node), val<2>(node), val<3>(node));
-
-            double aVxx = (*averagedFluctuations)(Vxx, val<1>(node), val<2>(node), val<3>(node));
-            double aVyy = (*averagedFluctuations)(Vyy, val<1>(node), val<2>(node), val<3>(node));
-            double aVzz = (*averagedFluctuations)(Vzz, val<1>(node), val<2>(node), val<3>(node));
-            double aVxy = (*averagedFluctuations)(Vxy, val<1>(node), val<2>(node), val<3>(node));
-            double aVxz = (*averagedFluctuations)(Vxz, val<1>(node), val<2>(node), val<3>(node));
-            double aVyz = (*averagedFluctuations)(Vyz, val<1>(node), val<2>(node), val<3>(node));
-
-            double aVxxx = (*averagedTriplecorrelations)(Vxxx, val<1>(node), val<2>(node), val<3>(node));
-            double aVxxy = (*averagedTriplecorrelations)(Vxxy, val<1>(node), val<2>(node), val<3>(node));
-            double aVxxz = (*averagedTriplecorrelations)(Vxxz, val<1>(node), val<2>(node), val<3>(node));
-            double aVyyy = (*averagedTriplecorrelations)(Vyyy, val<1>(node), val<2>(node), val<3>(node));
-            double aVyyx = (*averagedTriplecorrelations)(Vyyx, val<1>(node), val<2>(node), val<3>(node));
-            double aVyyz = (*averagedTriplecorrelations)(Vyyz, val<1>(node), val<2>(node), val<3>(node));
-            double aVzzz = (*averagedTriplecorrelations)(Vzzz, val<1>(node), val<2>(node), val<3>(node));
-            double aVzzx = (*averagedTriplecorrelations)(Vzzx, val<1>(node), val<2>(node), val<3>(node));
-            double aVzzy = (*averagedTriplecorrelations)(Vzzy, val<1>(node), val<2>(node), val<3>(node));
-            double aVxyz = (*averagedTriplecorrelations)(Vxyz, val<1>(node), val<2>(node), val<3>(node));
+            real aRho  = (*averagedDensity)(Rho, val<1>(node), val<2>(node), val<3>(node));
+            real aRhoF = (*averagedDensity)(RhoF, val<1>(node), val<2>(node), val<3>(node));
+
+            real aVx = (*averagedVelocity)(Vx, val<1>(node), val<2>(node), val<3>(node));
+            real aVy = (*averagedVelocity)(Vy, val<1>(node), val<2>(node), val<3>(node));
+            real aVz = (*averagedVelocity)(Vz, val<1>(node), val<2>(node), val<3>(node));
+
+            real aVxx = (*averagedFluctuations)(Vxx, val<1>(node), val<2>(node), val<3>(node));
+            real aVyy = (*averagedFluctuations)(Vyy, val<1>(node), val<2>(node), val<3>(node));
+            real aVzz = (*averagedFluctuations)(Vzz, val<1>(node), val<2>(node), val<3>(node));
+            real aVxy = (*averagedFluctuations)(Vxy, val<1>(node), val<2>(node), val<3>(node));
+            real aVxz = (*averagedFluctuations)(Vxz, val<1>(node), val<2>(node), val<3>(node));
+            real aVyz = (*averagedFluctuations)(Vyz, val<1>(node), val<2>(node), val<3>(node));
+
+            real aVxxx = (*averagedTriplecorrelations)(Vxxx, val<1>(node), val<2>(node), val<3>(node));
+            real aVxxy = (*averagedTriplecorrelations)(Vxxy, val<1>(node), val<2>(node), val<3>(node));
+            real aVxxz = (*averagedTriplecorrelations)(Vxxz, val<1>(node), val<2>(node), val<3>(node));
+            real aVyyy = (*averagedTriplecorrelations)(Vyyy, val<1>(node), val<2>(node), val<3>(node));
+            real aVyyx = (*averagedTriplecorrelations)(Vyyx, val<1>(node), val<2>(node), val<3>(node));
+            real aVyyz = (*averagedTriplecorrelations)(Vyyz, val<1>(node), val<2>(node), val<3>(node));
+            real aVzzz = (*averagedTriplecorrelations)(Vzzz, val<1>(node), val<2>(node), val<3>(node));
+            real aVzzx = (*averagedTriplecorrelations)(Vzzx, val<1>(node), val<2>(node), val<3>(node));
+            real aVzzy = (*averagedTriplecorrelations)(Vzzy, val<1>(node), val<2>(node), val<3>(node));
+            real aVxyz = (*averagedTriplecorrelations)(Vxyz, val<1>(node), val<2>(node), val<3>(node));
 
             lsaRho += aRho;
             lsaRhoF += aRhoF;
@@ -850,8 +850,8 @@ void TimeAveragedValuesCoProcessor::calculateAverageValuesForPlane(
             lsaVxyz += aVxyz;
         }
     }
-    std::vector<double> values;
-    std::vector<double> rvalues;
+    std::vector<real> values;
+    std::vector<real> rvalues;
 
     values.push_back(lsaRho);
     values.push_back(lsaRhoF);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
index 155f293a08d0ef0726193a48c9a8fb8051bd3972..72a0f6fe16ef3805ff496ccff924b8ecf541bfef 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
@@ -44,29 +44,29 @@ public:
                                   SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm, int options);
     TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
                                   SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm, int options, std::vector<int> levels,
-                                  std::vector<double> &levelCoords, std::vector<double> &bounds,
+                                  std::vector<real> &levelCoords, std::vector<real> &bounds,
                                   bool timeAveraging = true);
     //! Make update
-    void process(double step) override;
+    void process(real step) override;
     //! Computes subtotal of velocity , fluctuations and triple correlations
-    void calculateSubtotal(double step);
-    void addLevelCoordinate(double c);
+    void calculateSubtotal(real step);
+    void addLevelCoordinate(real c);
     void reset();
     void setWithGhostLayer(bool val);
     bool getWithGhostLayer();
 
 protected:
     //! Prepare data and write in .vtk file
-    void collectData(double step);
+    void collectData(real step);
     //! prepare data
     void addData(const SPtr<Block3D> block);
     void clearData();
     //! Computes average values of velocity , fluctuations and triple correlations
-    void calculateAverageValues(double timeStep);
+    void calculateAverageValues(real timeStep);
 
     void init();
     void initData();
-    void planarAverage(double step);
+    void planarAverage(real step);
     void calculateAverageValuesForPlane(std::vector<IntegrateValuesHelper::CalcNodes> &cnodes);
 
 private:
@@ -74,7 +74,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
     bool root;
     int minInitLevel; // min init level
@@ -82,7 +82,7 @@ private:
     int gridRank;
     int resetStepRMS;
     int resetStepMeans;
-    double averageInterval;
+    real averageInterval;
     std::string path;
     WbWriter *writer;
     bool restart, compressible;
@@ -96,29 +96,29 @@ private:
     enum Fluctuations { Vxx, Vyy, Vzz, Vxy, Vxz, Vyz };
     enum Triplecorrelations { Vxxx, Vxxy, Vxxz, Vyyy, Vyyx, Vyyz, Vzzz, Vzzx, Vzzy, Vxyz };
 
-    double saRho, saRhoF;
-    double saVx, saVy, saVz;
-    double saVxx, saVyy, saVzz, saVxy, saVxz, saVyz;
-    double saVxxx, saVxxy, saVxxz, saVyyy, saVyyx, saVyyz, saVzzz, saVzzx, saVzzy, saVxyz;
+    real saRho, saRhoF;
+    real saVx, saVy, saVz;
+    real saVxx, saVyy, saVzz, saVxy, saVxz, saVyz;
+    real saVxxx, saVxxy, saVxxz, saVyyy, saVyyx, saVyyz, saVzzz, saVzzx, saVzzy, saVxyz;
 
     int options;
-    double numberOfSteps;
-    double minStep;
-    double maxStep;
+    real numberOfSteps;
+    real minStep;
+    real maxStep;
 
     int iMinX1, iMinX2, iMinX3;
     // int iMaxX1, iMaxX2, iMaxX3;
     int iMinC;
     int iMaxC;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 
     bool planarAveraging;
     bool timeAveraging;
-    std::vector<double> levelCoords;
+    std::vector<real> levelCoords;
     std::vector<int> levels;
-    std::vector<double> bounds;
+    std::vector<real> bounds;
 
     bool withGhostLayer;
 };
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.cpp
index 1540ef953b583be39dd7e5e4c1ee3dc678f0d12f..22158269d3d622fd4dd0f6c943b7e28b871b5172 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.cpp
@@ -8,7 +8,7 @@ TimeDependentBCCoProcessor::TimeDependentBCCoProcessor(SPtr<Grid3D> grid, SPtr<U
 //////////////////////////////////////////////////////////////////////////
 TimeDependentBCCoProcessor::~TimeDependentBCCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void TimeDependentBCCoProcessor::process(double step)
+void TimeDependentBCCoProcessor::process(real step)
 {
     if (scheduler->isDue(step)) {
         for (SPtr<Interactor3D> inter : interactors)
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.h
index 0483c9f826c7d1b110bd8fb07016dc5b997bad40..cb41ca9622759f99d701d1d97865d9754d9bf5e6 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeDependentBCCoProcessor.h
@@ -18,7 +18,7 @@ public:
     TimeDependentBCCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s);
     ~TimeDependentBCCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
     //! add interactors to CoProcessor
     void addInteractor(SPtr<Interactor3D> interactor);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
index d2be7f0e25ae773be89dcad02dc6b96c0651d23a..b897d4df17ceb61d88f242c17de3e2030d01e120 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
@@ -43,13 +43,13 @@ TimeseriesCoProcessor::TimeseriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler
 //////////////////////////////////////////////////////////////////////////
 TimeseriesCoProcessor::~TimeseriesCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void TimeseriesCoProcessor::process(double step)
+void TimeseriesCoProcessor::process(real step) // calls collectData(step) only when the scheduler reports this step as due
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void TimeseriesCoProcessor::collectData(double step)
+void TimeseriesCoProcessor::collectData(real step)
 {
     h1->calculateMQ();
 
@@ -58,13 +58,13 @@ void TimeseriesCoProcessor::collectData(double step)
     if (comm->getProcessID() == comm->getRoot()) {
         int istep = static_cast<int>(step);
         std::ofstream ostr;
-        double cellsVolume = h1->getCellsVolume();
+        real cellsVolume = h1->getCellsVolume();
 
-        double rho    = (h1->getRho()) / cellsVolume;
-        double vx     = (h1->getVx1()) / cellsVolume;
-        double vy     = (h1->getVx2()) / cellsVolume;
-        double vz     = (h1->getVx3()) / cellsVolume;
-        double volume = cellsVolume;
+        real rho    = (h1->getRho()) / cellsVolume;
+        real vx     = (h1->getVx1()) / cellsVolume;
+        real vy     = (h1->getVx2()) / cellsVolume;
+        real vz     = (h1->getVx3()) / cellsVolume;
+        real volume = cellsVolume;
 
         ostr.open(fname.c_str(), std::ios_base::out | std::ios_base::app);
         if (!ostr) {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
index e92e324aab1b7cbbe16d7e6652ecb3ed0dfa9ed4..7cdc98fd02b2776e970e52ccacf9966a4411a309 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
@@ -31,10 +31,10 @@ public:
     ~TimeseriesCoProcessor() override;
 
     //! calls collectData.
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
 
     //! object that can compute spacial average values in 3D-subdomain.
     SPtr<IntegrateValuesHelper> h1;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
index 6a06a20d41fc8b57c43dd219623bb2d544d7a4a9..4714349a9c25ec2b5d427e3b64ad00be738915f6 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
@@ -40,7 +40,7 @@ void TurbulenceIntensityCoProcessor::init()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TurbulenceIntensityCoProcessor::process(double step)
+void TurbulenceIntensityCoProcessor::process(real step)
 {
     calculateAverageValues(int(step));
 
@@ -50,7 +50,7 @@ void TurbulenceIntensityCoProcessor::process(double step)
     UBLOG(logDEBUG3, "TurbulenceIntensityCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void TurbulenceIntensityCoProcessor::collectData(double step)
+void TurbulenceIntensityCoProcessor::collectData(real step)
 {
     int istep = int(step);
 
@@ -100,7 +100,7 @@ void TurbulenceIntensityCoProcessor::addData(const SPtr<Block3D> block)
     UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
     //   UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -138,12 +138,12 @@ void TurbulenceIntensityCoProcessor::addData(const SPtr<Block3D> block)
                                                 float(val<3>(org) - val<3>(nodeOffset) + ix3 * dx)));
 
                     // compute turbulence intensity
-                    double temp =
+                    real temp =
                         (*av)(ix1, ix2, ix3, AvVxxyyzz) / ((*av)(ix1, ix2, ix3, AvVx) * (*av)(ix1, ix2, ix3, AvVx) +
                                                            (*av)(ix1, ix2, ix3, AvVy) * (*av)(ix1, ix2, ix3, AvVy) +
                                                            (*av)(ix1, ix2, ix3, AvVz) * (*av)(ix1, ix2, ix3, AvVz));
 
-                    LBMReal ti = sqrt(temp);
+                    real ti = sqrt(temp);
 
                     if (UbMath::isNaN(ti))
                         UB_THROW(
@@ -179,14 +179,15 @@ void TurbulenceIntensityCoProcessor::addData(const SPtr<Block3D> block)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void TurbulenceIntensityCoProcessor::calculateAverageValues(double timeStep)
+void TurbulenceIntensityCoProcessor::calculateAverageValues(real timeStep)
 {
+    using namespace vf::lbm::dir;
     using namespace D3Q27System;
 
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    LBMReal f[27];
-    LBMReal vx, vy, vz;
+    real f[27];
+    real vx, vy, vz;
 
     for (int level = minInitLevel; level <= maxInitLevel; level++) {
         for (SPtr<Block3D> block : blockVector[level]) {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
index 40983604d25385420cba8da4af28faa33283aaf0..8f11e94b446050d2069f89dd8971fb3acc8fb787 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
@@ -19,20 +19,20 @@ class TurbulenceIntensityCoProcessor : public CoProcessor
 public:
     TurbulenceIntensityCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
                                    SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm);
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void addData(const SPtr<Block3D> block);
     void clearData();
-    void calculateAverageValues(double timeStep);
+    void calculateAverageValues(real timeStep);
 
 private:
     void init();
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
     int minInitLevel;
     int maxInitLevel;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
index 523ae4ffb0ac78143e071054d8804f67bd2e12d6..de781d96b582f83e38e74ca0643a0d0c23b170c0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
@@ -49,13 +49,13 @@ WriteBlocksCoProcessor::WriteBlocksCoProcessor(SPtr<Grid3D> grid, SPtr<UbSchedul
 //////////////////////////////////////////////////////////////////////////
 WriteBlocksCoProcessor::~WriteBlocksCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void WriteBlocksCoProcessor::process(double step)
+void WriteBlocksCoProcessor::process(real step) // calls collectData(step) only when the scheduler reports this step as due
 {
     if (scheduler->isDue(step))
         collectData(step);
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteBlocksCoProcessor::collectData(double step)
+void WriteBlocksCoProcessor::collectData(real step)
 {
     if (comm->getProcessID() == comm->getRoot()) {
         int istep = int(step);
@@ -76,7 +76,7 @@ void WriteBlocksCoProcessor::collectData(double step)
         celldatanames.push_back("bundle");
 #endif
 
-        std::vector<std::vector<double>> celldata(celldatanames.size());
+        std::vector<std::vector<real>> celldata(celldatanames.size());
 
         int nr           = 0;
         int minInitLevel = this->grid->getCoarsestInitializedLevel();
@@ -109,12 +109,12 @@ void WriteBlocksCoProcessor::collectData(double step)
                 nr += 8;
 
                 // data
-                celldata[0].push_back((double)block->isActive());
-                celldata[1].push_back((double)block->getRank());
-                celldata[2].push_back((double)block->hasInterpolationFlag());
-                celldata[3].push_back((double)block->getGlobalID());
-                celldata[4].push_back((double)block->getPart());
-                celldata[5].push_back((double)block->getLevel());
+                celldata[0].push_back((real)block->isActive());
+                celldata[1].push_back((real)block->getRank());
+                celldata[2].push_back((real)block->hasInterpolationFlag());
+                celldata[3].push_back((real)block->getGlobalID());
+                celldata[4].push_back((real)block->getPart());
+                celldata[5].push_back((real)block->getLevel());
 
                 // bool flag = false;
                 // std::vector<SPtr<Block3DConnector>> connectors;
@@ -164,7 +164,7 @@ void WriteBlocksCoProcessor::collectData(double step)
                 //}
 
 #ifdef VF_FETOL
-                celldata[6].push_back((double)block->getBundle());
+                celldata[6].push_back((real)block->getBundle());
 #endif
             }
         }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
index 837d9bbad7533d0f097c07851b352c50cccf5465..c94cd1e64861cead5d01becbd80e5b3381e6e159 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
@@ -60,12 +60,12 @@ public:
                            std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteBlocksCoProcessor() override;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
 
     std::string path;
     WbWriter *writer;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
index 6c927f4945f9bcf211c7f84e38fbc6d395960b7f..4ed68397a7813314450ddd7dea33ca1824f54ac9 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
@@ -69,7 +69,7 @@ WriteBoundaryConditionsCoProcessor::WriteBoundaryConditionsCoProcessor(SPtr<Grid
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteBoundaryConditionsCoProcessor::process(double step)
+void WriteBoundaryConditionsCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -77,7 +77,7 @@ void WriteBoundaryConditionsCoProcessor::process(double step)
     UBLOG(logDEBUG3, "WriteBoundaryConditionsCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteBoundaryConditionsCoProcessor::collectData(double step)
+void WriteBoundaryConditionsCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -134,9 +134,9 @@ void WriteBoundaryConditionsCoProcessor::addDataGeo(SPtr<Block3D> block)
 {
     UbTupleDouble3 org        = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-    double dx                 = grid->getDeltaX(block);
+    real dx                 = grid->getDeltaX(block);
 
-    double level = (double)block->getLevel();
+    real level = (real)block->getLevel();
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
index 2608a3ae8df931a5f0b347b77ad525712676aeab..31f2a5c8e31820217d76745d371a8a812acb3b67 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
@@ -64,12 +64,12 @@ public:
                                        WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteBoundaryConditionsCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     void addDataGeo(SPtr<Block3D> block);
     void clearData();
 
@@ -77,7 +77,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     std::vector<std::vector<SPtr<Block3D>>> blockVector;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
index 1935ea22396a43dad53b2cf0a5b2960319026656..b47f1056172c07855eda232bede05eef475c4718 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
@@ -14,7 +14,7 @@ WriteGbObjectsCoProcessor::WriteGbObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
 //////////////////////////////////////////////////////////////////////////
 WriteGbObjectsCoProcessor::~WriteGbObjectsCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
-void WriteGbObjectsCoProcessor::process(double step)
+void WriteGbObjectsCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -22,7 +22,7 @@ void WriteGbObjectsCoProcessor::process(double step)
 //////////////////////////////////////////////////////////////////////////
 void WriteGbObjectsCoProcessor::addGbObject(SPtr<GbObject3D> object) { objects.push_back(object); }
 //////////////////////////////////////////////////////////////////////////
-void WriteGbObjectsCoProcessor::collectData(double step)
+void WriteGbObjectsCoProcessor::collectData(real step)
 {
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleInt3> triangles;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
index 09b9bdeb766d5c4251c18a46df888fe67ef54df8..5b502044f0556d2519afc23b72ea2b50bd664832 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
@@ -24,12 +24,12 @@ public:
                               std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteGbObjectsCoProcessor() override;
     //! calls collectData.
-    void process(double step) override;
+    void process(real step) override;
     //! adds geometry object
     void addGbObject(SPtr<GbObject3D> object);
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
 
 private:
     std::vector<SPtr<GbObject3D>> objects;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
index fc70b841ff2bee64176ec711dc579649c0f1c032..2b49861af9fd53cdec491527f40f96b4f8bc0484 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
@@ -35,7 +35,7 @@ WriteMQFromSelectionCoProcessor::WriteMQFromSelectionCoProcessor(SPtr<Grid3D> gr
 //////////////////////////////////////////////////////////////////////////
 void WriteMQFromSelectionCoProcessor::init() {}
 //////////////////////////////////////////////////////////////////////////
-void WriteMQFromSelectionCoProcessor::process(double step)
+void WriteMQFromSelectionCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -43,7 +43,7 @@ void WriteMQFromSelectionCoProcessor::process(double step)
     UBLOG(logDEBUG3, "WriteMQFromSelectionCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteMQFromSelectionCoProcessor::collectData(double step)
+void WriteMQFromSelectionCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -53,12 +53,12 @@ void WriteMQFromSelectionCoProcessor::collectData(double step)
                 UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
                 UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
 
-                double minX1 = val<1>(org);
-                double minX2 = val<2>(org);
-                double minX3 = val<3>(org);
-                double maxX1 = val<1>(org) + val<1>(blockLengths);
-                double maxX2 = val<2>(org) + val<2>(blockLengths);
-                double maxX3 = val<3>(org) + val<3>(blockLengths);
+                real minX1 = val<1>(org);
+                real minX2 = val<2>(org);
+                real minX3 = val<3>(org);
+                real maxX1 = val<1>(org) + val<1>(blockLengths);
+                real maxX2 = val<2>(org) + val<2>(blockLengths);
+                real maxX3 = val<3>(org) + val<3>(blockLengths);
 
                 if (gbObject->isCellInsideOrCuttingGbObject3D(minX1, minX2, minX3, maxX1, maxX2, maxX3)) {
                     addDataMQ(block);
@@ -110,7 +110,7 @@ void WriteMQFromSelectionCoProcessor::clearData()
 //////////////////////////////////////////////////////////////////////////
 void WriteMQFromSelectionCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
-    double level = (double)block->getLevel();
+    real level = (real)block->getLevel();
     //   double blockID = (double)block->getGlobalID();
 
     // Diese Daten werden geschrieben:
@@ -128,8 +128,8 @@ void WriteMQFromSelectionCoProcessor::addDataMQ(SPtr<Block3D> block)
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     if (block->getKernel()->getCompressible()) {
         calcMacros = &D3Q27System::calcCompMacroscopicValues;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
index 0dc3976b14b9930a1c1713074ff2222ad52b1fc8..64e7572797a613815d62eec91a61d68120ee1a2e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
@@ -27,10 +27,10 @@ public:
                                     std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMQFromSelectionCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
-    void collectData(double step);
+    void collectData(real step);
     void addDataMQ(SPtr<Block3D> block);
     void clearData();
 
@@ -38,7 +38,7 @@ private:
     void init();
     std::vector<UbTupleFloat3> nodes;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     SPtr<LBMUnitConverter> conv;
@@ -50,7 +50,7 @@ private:
     std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<GbObject3D> gbObject;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
index b886271b7f3842b0453a0fa16f53ca75442da9e9..58ed15604f8937b0b33fca96dab69250a404242c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
@@ -72,7 +72,7 @@ void WriteMacroscopicQuantitiesCoProcessor::init()
 {}
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMacroscopicQuantitiesCoProcessor::process(double step)
+void WriteMacroscopicQuantitiesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -81,7 +81,7 @@ void WriteMacroscopicQuantitiesCoProcessor::process(double step)
 }
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMacroscopicQuantitiesCoProcessor::collectData(double step)
+void WriteMacroscopicQuantitiesCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -143,7 +143,7 @@ void WriteMacroscopicQuantitiesCoProcessor::clearData()
 //////////////////////////////////////////////////////////////////////////
 void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
-    double level   = (double)block->getLevel();
+    real level   = (real)block->getLevel();
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -162,8 +162,8 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // knotennummerierung faengt immer bei 0 an!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
@@ -206,8 +206,8 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
                     int index                  = 0;
                     nodeNumbers(ix1, ix2, ix3) = nr++;
                     Vector3D worldCoordinates  = grid->getNodeCoordinates(block, ix1, ix2, ix3);
-                    nodes.push_back(UbTupleFloat3(float(worldCoordinates[0]), float(worldCoordinates[1]),
-                                                  float(worldCoordinates[2])));
+                    nodes.push_back(UbTupleFloat3(static_cast<float>(worldCoordinates[0]), static_cast<float>(worldCoordinates[1]),
+                                                  static_cast<float>(worldCoordinates[2]))); // node coordinates are stored as float (as before this change): narrow explicitly instead of casting to real
 
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
index 7fb1844e08cf7454294b658f539b95c38eb3fa34..1815d480f392fa47cdbf64038791929dc32a2ff3 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
@@ -66,12 +66,12 @@ public:
                                           WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMacroscopicQuantitiesCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     //! Collect data for VTK-file
     //! \param block is a time step
     void addDataMQ(SPtr<Block3D> block);
@@ -82,7 +82,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     SPtr<LBMUnitConverter> conv;
@@ -92,7 +92,7 @@ private:
     int gridRank;
     std::shared_ptr<vf::mpi::Communicator> comm;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.cpp
index c71a12ee6d7bc17297cca377712b9a6b479bb336..5c4f80887349280856a2de3791d9d0fb9012f53b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.cpp
@@ -72,7 +72,7 @@ void WriteMacroscopicQuantitiesPlusMassCoProcessor::init()
 {}
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMacroscopicQuantitiesPlusMassCoProcessor::process(double step)
+void WriteMacroscopicQuantitiesPlusMassCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
@@ -81,7 +81,7 @@ void WriteMacroscopicQuantitiesPlusMassCoProcessor::process(double step)
 }
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMacroscopicQuantitiesPlusMassCoProcessor::collectData(double step)
+void WriteMacroscopicQuantitiesPlusMassCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -143,7 +143,7 @@ void WriteMacroscopicQuantitiesPlusMassCoProcessor::clearData()
 //////////////////////////////////////////////////////////////////////////
 void WriteMacroscopicQuantitiesPlusMassCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
-    double level   = (double)block->getLevel();
+    real level   = (real)block->getLevel();
 
     // Diese Daten werden geschrieben:
     datanames.resize(0);
@@ -162,8 +162,8 @@ void WriteMacroscopicQuantitiesPlusMassCoProcessor::addDataMQ(SPtr<Block3D> bloc
     SPtr<ILBMKernel> kernel                 = block->getKernel();
     SPtr<BCArray3D> bcArray                 = kernel->getBCProcessor()->getBCArray();
     SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho;
+    real f[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho;
 
     // knotennummerierung faengt immer bei 0 an!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.h
index 07f1a12676902e5964fcb0127203c1b527c89778..9251bfc22549a7a366e57540ea8387e851d4756f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesPlusMassCoProcessor.h
@@ -66,12 +66,12 @@ public:
                                           WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMacroscopicQuantitiesPlusMassCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     //! Collect data for VTK-file
     //! \param block is a time step
     void addDataMQ(SPtr<Block3D> block);
@@ -82,7 +82,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     SPtr<LBMUnitConverter> conv;
@@ -92,7 +92,7 @@ private:
     int gridRank;
     std::shared_ptr<vf::mpi::Communicator> comm;
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
index 0298c1dbeb1d4b4a9ed6afb0c202206d9d21c488..74fced98d39116ebbb9ebc9a762398f2cba7c579 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
@@ -45,6 +45,7 @@
 #include "LBMUnitConverter.h"
 #include "UbScheduler.h"
 #include "basics/writer/WbWriterVtkXmlASCII.h"
+#include <logger/Logger.h>
 
 WriteMultiphaseQuantitiesCoProcessor::WriteMultiphaseQuantitiesCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
@@ -72,16 +73,17 @@ void WriteMultiphaseQuantitiesCoProcessor::init()
 {}
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMultiphaseQuantitiesCoProcessor::process(double step)
+void WriteMultiphaseQuantitiesCoProcessor::process(real step)
 {
     if (scheduler->isDue(step))
         collectData(step);
 
-    UBLOG(logDEBUG3, "WriteMultiphaseQuantitiesCoProcessor::update:" << step);
+    // Logged on every call, not only when the scheduler is due; replaces the former UBLOG(logDEBUG3, ...) trace.
+    VF_LOG_DEBUG("WriteMultiphaseQuantitiesCoProcessor::update: {}", step);
 }
 
 //////////////////////////////////////////////////////////////////////////
-void WriteMultiphaseQuantitiesCoProcessor::collectData(double step)
+void WriteMultiphaseQuantitiesCoProcessor::collectData(real step)
 {
     int istep = static_cast<int>(step);
 
@@ -125,7 +127,8 @@ void WriteMultiphaseQuantitiesCoProcessor::collectData(double step)
         {
             WbWriterVtkXmlASCII::getInstance()->addFilesToCollection(cfilePath, filenames, istep, false);
         }
-        UBLOG(logINFO, "WriteMultiphaseQuantitiesCoProcessor step: " << istep);
+        //UBLOG(logINFO, "WriteMultiphaseQuantitiesCoProcessor step: " << istep);
+        VF_LOG_INFO("WriteMultiphaseQuantitiesCoProcessor step: {}", istep);
     }
 
     clearData();
@@ -144,7 +147,10 @@ void WriteMultiphaseQuantitiesCoProcessor::clearData()
 void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+    using namespace vf::lbm::dir;
+    using namespace vf::basics::constant;
+
     SPtr<LBMKernel> kernel = dynamicPointerCast<LBMKernel>(block->getKernel());
     //double level   = (double)block->getLevel();
 
@@ -170,15 +176,15 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
     SPtr<PressureFieldArray3D> pressure;
     if (kernel->getDataSet()->getPressureField()) pressure = kernel->getDataSet()->getPressureField();
 
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal phi[D3Q27System::ENDF + 1];
-    LBMReal phi2[D3Q27System::ENDF + 1];
-    LBMReal vx1, vx2, vx3, rho, p1, beta, kappa;
-    LBMReal densityRatio = kernel->getDensityRatio();
+    real f[D3Q27System::ENDF + 1];
+    real phi[D3Q27System::ENDF + 1];
+    real phi2[D3Q27System::ENDF + 1];
+    real vx1, vx2, vx3, rho, p1, beta, kappa;
+    real densityRatio = kernel->getDensityRatio();
 
     kernel->getMultiphaseModelParameters(beta, kappa);
-    LBMReal phiL = kernel->getPhiL();
-    LBMReal phiH = kernel->getPhiH();
+    real phiL = kernel->getPhiL();
+    real phiH = kernel->getPhiH();
 
     // knotennummerierung faengt immer bei 0 an!
     int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
@@ -214,10 +220,10 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 
     // nummern vergeben und node vector erstellen + daten sammeln
     CbArray3D<int> nodeNumbers((int)maxX1, (int)maxX2, (int)maxX3, -1);
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(maxX1, maxX2, maxX3, -999.0));
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(maxX1, maxX2, maxX3, -999.0));
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+        new CbArray3D<real, IndexerX3X2X1>(maxX1, maxX2, maxX3, -999.0));
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField2(
+        new CbArray3D<real, IndexerX3X2X1>(maxX1, maxX2, maxX3, -999.0));
 
     for (int ix3 = minX3; ix3 < maxX3; ix3++) {
         for (int ix2 = minX2; ix2 < maxX2; ix2++) {
@@ -258,10 +264,10 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
     }
 
     int nr = (int)nodes.size();
-    LBMReal dX1_phi;
-    LBMReal dX2_phi;
-    LBMReal dX3_phi;
-    LBMReal mu;
+    real dX1_phi;
+    real dX2_phi;
+    real dX3_phi;
+    real mu;
 
     for (int ix3 = minX3; ix3 <= maxX3; ix3++) {
         for (int ix2 = minX2; ix2 <= maxX2; ix2++) {
@@ -350,12 +356,12 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
                     }
 
                     distributionsF->getDistribution(f, ix1, ix2, ix3);
-                    //LBMReal dU = (*divU)(ix1, ix2, ix3);
+                    //real dU = (*divU)(ix1, ix2, ix3);
 
-                    LBMReal rhoH = 1.0;
-                    LBMReal rhoL = 1.0 / densityRatio;
+                    real rhoH = 1.0;
+                    real rhoL = 1.0 / densityRatio;
                     // LBMReal rhoToPhi = (1.0 - 1.0/densityRatio);
-                    LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+                    real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
                     // rho = phi[ZERO] + (1.0 - phi[ZERO])*1.0/densityRatio;
                     rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
@@ -468,39 +474,41 @@ void WriteMultiphaseQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
     }
 }
 
-LBMReal WriteMultiphaseQuantitiesCoProcessor::gradX1_phi(const LBMReal *const &h)
+real WriteMultiphaseQuantitiesCoProcessor::gradX1_phi(const real *const &h)
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX1[k] * h[k];
     }
     return 3.0 * sum;
 }
-LBMReal WriteMultiphaseQuantitiesCoProcessor::gradX2_phi(const LBMReal *const &h)
+real WriteMultiphaseQuantitiesCoProcessor::gradX2_phi(const real *const &h)
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX2[k] * h[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal WriteMultiphaseQuantitiesCoProcessor::gradX3_phi(const LBMReal *const &h)
+real WriteMultiphaseQuantitiesCoProcessor::gradX3_phi(const real *const &h)
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX3[k] * h[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal WriteMultiphaseQuantitiesCoProcessor::nabla2_phi(const LBMReal *const &h)
+real WriteMultiphaseQuantitiesCoProcessor::nabla2_phi(const real *const &h)
 {
+    using namespace vf::lbm::dir;
+
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * (h[k] - h[DIR_000]);
     }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
index 3825f9d4df3e744aec1605524c78f0028e4380fd..de09654bb01b8bd851df3afcea1fb3445386fff0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
@@ -66,12 +66,12 @@ public:
                                           WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMultiphaseQuantitiesCoProcessor() override = default;
 
-    void process(double step) override;
+    void process(real step) override;
 
 protected:
     //! Collect data for VTK-file
     //! \param step is a time step
-    void collectData(double step);
+    void collectData(real step);
     //! Collect data for VTK-file
     //! \param block is a time step
     void addDataMQ(SPtr<Block3D> block);
@@ -82,7 +82,7 @@ private:
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
-    std::vector<std::vector<double>> data;
+    std::vector<std::vector<real>> data;
     std::string path;
     WbWriter *writer;
     SPtr<LBMUnitConverter> conv;
@@ -92,12 +92,12 @@ private:
     int gridRank;
     std::shared_ptr<vf::mpi::Communicator> comm;
 
-    LBMReal gradX1_phi(const LBMReal *const &);
-    LBMReal gradX2_phi(const LBMReal *const &);
-    LBMReal gradX3_phi(const LBMReal *const &);
-    LBMReal nabla2_phi(const LBMReal *const &);
+    real gradX1_phi(const real *const &);
+    real gradX2_phi(const real *const &);
+    real gradX3_phi(const real *const &);
+    real nabla2_phi(const real *const &);
 
-    using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
+    using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
index 900c4bc95e85e57254121882e43e89fbb05b7201..9ff3fe0982d43ed119e7cfb3fba497a61e2c201b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
@@ -71,7 +71,7 @@ void WriteThixotropyQuantitiesCoProcessor::init()
 
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteThixotropyQuantitiesCoProcessor::process(double step)
+void WriteThixotropyQuantitiesCoProcessor::process(real step)
 {
 	if (scheduler->isDue(step))
 		collectData(step);
@@ -79,7 +79,7 @@ void WriteThixotropyQuantitiesCoProcessor::process(double step)
 	UBLOG(logDEBUG3, "WriteThixotropyQuantitiesCoProcessor::update:" << step);
 }
 //////////////////////////////////////////////////////////////////////////
-void WriteThixotropyQuantitiesCoProcessor::collectData(double step)
+void WriteThixotropyQuantitiesCoProcessor::collectData(real step)
 {
 	int istep = static_cast<int>(step);
 	//ConcentrationSum = 0;
@@ -144,7 +144,7 @@ void WriteThixotropyQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 {
 	UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);;
 	UbTupleDouble3 nodeOffset = grid->getNodeOffset(block);
-	double         dx = grid->getDeltaX(block);
+	real         dx = grid->getDeltaX(block);
 
 	//double level = (double)block->getLevel();
 	//double blockID = (double)block->getGlobalID();
@@ -174,8 +174,8 @@ void WriteThixotropyQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
    SPtr<DistributionArray3D> distributionsF = kernel->getDataSet()->getFdistributions(); 
 	//SPtr<DistributionArray3D> distributionsH = kernel->getDataSet()->getHdistributions();
 	//LBMReal collFactorF = staticPointerCast<ThixotropyExpLBMKernel>(kernel)->getCollisionFactorF();
-	LBMReal collFactor = kernel->getCollisionFactor();
-	LBMReal f[D3Q27System::ENDF + 1];
+	real collFactor = kernel->getCollisionFactor();
+	real f[D3Q27System::ENDF + 1];
 	//LBMReal h[D3Q27System::ENDF + 1];
 	//LBMReal viscosity=0; // lambda, gammaDot;
 	
@@ -243,12 +243,12 @@ void WriteThixotropyQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 					//data[index++].push_back(collFactorF);
 
 					distributionsF->getDistribution(f, ix1, ix2, ix3);
-					LBMReal rho = D3Q27System::getDensity(f);
-					LBMReal shearRate = D3Q27System::getShearRate(f, collFactor);
+					real rho = D3Q27System::getDensity(f);
+					real shearRate = D3Q27System::getShearRate(f, collFactor);
 					//LBMReal omega = Rheology::getHerschelBulkleyCollFactor(collFactor, shearRate, rho);
 					//LBMReal omega = Rheology::getPowellEyringCollFactor(collFactor, shearRate, rho);
-					LBMReal omega = Rheology::getBinghamCollFactor(collFactor, shearRate, rho);
-					LBMReal viscosity = (omega == 0) ? 0 : UbMath::c1o3 * (UbMath::c1/omega-UbMath::c1o2);
+					real omega = Rheology::getBinghamCollFactor(collFactor, shearRate, rho);
+					real viscosity = (omega == 0) ? 0 : vf::basics::constant::c1o3 * (vf::basics::constant::c1o1/omega- vf::basics::constant::c1o2);
 
 					
 					data[index++].push_back(viscosity);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
index d247c5c76bd5dc243041e53905e2189980875bd3..fbb2b6740c53b1d8a2ed9e37881fff98e4707a69 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
@@ -48,10 +48,10 @@ public:
 	WriteThixotropyQuantitiesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
 	~WriteThixotropyQuantitiesCoProcessor() = default;
 
-   void process(double step) override;
+   void process(real step) override;
 
 protected:
-   void collectData(double step);
+   void collectData(real step);
    void addDataMQ(SPtr<Block3D> block);
    void clearData();
 
@@ -60,7 +60,7 @@ private:
    std::vector<UbTupleFloat3> nodes;
    std::vector<UbTupleUInt8> cells;
    std::vector<std::string> datanames;
-   std::vector<std::vector<double> > data; 
+   std::vector<std::vector<real> > data; 
    std::string path;
    WbWriter* writer;
    SPtr<LBMUnitConverter> conv;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/Block3DConnector.h b/src/cpu/VirtualFluidsCore/Connectors/Block3DConnector.h
index e5e86a31b35e7900059649c7322478b1db1b7284..89817f187e9b943655363e7a5cb707c01019965e 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/Block3DConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/Block3DConnector.h
@@ -75,7 +75,7 @@ public:
     // grid refinement
     virtual int getSendDir() const { return sendDir; }
 
-    // virtual double getSendRecieveTime() = 0;
+    // virtual real getSendRecieveTime() = 0;
 
     virtual void prepareForSendX1() = 0;
     virtual void prepareForSendX2() = 0;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/CoarseToFineVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/CoarseToFineVectorConnector.h
index 00ba4c4e915530a4678b7271b8f78648f267cd96..c2ba596a7bdb20c24fa373064e09854c4f6d95eb 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/CoarseToFineVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/CoarseToFineVectorConnector.h
@@ -97,7 +97,7 @@ public:
     bool isInterpolationConnectorCF() override { return true; }
     bool isInterpolationConnectorFC() override { return false; }
 
-    double getSendRecieveTime();
+    real getSendRecieveTime();
 
     void prepareForSendX1() override {}
     void prepareForSendX2() override {}
@@ -123,7 +123,7 @@ protected:
     InterpolationProcessorPtr iprocessor;
 
     void writeICellFtoData(vector_type &data, int &index, D3Q27ICell &icellF);
-    void writeNodeToVector(vector_type &data, int &index, LBMReal *inode);
+    void writeNodeToVector(vector_type &data, int &index, real *inode);
     void getLocalMinMax(const int &gMin, const int &gMax, const bool &even, int &lMin, int &lMax,
                         const bool &dataDistribution);
     void getLocalMinMax(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3);
@@ -135,7 +135,7 @@ protected:
     void distributeReceiveVector(SPtr<DistributionArray3D> fTo, const int &lMinX1, const int &lMinX2, const int &lMinX3,
                                  const int &lMaxX1, const int &lMaxX2, const int &lMaxX3, vector_type &data,
                                  int &index);
-    void readICellCfromData(vector_type &data, int &index, LBMReal *icellC);
+    void readICellCfromData(vector_type &data, int &index, real *icellC);
 
     void findCFnodes();
     void findCFnodes(SPtr<DistributionArray3D> fFrom, const int &lMinX1, const int &lMinX2, const int &lMinX3,
@@ -156,15 +156,17 @@ CoarseToFineVectorConnector<VectorTransmitter>::CoarseToFineVectorConnector(
       receiverEvenOddNW(receiverEvenOddNW), receiverOddEvenSE(receiverOddEvenSE), receiverOddOddNE(receiverOddOddNE),
       iprocessor(iprocessor)
 {
-    if (!(sendDir == D3Q27System::DIR_P00 || sendDir == D3Q27System::DIR_M00 || sendDir == D3Q27System::DIR_0P0 ||
-          sendDir == D3Q27System::DIR_0M0 || sendDir == D3Q27System::DIR_00P || sendDir == D3Q27System::DIR_00M ||
-          sendDir == D3Q27System::DIR_PP0 || sendDir == D3Q27System::DIR_MM0 || sendDir == D3Q27System::DIR_PM0 ||
-          sendDir == D3Q27System::DIR_MP0 || sendDir == D3Q27System::DIR_P0P || sendDir == D3Q27System::DIR_M0M ||
-          sendDir == D3Q27System::DIR_P0M || sendDir == D3Q27System::DIR_M0P || sendDir == D3Q27System::DIR_0PP ||
-          sendDir == D3Q27System::DIR_0MM || sendDir == D3Q27System::DIR_0PM || sendDir == D3Q27System::DIR_0MP ||
-          sendDir == D3Q27System::DIR_PPP || sendDir == D3Q27System::DIR_MPP || sendDir == D3Q27System::DIR_PMP ||
-          sendDir == D3Q27System::DIR_MMP || sendDir == D3Q27System::DIR_PPM || sendDir == D3Q27System::DIR_MPM ||
-          sendDir == D3Q27System::DIR_PMM || sendDir == D3Q27System::DIR_MMM)) {
+    using namespace vf::lbm::dir;
+
+    if (!(sendDir == DIR_P00 || sendDir == DIR_M00 || sendDir == DIR_0P0 ||
+          sendDir == DIR_0M0 || sendDir == DIR_00P || sendDir == DIR_00M ||
+          sendDir == DIR_PP0 || sendDir == DIR_MM0 || sendDir == DIR_PM0 ||
+          sendDir == DIR_MP0 || sendDir == DIR_P0P || sendDir == DIR_M0M ||
+          sendDir == DIR_P0M || sendDir == DIR_M0P || sendDir == DIR_0PP ||
+          sendDir == DIR_0MM || sendDir == DIR_0PM || sendDir == DIR_0MP ||
+          sendDir == DIR_PPP || sendDir == DIR_MPP || sendDir == DIR_PMP ||
+          sendDir == DIR_MMP || sendDir == DIR_PPM || sendDir == DIR_MPM ||
+          sendDir == DIR_PMM || sendDir == DIR_MMM)) {
         throw UbException(UB_EXARGS, "invalid constructor for this direction");
     }
 }
@@ -298,13 +300,14 @@ template <typename VectorTransmitter>
 void CoarseToFineVectorConnector<VectorTransmitter>::init()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     bMaxX1 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX1();
     bMaxX2 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX2();
     bMaxX3 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX3();
 
     int sendSize      = 0;
-    LBMReal initValue = -999.0;
+    real initValue = -999.0;
 
     int sendDataPerNode = 27 /*f*/;
     int iCellSize       = 8; // size of interpolation cell
@@ -356,28 +359,28 @@ void CoarseToFineVectorConnector<VectorTransmitter>::init()
     if (senderEvenEvenSW)
         senderEvenEvenSW->getData().resize(sendSize, initValue);
     else
-        senderEvenEvenSW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        senderEvenEvenSW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (senderEvenOddNW)
         senderEvenOddNW->getData().resize(sendSize, initValue);
     else
-        senderEvenOddNW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        senderEvenOddNW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (senderOddEvenSE)
         senderOddEvenSE->getData().resize(sendSize, initValue);
     else
-        senderOddEvenSE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        senderOddEvenSE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (senderOddOddNE)
         senderOddOddNE->getData().resize(sendSize, initValue);
     else
-        senderOddOddNE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        senderOddOddNE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
 
     if (!receiverEvenEvenSW)
-        receiverEvenEvenSW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        receiverEvenEvenSW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (!receiverEvenOddNW)
-        receiverEvenOddNW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        receiverEvenOddNW = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (!receiverOddEvenSE)
-        receiverOddEvenSE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        receiverOddEvenSE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
     if (!receiverOddOddNE)
-        receiverOddOddNE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<LBMReal>>());
+        receiverOddOddNE = VectorTransmitterPtr(new TbLocalTransmitter<CbVector<real>>());
 
     // findCFnodes();
 }
@@ -386,6 +389,7 @@ template <typename VectorTransmitter>
 void CoarseToFineVectorConnector<VectorTransmitter>::fillSendVectors()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     SPtr<DistributionArray3D> fFrom = block.lock()->getKernel()->getDataSet()->getFdistributions();
     int maxX1                       = (int)fFrom->getNX1();
@@ -828,7 +832,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::fillSendVectorExt(SPtr<Dist
     if (data.size() == 0)
         return;
     int ix1, ix2, ix3;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
     SPtr<BCArray3D> bcArray = block.lock()->getKernel()->getBCProcessor()->getBCArray();
 
     for (ix3 = lMinX3; ix3 < lMaxX3; ix3++) {
@@ -878,7 +882,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::writeICellFtoData(vector_ty
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-void CoarseToFineVectorConnector<VectorTransmitter>::writeNodeToVector(vector_type &data, int &index, LBMReal *inode)
+void CoarseToFineVectorConnector<VectorTransmitter>::writeNodeToVector(vector_type &data, int &index, real *inode)
 {
     for (int i = D3Q27System::STARTF; i < D3Q27System::ENDF + 1; i++) {
         data[index++] = inode[i];
@@ -889,6 +893,7 @@ template <typename VectorTransmitter>
 void CoarseToFineVectorConnector<VectorTransmitter>::distributeReceiveVectors()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     SPtr<DistributionArray3D> fTo = block.lock()->getKernel()->getDataSet()->getFdistributions();
     int maxX1                     = (int)fTo->getNX1();
@@ -1606,7 +1611,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::distributeReceiveVector(SPt
     for (ix3 = lMinX3; ix3 < lMaxX3; ix3++) {
         for (ix2 = lMinX2; ix2 < lMaxX2; ix2++) {
             for (ix1 = lMinX1; ix1 < lMaxX1; ix1++) {
-                LBMReal icellC[27];
+                real icellC[27];
                 this->readICellCfromData(data, index, icellC);
                 iprocessor->writeINodeInv(fTo, icellC, ix1, ix2, ix3);
             }
@@ -1615,7 +1620,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::distributeReceiveVector(SPt
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-void CoarseToFineVectorConnector<VectorTransmitter>::readICellCfromData(vector_type &data, int &index, LBMReal *icellC)
+void CoarseToFineVectorConnector<VectorTransmitter>::readICellCfromData(vector_type &data, int &index, real *icellC)
 {
     for (int i = D3Q27System::STARTF; i < D3Q27System::ENDF + 1; i++) {
         icellC[i] = data[index++];
@@ -1627,6 +1632,8 @@ void CoarseToFineVectorConnector<VectorTransmitter>::getLocalMinMax(int &minX1,
                                                                     int &maxX2, int &maxX3)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     int TminX1 = minX1;
     int TminX2 = minX2;
     int TminX3 = minX3;
@@ -1767,6 +1774,8 @@ void CoarseToFineVectorConnector<VectorTransmitter>::getLocalMinMax(int &minX1,
                                                                     CFconnectorType /*connType*/)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     int TminX1 = minX1;
     int TminX2 = minX2;
     int TminX3 = minX3;
@@ -1923,6 +1932,8 @@ void CoarseToFineVectorConnector<VectorTransmitter>::findCFnodes()
     int lMinX1, lMinX2, lMinX3, lMaxX1, lMaxX2, lMaxX3;
 
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     if (block.lock()->hasInterpolationFlagCF(DIR_M00)) {
         lMinX1 = 1;
         lMaxX1 = lMinX1 + 1;
@@ -1967,7 +1978,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::findCFnodes(SPtr<Distributi
     if (data.size() == 0)
         return;
     int ix1, ix2, ix3;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
     SPtr<BCArray3D> bcArray = block.lock()->getKernel()->getBCProcessor()->getBCArray();
 
     for (ix3 = lMinX3; ix3 < lMaxX3; ix3++) {
@@ -2014,7 +2025,7 @@ void CoarseToFineVectorConnector<VectorTransmitter>::findCFnodes(SPtr<Distributi
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-double CoarseToFineVectorConnector<VectorTransmitter>::getSendRecieveTime()
+real CoarseToFineVectorConnector<VectorTransmitter>::getSendRecieveTime()
 {
     return 0;
 }
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnector.h
index f8809f17b6676148faf3a94a1b9fcabede72fe58..f4f4080215869cdd4ae1c75e750daa2419af9240 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnector.h
@@ -87,7 +87,7 @@ public:
     bool isInterpolationConnectorCF() override { return false; }
     bool isInterpolationConnectorFC() override { return true; }
 
-    double getSendRecieveTime();
+    real getSendRecieveTime();
 
     void prepareForSendX1() override {}
     void prepareForSendX2() override {}
@@ -104,6 +104,7 @@ public:
     void receiveVectorsX1() override {}
     void receiveVectorsX2() override {}
     void receiveVectorsX3() override {}
+    void getLocalMinMax(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3);
 
 protected:
     WPtr<Block3D> block; // dieser nvd sendet daten und die empfangenen werden diesem nvd zugeordnet
@@ -114,9 +115,9 @@ protected:
 
     CFconnectorType connType;
 
-    void writeICellCtoData(vector_type &data, int &index, LBMReal *icellC);
-    void writeNodeToVector(vector_type &data, int &index, LBMReal *inode);
-    void getLocalMinMax(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3);
+    void writeICellCtoData(vector_type &data, int &index, real *icellC);
+    void writeNodeToVector(vector_type &data, int &index, real *inode);
+    // getLocalMinMax(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3) is now declared in the public section above.
     void getLocalMinMax(int &minX1, int &minX2, int &minX3, int &maxX1, int &maxX2, int &maxX3,
                         CFconnectorType connType);
     void getLocalMinMaxCF(int gMax, int &lMin, int &lMax);
@@ -127,7 +128,7 @@ protected:
                                  const int &lMaxX1, const int &lMaxX2, const int &lMaxX3, vector_type &data,
                                  int &index);
     void readICellFfromData(vector_type &data, int &index, D3Q27ICell &icellF);
-    void readNodeFromVector(vector_type &data, int &index, LBMReal *inode);
+    void readNodeFromVector(vector_type &data, int &index, real *inode);
     void getLocalOffsets(const int &gMax, int &oMin);
     void getLocalMins(int &minX1, int &minX2, int &minX3, const int &oMinX1, const int &oMinX2, const int &oMinX3);
 
@@ -143,16 +144,18 @@ FineToCoarseVectorConnector<VectorTransmitter>::FineToCoarseVectorConnector(SPtr
     : Block3DConnector(sendDir), block(block), sender(sender), receiver(receiver), iprocessor(iprocessor),
       connType(connType)
 {
-    if (!(sendDir == D3Q27System::DIR_P00 || sendDir == D3Q27System::DIR_M00 || sendDir == D3Q27System::DIR_0P0 ||
-          sendDir == D3Q27System::DIR_0M0 || sendDir == D3Q27System::DIR_00P || sendDir == D3Q27System::DIR_00M ||
-          sendDir == D3Q27System::DIR_PP0 || sendDir == D3Q27System::DIR_MM0 || sendDir == D3Q27System::DIR_PM0 ||
-          sendDir == D3Q27System::DIR_MP0 || sendDir == D3Q27System::DIR_P0P || sendDir == D3Q27System::DIR_M0M ||
-          sendDir == D3Q27System::DIR_P0M || sendDir == D3Q27System::DIR_M0P || sendDir == D3Q27System::DIR_0PP ||
-          sendDir == D3Q27System::DIR_0MM || sendDir == D3Q27System::DIR_0PM || sendDir == D3Q27System::DIR_0MP
+    using namespace vf::lbm::dir;
 
-          || sendDir == D3Q27System::DIR_PPP || sendDir == D3Q27System::DIR_MPP || sendDir == D3Q27System::DIR_PMP ||
-          sendDir == D3Q27System::DIR_MMP || sendDir == D3Q27System::DIR_PPM || sendDir == D3Q27System::DIR_MPM ||
-          sendDir == D3Q27System::DIR_PMM || sendDir == D3Q27System::DIR_MMM
+    if (!(sendDir == DIR_P00 || sendDir == DIR_M00 || sendDir == DIR_0P0 ||
+          sendDir == DIR_0M0 || sendDir == DIR_00P || sendDir == DIR_00M ||
+          sendDir == DIR_PP0 || sendDir == DIR_MM0 || sendDir == DIR_PM0 ||
+          sendDir == DIR_MP0 || sendDir == DIR_P0P || sendDir == DIR_M0M ||
+          sendDir == DIR_P0M || sendDir == DIR_M0P || sendDir == DIR_0PP ||
+          sendDir == DIR_0MM || sendDir == DIR_0PM || sendDir == DIR_0MP
+
+          || sendDir == DIR_PPP || sendDir == DIR_MPP || sendDir == DIR_PMP ||
+          sendDir == DIR_MMP || sendDir == DIR_PPM || sendDir == DIR_MPM ||
+          sendDir == DIR_PMM || sendDir == DIR_MMM
 
           )) {
         throw UbException(UB_EXARGS, "invalid constructor for this direction");
@@ -223,13 +226,14 @@ template <typename VectorTransmitter>
 void FineToCoarseVectorConnector<VectorTransmitter>::init()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     bMaxX1 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX1();
     bMaxX2 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX2();
     bMaxX3 = (int)block.lock()->getKernel()->getDataSet()->getFdistributions()->getNX3();
 
     int sendSize      = 0;
-    LBMReal initValue = -999.0;
+    real initValue = -999.0;
 
     int sendDataPerNode = 27 /*f*/;
     int iCellSize       = 1; // size of interpolation cell
@@ -285,6 +289,7 @@ template <typename VectorTransmitter>
 void FineToCoarseVectorConnector<VectorTransmitter>::fillSendVectors()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     SPtr<DistributionArray3D> fFrom = block.lock()->getKernel()->getDataSet()->getFdistributions();
     int maxX1                       = (int)fFrom->getNX1();
@@ -789,13 +794,13 @@ void FineToCoarseVectorConnector<VectorTransmitter>::fillSendVector(SPtr<Distrib
                                                                     const int &lMaxX3, vector_type &data, int &index)
 {
     int ix1, ix2, ix3;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
     SPtr<BCArray3D> bcArray = block.lock()->getKernel()->getBCProcessor()->getBCArray();
 
     for (ix3 = lMinX3; ix3 < lMaxX3; ix3 += 2) {
         for (ix2 = lMinX2; ix2 < lMaxX2; ix2 += 2) {
             for (ix1 = lMinX1; ix1 < lMaxX1; ix1 += 2) {
-                LBMReal icellC[27];
+                real icellC[27];
                 D3Q27ICell icellF;
 
                 int howManySolids = iprocessor->iCellHowManySolids(bcArray, ix1, ix2, ix3);
@@ -826,7 +831,7 @@ void FineToCoarseVectorConnector<VectorTransmitter>::fillSendVector(SPtr<Distrib
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-void FineToCoarseVectorConnector<VectorTransmitter>::writeICellCtoData(vector_type &data, int &index, LBMReal *icellC)
+void FineToCoarseVectorConnector<VectorTransmitter>::writeICellCtoData(vector_type &data, int &index, real *icellC)
 {
     for (int i = D3Q27System::STARTF; i < D3Q27System::ENDF + 1; i++) {
         data[index++] = icellC[i];
@@ -848,6 +853,7 @@ template <typename VectorTransmitter>
 void FineToCoarseVectorConnector<VectorTransmitter>::distributeReceiveVectors()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     SPtr<DistributionArray3D> fTo = block.lock()->getKernel()->getDataSet()->getFdistributions();
     int maxX1                     = (int)fTo->getNX1();
@@ -1144,7 +1150,7 @@ void FineToCoarseVectorConnector<VectorTransmitter>::readICellFfromData(vector_t
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-void FineToCoarseVectorConnector<VectorTransmitter>::readNodeFromVector(vector_type &data, int &index, LBMReal *inode)
+void FineToCoarseVectorConnector<VectorTransmitter>::readNodeFromVector(vector_type &data, int &index, real *inode)
 {
     for (int i = D3Q27System::STARTF; i < D3Q27System::ENDF + 1; i++) {
         inode[i] = data[index++];
@@ -1156,6 +1162,8 @@ void FineToCoarseVectorConnector<VectorTransmitter>::getLocalMinMax(int &minX1,
                                                                     int &maxX2, int &maxX3)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+    
     int TminX1 = minX1;
     int TminX2 = minX2;
     int TminX3 = minX3;
@@ -1297,6 +1305,8 @@ void FineToCoarseVectorConnector<VectorTransmitter>::getLocalMinMax(int &minX1,
                                                                     CFconnectorType /*connType*/)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     int TminX1 = minX1;
     int TminX2 = minX2;
     int TminX3 = minX3;
@@ -1446,6 +1456,7 @@ void FineToCoarseVectorConnector<VectorTransmitter>::getLocalMins(int &minX1, in
                                                                   const int &oMinX2, const int &oMinX3)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
 
     switch (sendDir) {
         case DIR_P00:
@@ -1544,7 +1555,7 @@ void FineToCoarseVectorConnector<VectorTransmitter>::getLocalMins(int &minX1, in
 }
 //////////////////////////////////////////////////////////////////////////
 template <typename VectorTransmitter>
-double FineToCoarseVectorConnector<VectorTransmitter>::getSendRecieveTime()
+real FineToCoarseVectorConnector<VectorTransmitter>::getSendRecieveTime()
 {
     return 0;
 }
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnectorTest.cpp b/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnectorTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1d400f74e8e4a2def9419c4e5288c512db7af936
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/Connectors/FineToCoarseVectorConnectorTest.cpp
@@ -0,0 +1,50 @@
+#include <gmock/gmock.h>
+
+#include "FineToCoarseVectorConnector.h"
+#include "Block3D.h"
+#include "CbVector.h"
+#include "CreateTransmittersHelper.h"
+
+class FineToCoarseVectorConnectorTest : public testing::Test
+{
+    // Fixture for the FineToCoarseVectorConnector tests: SetUp() gives every test a fresh, default-constructed Block3D.
+    void SetUp() override // NOTE(review): private by class default; GoogleTest samples declare SetUp/TearDown as protected
+    {
+        block = std::make_shared<Block3D>();
+    }
+
+    void TearDown() override
+    {
+        // Nothing to release explicitly; 'block' is held by a std::shared_ptr.
+    }
+
+public:
+    CreateTransmittersHelper::TransmitterPtr senderFCevenEvenSW, receiverFCevenEvenSW; // never assigned in this fixture, so both stay default-constructed
+    std::shared_ptr<Block3D> block; // block handed to the connector under test; re-created in SetUp()
+};
+
+TEST_F(FineToCoarseVectorConnectorTest, getLocalMinMax)
+{
+    using namespace vf::lbm::dir;
+
+    int sendDir = DIR_P00;
+    block->setInterpolationFlagFC(sendDir);
+    // Build the connector under test. Constructor argument order: block, sender, receiver, sendDir,
+    // iprocessor, connType. Sender, receiver and iprocessor are only default-constructed in this test.
+    InterpolationProcessorPtr iprocessor;
+    auto sut = FineToCoarseVectorConnector<TbTransmitter<CbVector<real>>>(block, senderFCevenEvenSW, receiverFCevenEvenSW, sendDir, iprocessor, EvenOddNW);
+
+    // Exercise the six-argument getLocalMinMax overload, which takes every bound by non-const reference.
+    // The bounds are hard-coded below instead of being read from the block's distribution array
+    // (getNX1/getNX2/getNX3); this test does not set up a kernel or data set on the block.
+    int maxX1 = 5; // stands in for (int)fFrom->getNX1()
+    int maxX2 = 5; // stands in for (int)fFrom->getNX2()
+    int maxX3 = 5;// stands in for (int)fFrom->getNX3()
+    int minX1 = 0;
+    int minX2 = 0;
+    int minX3 = 0;
+    sut.getLocalMinMax(minX1, minX2, minX3, maxX1, maxX2, maxX3);
+
+    int expectedMaxX1 = 2; // NOTE(review): expected upper X1 bound for DIR_P00 starting from 5; derivation is not visible here - confirm against getLocalMinMax
+    EXPECT_THAT(maxX1, testing::Eq(expectedMaxX1));
+}
\ No newline at end of file
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.cpp
index 18a8319589cde954b7c2202e10f3eda61b435671..7a12ef1bb30796bd2aa24fd6c61c07f26295950d 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.cpp
@@ -59,8 +59,10 @@ void FullDirectConnector::sendVectors()
 //////////////////////////////////////////////////////////////////////////
 void FullDirectConnector::exchangeData()
 {
+    using namespace vf::lbm::dir;
+
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 exchangeData(maxX1 - 1, x2, x3, 0, x2, x3);
@@ -68,7 +70,7 @@ void FullDirectConnector::exchangeData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 exchangeData(1, x2, x3, maxX1, x2, x3);
@@ -76,7 +78,7 @@ void FullDirectConnector::exchangeData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 exchangeData(x1, maxX2 - 1, x3, x1, 0, x3);
@@ -84,7 +86,7 @@ void FullDirectConnector::exchangeData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 exchangeData(x1, 1, x3, x1, maxX2, x3);
@@ -93,7 +95,7 @@ void FullDirectConnector::exchangeData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 exchangeData(x1, x2, maxX3 - 1, x1, x2, 0);
@@ -101,7 +103,7 @@ void FullDirectConnector::exchangeData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 exchangeData(x1, x2, 1, x1, x2, maxX3);
@@ -109,77 +111,77 @@ void FullDirectConnector::exchangeData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             exchangeData(maxX1 - 1, maxX2 - 1, x3, 0, 0, x3);
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             exchangeData(1, maxX2 - 1, x3, maxX1, 0, x3);
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             exchangeData(1, 1, x3, maxX1, maxX2, x3);
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             exchangeData(maxX1 - 1, 1, x3, 0, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = 1; x2 < maxX2; x2++) {
             exchangeData(maxX1 - 1, x2, maxX3 - 1, 0, x2, 0);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = 1; x2 < maxX2; x2++) {
             exchangeData(1, x2, 1, maxX1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = 1; x2 < maxX2; x2++) {
             exchangeData(maxX1 - 1, x2, 1, 0, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = 1; x2 < maxX2; x2++) {
             exchangeData(1, x2, maxX3 - 1, maxX1, x2, 0);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = 1; x1 < maxX1; x1++) {
             exchangeData(x1, maxX2 - 1, maxX3 - 1, x1, 0, 0);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = 1; x1 < maxX1; x1++) {
             exchangeData(x1, 1, 1, x1, maxX2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = 1; x1 < maxX1; x1++) {
             exchangeData(x1, maxX2 - 1, 1, x1, 0, maxX3);
         }
 
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = 1; x1 < maxX1; x1++) {
             exchangeData(x1, 1, maxX3 - 1, x1, maxX2, 0);
         }
 
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         exchangeData(1, 1, maxX3 - 1, maxX1, maxX2, 0);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         exchangeData(maxX1 - 1, 1, maxX3 - 1, 0, maxX2, 0);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         exchangeData(1, maxX2 - 1, maxX3 - 1, maxX1, 0, 0);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         exchangeData(maxX1 - 1, maxX2 - 1, maxX3 - 1, 0, 0, 0);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         exchangeData(1, 1, 1, maxX1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         exchangeData(maxX1 - 1, 1, 1, 0, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         exchangeData(1, maxX2 - 1, 1, maxX1, 0, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         exchangeData(maxX1 - 1, maxX2 - 1, 1, 0, 0, maxX3);
     } else
         UB_THROW(UbException(UB_EXARGS, "unknown dir"));
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.cpp
index d5b810015abc1172699b7489df4ea0aee8b02fe2..d4cb17d156016815b20f8420a4699a428899af51 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.cpp
@@ -59,11 +59,13 @@ void FullVectorConnector::fillSendVectors()
 ////////////////////////////////////////////////////////////////////////
 void FullVectorConnector::fillData()
 {
+    using namespace vf::lbm::dir;
+
     vector_type &sdata = sender->getData();
 
     int index = 0;
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 fillData(sdata, index, maxX1 - 1, x2, x3);
@@ -71,7 +73,7 @@ void FullVectorConnector::fillData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 fillData(sdata, index, 1, x2, x3);
@@ -79,7 +81,7 @@ void FullVectorConnector::fillData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 fillData(sdata, index, x1, maxX2 - 1, x3);
@@ -87,7 +89,7 @@ void FullVectorConnector::fillData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 fillData(sdata, index, x1, 1, x3);
@@ -95,7 +97,7 @@ void FullVectorConnector::fillData()
         }
     }
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 fillData(sdata, index, x1, x2, maxX3 - 1);
@@ -103,7 +105,7 @@ void FullVectorConnector::fillData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 fillData(sdata, index, x1, x2, 1);
@@ -111,24 +113,24 @@ void FullVectorConnector::fillData()
         }
     }
     // NE NW SW SE
-    else if (sendDir == D3Q27System::DIR_PP0 || sendDir == D3Q27System::DIR_MP0 || sendDir == D3Q27System::DIR_MM0 ||
-             sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PP0 || sendDir == DIR_MP0 || sendDir == DIR_MM0 ||
+             sendDir == DIR_PM0) {
         int x1 = 0;
         int x2 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_PP0:
+            case DIR_PP0:
                 x1 = maxX1 - 1;
                 x2 = maxX2 - 1;
                 break;
-            case D3Q27System::DIR_MP0:
+            case DIR_MP0:
                 x1 = 1;
                 x2 = maxX2 - 1;
                 break;
-            case D3Q27System::DIR_MM0:
+            case DIR_MM0:
                 x1 = 1;
                 x2 = 1;
                 break;
-            case D3Q27System::DIR_PM0:
+            case DIR_PM0:
                 x1 = maxX1 - 1;
                 x2 = 1;
                 break;
@@ -138,24 +140,24 @@ void FullVectorConnector::fillData()
         }
     }
     // TE TW BW BE
-    else if (sendDir == D3Q27System::DIR_P0P || sendDir == D3Q27System::DIR_M0P || sendDir == D3Q27System::DIR_M0M ||
-             sendDir == D3Q27System::DIR_P0M) {
+    else if (sendDir == DIR_P0P || sendDir == DIR_M0P || sendDir == DIR_M0M ||
+             sendDir == DIR_P0M) {
         int x1 = 0;
         int x3 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_P0P:
+            case DIR_P0P:
                 x1 = maxX1 - 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_M0P:
+            case DIR_M0P:
                 x1 = 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_M0M:
+            case DIR_M0M:
                 x1 = 1;
                 x3 = 1;
                 break;
-            case D3Q27System::DIR_P0M:
+            case DIR_P0M:
                 x1 = maxX1 - 1;
                 x3 = 1;
                 break;
@@ -165,24 +167,24 @@ void FullVectorConnector::fillData()
         }
     }
     // TN BN BS TS
-    else if (sendDir == D3Q27System::DIR_0PP || sendDir == D3Q27System::DIR_0PM || sendDir == D3Q27System::DIR_0MM ||
-             sendDir == D3Q27System::DIR_0MP) {
+    else if (sendDir == DIR_0PP || sendDir == DIR_0PM || sendDir == DIR_0MM ||
+             sendDir == DIR_0MP) {
         int x2 = 0;
         int x3 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_0PP:
+            case DIR_0PP:
                 x3 = maxX3 - 1;
                 x2 = maxX2 - 1;
                 break;
-            case D3Q27System::DIR_0PM:
+            case DIR_0PM:
                 x3 = 1;
                 x2 = maxX2 - 1;
                 break;
-            case D3Q27System::DIR_0MM:
+            case DIR_0MM:
                 x3 = 1;
                 x2 = 1;
                 break;
-            case D3Q27System::DIR_0MP:
+            case DIR_0MP:
                 x3 = maxX3 - 1;
                 x2 = 1;
                 break;
@@ -192,49 +194,49 @@ void FullVectorConnector::fillData()
         }
     }
     // TNE TNW TSW TSE BNE BNW BSW BSE
-    else if (sendDir == D3Q27System::DIR_PPP || sendDir == D3Q27System::DIR_MPP || sendDir == D3Q27System::DIR_MMP ||
-             sendDir == D3Q27System::DIR_PMP || sendDir == D3Q27System::DIR_PPM || sendDir == D3Q27System::DIR_MPM ||
-             sendDir == D3Q27System::DIR_MMM || sendDir == D3Q27System::DIR_PMM) {
+    else if (sendDir == DIR_PPP || sendDir == DIR_MPP || sendDir == DIR_MMP ||
+             sendDir == DIR_PMP || sendDir == DIR_PPM || sendDir == DIR_MPM ||
+             sendDir == DIR_MMM || sendDir == DIR_PMM) {
         int x1 = 0;
         int x2 = 0;
         int x3 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_PPP:
+            case DIR_PPP:
                 x1 = maxX1 - 1;
                 x2 = maxX2 - 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_MPP:
+            case DIR_MPP:
                 x1 = 1;
                 x2 = maxX2 - 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_MMP:
+            case DIR_MMP:
                 x1 = 1;
                 x2 = 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_PMP:
+            case DIR_PMP:
                 x1 = maxX1 - 1;
                 x2 = 1;
                 x3 = maxX3 - 1;
                 break;
-            case D3Q27System::DIR_PPM:
+            case DIR_PPM:
                 x1 = maxX1 - 1;
                 x2 = maxX2 - 1;
                 x3 = 1;
                 break;
-            case D3Q27System::DIR_MPM:
+            case DIR_MPM:
                 x1 = 1;
                 x2 = maxX2 - 1;
                 x3 = 1;
                 break;
-            case D3Q27System::DIR_MMM:
+            case DIR_MMM:
                 x1 = 1;
                 x2 = 1;
                 x3 = 1;
                 break;
-            case D3Q27System::DIR_PMM:
+            case DIR_PMM:
                 x1 = maxX1 - 1;
                 x2 = 1;
                 x3 = 1;
@@ -253,41 +255,43 @@ void FullVectorConnector::distributeReceiveVectors()
 ////////////////////////////////////////////////////////////////////////
 void FullVectorConnector::distributeData()
 {
+    using namespace vf::lbm::dir;
+
     vector_type &rdata = receiver->getData();
 
     int index = 0;
 
-    if (sendDir == D3Q27System::DIR_M00) {
+    if (sendDir == DIR_M00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 distributeData(rdata, index, 0, x2, x3);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_P00) {
+    } else if (sendDir == DIR_P00) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x2 = 1; x2 < maxX2; x2++) {
                 distributeData(rdata, index, maxX1, x2, x3);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_0M0) {
+    } else if (sendDir == DIR_0M0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 distributeData(rdata, index, x1, 0, x3);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_0P0) {
+    } else if (sendDir == DIR_0P0) {
         for (int x3 = 1; x3 < maxX3; x3++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 distributeData(rdata, index, x1, maxX2, x3);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_00M) {
+    } else if (sendDir == DIR_00M) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 distributeData(rdata, index, x1, x2, 0);
             }
         }
-    } else if (sendDir == D3Q27System::DIR_00P) {
+    } else if (sendDir == DIR_00P) {
         for (int x2 = 1; x2 < maxX2; x2++) {
             for (int x1 = 1; x1 < maxX1; x1++) {
                 distributeData(rdata, index, x1, x2, maxX3);
@@ -295,25 +299,25 @@ void FullVectorConnector::distributeData()
         }
     }
     // NE NW SW SE
-    else if (sendDir == D3Q27System::DIR_PP0 || sendDir == D3Q27System::DIR_MP0 || sendDir == D3Q27System::DIR_MM0 ||
-             sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PP0 || sendDir == DIR_MP0 || sendDir == DIR_MM0 ||
+             sendDir == DIR_PM0) {
         int x1 = 0;
         int x2 = 0;
         switch (sendDir) // wenn sendir NE dann kommen werte von SW
         {
-            case D3Q27System::DIR_PP0:
+            case DIR_PP0:
                 x1 = maxX1;
                 x2 = maxX2;
                 break;
-            case D3Q27System::DIR_MP0:
+            case DIR_MP0:
                 x1 = 0;
                 x2 = maxX2;
                 break;
-            case D3Q27System::DIR_MM0:
+            case DIR_MM0:
                 x1 = 0;
                 x2 = 0;
                 break;
-            case D3Q27System::DIR_PM0:
+            case DIR_PM0:
                 x1 = maxX1;
                 x2 = 0;
                 break;
@@ -324,27 +328,27 @@ void FullVectorConnector::distributeData()
 
     }
     // TE TW BW BE
-    else if (sendDir == D3Q27System::DIR_P0P || sendDir == D3Q27System::DIR_M0P || sendDir == D3Q27System::DIR_M0M ||
-             sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0P || sendDir == DIR_M0P || sendDir == DIR_M0M ||
+             sendDir == DIR_P0M)
 
     {
         int x1 = 0;
         int x3 = 0;
         switch (sendDir) // wenn sendir NE dann kommen werte von SW
         {
-            case D3Q27System::DIR_P0P:
+            case DIR_P0P:
                 x1 = maxX1;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_M0P:
+            case DIR_M0P:
                 x1 = 0;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_M0M:
+            case DIR_M0M:
                 x1 = 0;
                 x3 = 0;
                 break;
-            case D3Q27System::DIR_P0M:
+            case DIR_P0M:
                 x1 = maxX1;
                 x3 = 0;
                 break;
@@ -354,24 +358,24 @@ void FullVectorConnector::distributeData()
         }
     }
     // TN BN BS TS
-    else if (sendDir == D3Q27System::DIR_0PP || sendDir == D3Q27System::DIR_0PM || sendDir == D3Q27System::DIR_0MM ||
-             sendDir == D3Q27System::DIR_0MP) {
+    else if (sendDir == DIR_0PP || sendDir == DIR_0PM || sendDir == DIR_0MM ||
+             sendDir == DIR_0MP) {
         int x2 = 0;
         int x3 = 0;
         switch (sendDir) {
-            case D3Q27System::DIR_0PP:
+            case DIR_0PP:
                 x3 = maxX3;
                 x2 = maxX2;
                 break;
-            case D3Q27System::DIR_0PM:
+            case DIR_0PM:
                 x3 = 0;
                 x2 = maxX2;
                 break;
-            case D3Q27System::DIR_0MM:
+            case DIR_0MM:
                 x3 = 0;
                 x2 = 0;
                 break;
-            case D3Q27System::DIR_0MP:
+            case DIR_0MP:
                 x3 = maxX3;
                 x2 = 0;
                 break;
@@ -381,50 +385,50 @@ void FullVectorConnector::distributeData()
         }
     }
     // TNE TNW TSW TSE BNE BNW BSW BSE
-    else if (sendDir == D3Q27System::DIR_PPP || sendDir == D3Q27System::DIR_MPP || sendDir == D3Q27System::DIR_MMP ||
-             sendDir == D3Q27System::DIR_PMP || sendDir == D3Q27System::DIR_PPM || sendDir == D3Q27System::DIR_MPM ||
-             sendDir == D3Q27System::DIR_MMM || sendDir == D3Q27System::DIR_PMM) {
+    else if (sendDir == DIR_PPP || sendDir == DIR_MPP || sendDir == DIR_MMP ||
+             sendDir == DIR_PMP || sendDir == DIR_PPM || sendDir == DIR_MPM ||
+             sendDir == DIR_MMM || sendDir == DIR_PMM) {
         int x1 = 0;
         int x2 = 0;
         int x3 = 0;
 
         switch (sendDir) {
-            case D3Q27System::DIR_PPP:
+            case DIR_PPP:
                 x1 = maxX1;
                 x2 = maxX2;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_MPP:
+            case DIR_MPP:
                 x1 = 0;
                 x2 = maxX2;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_MMP:
+            case DIR_MMP:
                 x1 = 0;
                 x2 = 0;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_PMP:
+            case DIR_PMP:
                 x1 = maxX1;
                 x2 = 0;
                 x3 = maxX3;
                 break;
-            case D3Q27System::DIR_PPM:
+            case DIR_PPM:
                 x1 = maxX1;
                 x2 = maxX2;
                 x3 = 0;
                 break;
-            case D3Q27System::DIR_MPM:
+            case DIR_MPM:
                 x1 = 0;
                 x2 = maxX2;
                 x3 = 0;
                 break;
-            case D3Q27System::DIR_MMM:
+            case DIR_MMM:
                 x1 = 0;
                 x2 = 0;
                 x3 = 0;
                 break;
-            case D3Q27System::DIR_PMM:
+            case DIR_PMM:
                 x1 = maxX1;
                 x2 = 0;
                 x3 = 0;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/LocalBlock3DConnector.h b/src/cpu/VirtualFluidsCore/Connectors/LocalBlock3DConnector.h
index 9e8819ebd645ade3b17b2cb1e3a3f2d2c7c67d0c..ba17218ba7ec390ace7b3b964a3b33977a97f52e 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/LocalBlock3DConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/LocalBlock3DConnector.h
@@ -37,6 +37,7 @@
 #include "Block3D.h"
 #include "Block3DConnector.h"
 #include "PointerDefinitions.h"
+#include "lbm/constants/D3Q27.h"
 
 //! A class provides an interface for connectors in shared memory
 class LocalBlock3DConnector : public Block3DConnector
@@ -63,7 +64,7 @@ public:
     bool isInterpolationConnectorCF() override { return false; }
     bool isInterpolationConnectorFC() override { return false; }
 
-    double getSendRecieveTime();
+    real getSendRecieveTime();
 
     void prepareForSendX1() override {}
     void prepareForSendX2() override {}
diff --git a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullDirectConnector.h
index 7344b0fe1272c7dac58e45d25b2d0011d65d637f..44d3f9fc251d12c9c621193d326cbc751921d957 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullDirectConnector.h
@@ -54,13 +54,13 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFrom;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFrom;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFrom;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFrom;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFrom;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFrom;
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTo;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTo;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsTo;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTo;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTo;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsTo;
 
     SPtr<EsoTwist3D> fFrom;
     SPtr<EsoTwist3D> fTo;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.cpp
index 72f43858ae7e64538b4b9bdb7028a8c895e2e84d..1bdb92f6b0d51c3bfb8daf6e149be7c1be0fecf0 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.cpp
@@ -12,57 +12,59 @@ OneDistributionFullVectorConnector::OneDistributionFullVectorConnector(SPtr<Bloc
 //////////////////////////////////////////////////////////////////////////
 void OneDistributionFullVectorConnector::init()
 {
+    using namespace vf::lbm::dir;
+
     FullVectorConnector::init();
     
     fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
 
     int anz = 27;
     switch (sendDir) {
-        case D3Q27System::DIR_000:
+        case DIR_000:
             UB_THROW(UbException(UB_EXARGS, "ZERO not allowed"));
             break;
-        case D3Q27System::DIR_P00:
-        case D3Q27System::DIR_M00:
+        case DIR_P00:
+        case DIR_M00:
             sender->getData().resize(maxX2 * maxX3 * anz, 0.0);
             break;
-        case D3Q27System::DIR_0P0:
-        case D3Q27System::DIR_0M0:
+        case DIR_0P0:
+        case DIR_0M0:
             sender->getData().resize(maxX1 * maxX3 * anz, 0.0);
             break;
-        case D3Q27System::DIR_00P:
-        case D3Q27System::DIR_00M:
+        case DIR_00P:
+        case DIR_00M:
             sender->getData().resize(maxX1 * maxX2 * anz, 0.0);
             break;
 
-        case D3Q27System::DIR_PP0:
-        case D3Q27System::DIR_MM0:
-        case D3Q27System::DIR_PM0:
-        case D3Q27System::DIR_MP0:
+        case DIR_PP0:
+        case DIR_MM0:
+        case DIR_PM0:
+        case DIR_MP0:
             sender->getData().resize(maxX3 * anz, 0.0);
             break;
 
-        case D3Q27System::DIR_P0P:
-        case D3Q27System::DIR_M0M:
-        case D3Q27System::DIR_P0M:
-        case D3Q27System::DIR_M0P:
+        case DIR_P0P:
+        case DIR_M0M:
+        case DIR_P0M:
+        case DIR_M0P:
             sender->getData().resize(maxX2 * anz, 0.0);
             break;
 
-        case D3Q27System::DIR_0PP:
-        case D3Q27System::DIR_0MM:
-        case D3Q27System::DIR_0PM:
-        case D3Q27System::DIR_0MP:
+        case DIR_0PP:
+        case DIR_0MM:
+        case DIR_0PM:
+        case DIR_0MP:
             sender->getData().resize(maxX1 * anz, 0.0);
             break;
 
-        case D3Q27System::DIR_PPP:
-        case D3Q27System::DIR_MMM:
-        case D3Q27System::DIR_PPM:
-        case D3Q27System::DIR_MMP:
-        case D3Q27System::DIR_PMP:
-        case D3Q27System::DIR_MPM:
-        case D3Q27System::DIR_PMM:
-        case D3Q27System::DIR_MPP:
+        case DIR_PPP:
+        case DIR_MMM:
+        case DIR_PPM:
+        case DIR_MMP:
+        case DIR_PMP:
+        case DIR_MPM:
+        case DIR_PMM:
+        case DIR_MPP:
             sender->getData().resize(anz, 0.0);
             break;
 
diff --git a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.h
index 2342fe66f48e8b437540716a1264ecf286c7295e..a9a53455c934fa68663ac6f3ff0722cdb45f689b 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/OneDistributionFullVectorConnector.h
@@ -31,9 +31,9 @@ protected:
     inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
 
     SPtr<EsoTwist3D> fDis;
 };
diff --git a/src/cpu/VirtualFluidsCore/Connectors/RemoteBlock3DConnector.h b/src/cpu/VirtualFluidsCore/Connectors/RemoteBlock3DConnector.h
index 5151e61900c8b25bd6282987143c6935c6a66469..2c27eea33a01b7b680c645c4e143639f779bf4d6 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/RemoteBlock3DConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/RemoteBlock3DConnector.h
@@ -70,7 +70,7 @@ public:
     bool isInterpolationConnectorCF() override { return false; }
     bool isInterpolationConnectorFC() override { return false; }
 
-    double getSendRecieveTime() { return 0; }
+    real getSendRecieveTime() { return 0; }
 
     void prepareForSendX1() override {}
     void prepareForSendX2() override {}
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.cpp
index 782b0f27d4b0cd006a27c89def02dad11ff558c5..4856743128041bbfef3048fb53c9d8110de498fb 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.cpp
@@ -63,6 +63,8 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::sendVectors()
 //////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
 {
+    using namespace vf::lbm::dir;
+
     ////////////////////////////////////////////////////////////
     // relation between ghost layer and regular nodes
     // maxX1m3 maxX1m2 ... minX1p2 minX1p3 - regular nodes
@@ -94,7 +96,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
     int maxX3m3 = maxX3 - 3;
 
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 exchangeData(maxX1m3, x2, x3, minX1, x2, x3);
@@ -103,7 +105,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 exchangeData(minX1p3, x2, x3, maxX1, x2, x3);
@@ -112,7 +114,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, maxX2m3, x3, x1, minX2, x3);
@@ -121,7 +123,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, minX2p3, x3, x1, maxX2, x3);
@@ -131,7 +133,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, x2, maxX3m3, x1, x2, minX3);
@@ -140,7 +142,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, x2, minX3p3, x1, x2, maxX3);
@@ -149,7 +151,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(maxX1m3, maxX2m3, x3, minX1, minX2, x3);
             exchangeData(maxX1m2, maxX2m2, x3, minX1p1, minX2p1, x3);
@@ -158,7 +160,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(minX1p3, maxX2m3, x3, maxX1, minX2, x3);
             exchangeData(minX1p2, maxX2m2, x3, maxX1m1, minX2p1, x3);
@@ -167,7 +169,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(minX1p3, minX2p3, x3, maxX1, maxX2, x3);
             exchangeData(minX1p2, minX2p2, x3, maxX1m1, maxX2m1, x3);
@@ -176,70 +178,70 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(maxX1m3, minX2p3, x3, minX1, maxX2, x3);
             exchangeData(maxX1m2, minX2p2, x3, minX1p1, maxX2m1, x3);
             exchangeData(maxX1m3, minX2p2, x3, minX1, maxX2m1, x3);
             exchangeData(maxX1m2, minX2p3, x3, minX1p1, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(maxX1m3, x2, maxX3m3, minX1, x2, minX3);
             exchangeData(maxX1m2, x2, maxX3m2, minX1p1, x2, minX3p1);
             exchangeData(maxX1m3, x2, maxX3m2, minX1, x2, minX3p1);
             exchangeData(maxX1m2, x2, maxX3m3, minX1p1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(minX1p3, x2, minX3p3, maxX1, x2, maxX3);
             exchangeData(minX1p2, x2, minX3p2, maxX1m1, x2, maxX3m1);
             exchangeData(minX1p3, x2, minX3p2, maxX1, x2, maxX3m1);
             exchangeData(minX1p2, x2, minX3p3, maxX1m1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(maxX1m3, x2, minX3p3, minX1, x2, maxX3);
             exchangeData(maxX1m2, x2, minX3p2, minX1p1, x2, maxX3m1);
             exchangeData(maxX1m3, x2, minX3p2, minX1, x2, maxX3m1);
             exchangeData(maxX1m2, x2, minX3p3, minX1p1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(minX1p3, x2, maxX3m3, maxX1, x2, minX3);
             exchangeData(minX1p2, x2, maxX3m2, maxX1m1, x2, minX3p1);
             exchangeData(minX1p3, x2, maxX3m2, maxX1, x2, minX3p1);
             exchangeData(minX1p2, x2, maxX3m3, maxX1m1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, maxX2m3, maxX3m3, x1, minX2, minX3);
             exchangeData(x1, maxX2m2, maxX3m2, x1, minX2p1, minX3p1);
             exchangeData(x1, maxX2m3, maxX3m2, x1, minX2, minX3p1);
             exchangeData(x1, maxX2m2, maxX3m3, x1, minX2p1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, minX2p3, minX3p3, x1, maxX2, maxX3);
             exchangeData(x1, minX2p2, minX3p2, x1, maxX2m1, maxX3m1);
             exchangeData(x1, minX2p3, minX3p2, x1, maxX2, maxX3m1);
             exchangeData(x1, minX2p2, minX3p3, x1, maxX2m1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, maxX2m3, minX3p3, x1, minX2, maxX3);
             exchangeData(x1, maxX2m2, minX3p2, x1, minX2p1, maxX3m1);
             exchangeData(x1, maxX2m3, minX3p2, x1, minX2, maxX3m1);
             exchangeData(x1, maxX2m2, minX3p3, x1, minX2p1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, minX2p3, maxX3m3, x1, maxX2, minX3);
             exchangeData(x1, minX2p2, maxX3m2, x1, maxX2m1, minX3p1);
             exchangeData(x1, minX2p3, maxX3m2, x1, maxX2, minX3p1);
             exchangeData(x1, minX2p2, maxX3m3, x1, maxX2m1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         exchangeData(minX1p3, minX2p3, maxX3m3, maxX1, maxX2, minX3);
         exchangeData(minX1p2, minX2p2, maxX3m2, maxX1m1, maxX2m1, minX3p1);
         exchangeData(minX1p3, minX2p2, maxX3m2, maxX1, maxX2m1, minX3p1);
@@ -248,7 +250,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, minX2p3, maxX3m2, maxX1, maxX2, minX3p1);
         exchangeData(minX1p3, minX2p2, maxX3m3, maxX1, maxX2m1, minX3);
         exchangeData(minX1p2, minX2p3, maxX3m3, maxX1m1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         exchangeData(maxX1m3, minX1p3, maxX3m3, minX1, maxX2, minX3);
         exchangeData(maxX1m2, minX1p2, maxX3m2, minX1p1, maxX2m1, minX3p1);
         exchangeData(maxX1m3, minX1p2, maxX3m2, minX1, maxX2m1, minX3p1);
@@ -257,7 +259,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, minX1p3, maxX3m2, minX1, maxX2, minX3p1);
         exchangeData(maxX1m3, minX1p2, maxX3m3, minX1, maxX2m1, minX3);
         exchangeData(maxX1m2, minX1p3, maxX3m3, minX1p1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         exchangeData(minX1p3, maxX2m3, maxX3m3, maxX1, minX2, minX3);
         exchangeData(minX1p2, maxX2m2, maxX3m2, maxX1m1, minX2p1, minX3p1);
         exchangeData(minX1p3, maxX2m2, maxX3m2, maxX1, minX2p1, minX3p1);
@@ -266,7 +268,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, maxX2m3, maxX3m2, maxX1, minX2, minX3p1);
         exchangeData(minX1p3, maxX2m2, maxX3m3, maxX1, minX2p1, minX3);
         exchangeData(minX1p2, maxX2m3, maxX3m3, maxX1m1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         exchangeData(maxX1m3, maxX2m3, maxX3m3, minX1, minX2, minX3);
         exchangeData(maxX1m2, maxX2m2, maxX3m2, minX1p1, minX2p1, minX3p1);
         exchangeData(maxX1m3, maxX2m2, maxX3m2, minX1, minX2p1, minX3p1);
@@ -275,7 +277,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, maxX2m3, maxX3m2, minX1, minX2, minX3p1);
         exchangeData(maxX1m3, maxX2m2, maxX3m3, minX1, minX2p1, minX3);
         exchangeData(maxX1m2, maxX2m3, maxX3m3, minX1p1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         exchangeData(minX1p3, minX2p3, minX3p3, maxX1, maxX2, maxX3);
         exchangeData(minX1p2, minX2p2, minX3p2, maxX1m1, maxX2m1, maxX3m1);
         exchangeData(minX1p3, minX2p2, minX3p2, maxX1, maxX2m1, maxX3m1);
@@ -284,7 +286,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, minX2p3, minX3p2, maxX1, maxX2, maxX3m1);
         exchangeData(minX1p3, minX2p2, minX3p3, maxX1, maxX2m1, maxX3);
         exchangeData(minX1p2, minX2p3, minX3p3, maxX1m1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         exchangeData(maxX1m3, minX2p3, minX3p3, minX1, maxX2, maxX3);
         exchangeData(maxX1m2, minX2p2, minX3p2, minX1p1, maxX2m1, maxX3m1);
         exchangeData(maxX1m3, minX2p2, minX3p2, minX1, maxX2m1, maxX3m1);
@@ -293,7 +295,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, minX2p3, minX3p2, minX1, maxX2, maxX3m1);
         exchangeData(maxX1m3, minX2p2, minX3p3, minX1, maxX2m1, maxX3);
         exchangeData(maxX1m2, minX2p3, minX3p3, minX1p1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         exchangeData(minX1p3, maxX2m3, minX3p3, maxX1, minX2, maxX3);
         exchangeData(minX1p2, maxX2m2, minX3p2, maxX1m1, minX2p1, maxX3m1);
         exchangeData(minX1p3, maxX2m2, minX3p2, maxX1, minX2p1, maxX3m1);
@@ -302,7 +304,7 @@ void ThreeDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, maxX2m3, minX3p2, maxX1, minX2, maxX3m1);
         exchangeData(minX1p3, maxX2m2, minX3p3, maxX1, minX2p1, maxX3);
         exchangeData(minX1p2, maxX2m3, minX3p3, maxX1m1, minX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         exchangeData(maxX1m3, maxX2m3, minX3p3, minX1, minX2, maxX3);
         exchangeData(maxX1m2, maxX2m2, minX3p2, minX1p1, minX2p1, maxX3m1);
         exchangeData(maxX1m3, maxX2m2, minX3p2, minX1, minX2p1, maxX3m1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.h
index 1f53ca70aecd3531c986edb8a3933e9d4c5c5ba7..21a37e3427747d0c813c34b6fcf14b18ff3e1a76 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullDirectConnector.h
@@ -58,29 +58,29 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFromh2;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFromh2;
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsToh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsToh2;
 
 	SPtr<EsoTwist3D> fFrom, hFrom, hFrom2;
     SPtr<EsoTwist3D> fTo, hTo, hTo2;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.cpp
index 0b94f7b4a971462517db6dd07050942f4b8595c0..8334b93d21529a54fbe6b29be465d60d2c63e308 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.cpp
@@ -50,6 +50,8 @@ ThreeDistributionsDoubleGhostLayerFullVectorConnector::ThreeDistributionsDoubleG
 //////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsDoubleGhostLayerFullVectorConnector::init()
 {
+    using namespace vf::lbm::dir;
+
    FullVectorConnector::init();
 
    fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
@@ -60,37 +62,37 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::init()
    int anz = 3*27+1;
    switch (sendDir)
    {
-   case D3Q27System::DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
-   case D3Q27System::DIR_P00:
-   case D3Q27System::DIR_M00: sender->getData().resize(maxX2*maxX3*anz*2, 0.0);   break;
-   case D3Q27System::DIR_0P0:
-   case D3Q27System::DIR_0M0: sender->getData().resize(maxX1*maxX3*anz*2, 0.0);   break;
-   case D3Q27System::DIR_00P:
-   case D3Q27System::DIR_00M: sender->getData().resize(maxX1*maxX2*anz*2, 0.0);   break;
+   case DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
+   case DIR_P00:
+   case DIR_M00: sender->getData().resize(maxX2*maxX3*anz*2, 0.0);   break;
+   case DIR_0P0:
+   case DIR_0M0: sender->getData().resize(maxX1*maxX3*anz*2, 0.0);   break;
+   case DIR_00P:
+   case DIR_00M: sender->getData().resize(maxX1*maxX2*anz*2, 0.0);   break;
 
-   case D3Q27System::DIR_PP0:
-   case D3Q27System::DIR_MM0:
-   case D3Q27System::DIR_PM0:
-   case D3Q27System::DIR_MP0:  sender->getData().resize(maxX3*anz*4, 0.0);   break;
+   case DIR_PP0:
+   case DIR_MM0:
+   case DIR_PM0:
+   case DIR_MP0:  sender->getData().resize(maxX3*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_P0P:
-   case D3Q27System::DIR_M0M:
-   case D3Q27System::DIR_P0M:
-   case D3Q27System::DIR_M0P:  sender->getData().resize(maxX2*anz*4, 0.0);   break;
+   case DIR_P0P:
+   case DIR_M0M:
+   case DIR_P0M:
+   case DIR_M0P:  sender->getData().resize(maxX2*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_0PP:
-   case D3Q27System::DIR_0MM:
-   case D3Q27System::DIR_0PM:
-   case D3Q27System::DIR_0MP:  sender->getData().resize(maxX1*anz*4, 0.0);   break;
+   case DIR_0PP:
+   case DIR_0MM:
+   case DIR_0PM:
+   case DIR_0MP:  sender->getData().resize(maxX1*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_PPP:
-   case D3Q27System::DIR_MMM:
-   case D3Q27System::DIR_PPM:
-   case D3Q27System::DIR_MMP:
-   case D3Q27System::DIR_PMP:
-   case D3Q27System::DIR_MPM:
-   case D3Q27System::DIR_PMM:
-   case D3Q27System::DIR_MPP:  sender->getData().resize(anz*8, 0.0);   break;
+   case DIR_PPP:
+   case DIR_MMM:
+   case DIR_PPM:
+   case DIR_MMP:
+   case DIR_PMP:
+   case DIR_MPM:
+   case DIR_PMM:
+   case DIR_MPP:  sender->getData().resize(anz*8, 0.0);   break;
 
    default: UB_THROW(UbException(UB_EXARGS, "unknown sendDir"));
    }
@@ -104,6 +106,8 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillSendVectors()
 ////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
 {
+    using namespace vf::lbm::dir;
+
     ////////////////////////////////////////////////////////////
     // relation between ghost layer and regular nodes
     // maxX1m3 maxX1m2 ... minX1p2 minX1p3 - regular nodes
@@ -138,7 +142,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
 
     int index = 0;
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 fillData(sdata, index, maxX1m3, x2, x3);
@@ -147,7 +151,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 fillData(sdata, index, minX1p3, x2, x3);
@@ -156,7 +160,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, maxX2m3, x3);
@@ -165,7 +169,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, minX2p3, x3);
@@ -175,7 +179,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, x2, maxX3m3);
@@ -184,7 +188,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, x2, minX3p3);
@@ -193,7 +197,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, maxX1m3, maxX2m3, x3);
             fillData(sdata, index, maxX1m2, maxX2m2, x3);
@@ -202,7 +206,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, minX1p3, maxX2m3, x3);
             fillData(sdata, index, minX1p2, maxX2m2, x3);
@@ -211,7 +215,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, minX1p3, minX2p3, x3);
             fillData(sdata, index, minX1p2, minX2p2, x3);
@@ -220,70 +224,70 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, maxX1m3, minX2p3, x3);
             fillData(sdata, index, maxX1m2, minX2p2, x3);
             fillData(sdata, index, maxX1m3, minX2p2, x3);
             fillData(sdata, index, maxX1m2, minX2p3, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, maxX1m3, x2, maxX3m3);
             fillData(sdata, index, maxX1m2, x2, maxX3m2);
             fillData(sdata, index, maxX1m3, x2, maxX3m2);
             fillData(sdata, index, maxX1m2, x2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, minX1p3, x2, minX3p3);
             fillData(sdata, index, minX1p2, x2, minX3p2);
             fillData(sdata, index, minX1p3, x2, minX3p2);
             fillData(sdata, index, minX1p2, x2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, maxX1m3, x2, minX3p3);
             fillData(sdata, index, maxX1m2, x2, minX3p2);
             fillData(sdata, index, maxX1m3, x2, minX3p2);
             fillData(sdata, index, maxX1m2, x2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, minX1p3, x2, maxX3m3);
             fillData(sdata, index, minX1p2, x2, maxX3m2);
             fillData(sdata, index, minX1p3, x2, maxX3m2);
             fillData(sdata, index, minX1p2, x2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, maxX2m3, maxX3m3);
             fillData(sdata, index, x1, maxX2m2, maxX3m2);
             fillData(sdata, index, x1, maxX2m3, maxX3m2);
             fillData(sdata, index, x1, maxX2m2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, minX2p3, minX3p3);
             fillData(sdata, index, x1, minX2p2, minX3p2);
             fillData(sdata, index, x1, minX2p3, minX3p2);
             fillData(sdata, index, x1, minX2p2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, maxX2m3, minX3p3);
             fillData(sdata, index, x1, maxX2m2, minX3p2);
             fillData(sdata, index, x1, maxX2m3, minX3p2);
             fillData(sdata, index, x1, maxX2m2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, minX2p3, maxX3m3);
             fillData(sdata, index, x1, minX2p2, maxX3m2);
             fillData(sdata, index, x1, minX2p3, maxX3m2);
             fillData(sdata, index, x1, minX2p2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         fillData(sdata, index, minX1p3, minX2p3, maxX3m3);
         fillData(sdata, index, minX1p2, minX2p2, maxX3m2);
         fillData(sdata, index, minX1p3, minX2p2, maxX3m2);
@@ -292,7 +296,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, minX2p3, maxX3m2);
         fillData(sdata, index, minX1p3, minX2p2, maxX3m3);
         fillData(sdata, index, minX1p2, minX2p3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         fillData(sdata, index, maxX1m3, minX1p3, maxX3m3);
         fillData(sdata, index, maxX1m2, minX1p2, maxX3m2);
         fillData(sdata, index, maxX1m3, minX1p2, maxX3m2);
@@ -301,7 +305,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, minX1p3, maxX3m2);
         fillData(sdata, index, maxX1m3, minX1p2, maxX3m3);
         fillData(sdata, index, maxX1m2, minX1p3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         fillData(sdata, index, minX1p3, maxX2m3, maxX3m3);
         fillData(sdata, index, minX1p2, maxX2m2, maxX3m2);
         fillData(sdata, index, minX1p3, maxX2m2, maxX3m2);
@@ -310,7 +314,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, maxX2m3, maxX3m2);
         fillData(sdata, index, minX1p3, maxX2m2, maxX3m3);
         fillData(sdata, index, minX1p2, maxX2m3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         fillData(sdata, index, maxX1m3, maxX2m3, maxX3m3);
         fillData(sdata, index, maxX1m2, maxX2m2, maxX3m2);
         fillData(sdata, index, maxX1m3, maxX2m2, maxX3m2);
@@ -319,7 +323,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, maxX2m3, maxX3m2);
         fillData(sdata, index, maxX1m3, maxX2m2, maxX3m3);
         fillData(sdata, index, maxX1m2, maxX2m3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         fillData(sdata, index, minX1p3, minX2p3, minX3p3);
         fillData(sdata, index, minX1p2, minX2p2, minX3p2);
         fillData(sdata, index, minX1p3, minX2p2, minX3p2);
@@ -328,7 +332,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, minX2p3, minX3p2);
         fillData(sdata, index, minX1p3, minX2p2, minX3p3);
         fillData(sdata, index, minX1p2, minX2p3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         fillData(sdata, index, maxX1m3, minX2p3, minX3p3);
         fillData(sdata, index, maxX1m2, minX2p2, minX3p2);
         fillData(sdata, index, maxX1m3, minX2p2, minX3p2);
@@ -337,7 +341,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, minX2p3, minX3p2);
         fillData(sdata, index, maxX1m3, minX2p2, minX3p3);
         fillData(sdata, index, maxX1m2, minX2p3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         fillData(sdata, index, minX1p3, maxX2m3, minX3p3);
         fillData(sdata, index, minX1p2, maxX2m2, minX3p2);
         fillData(sdata, index, minX1p3, maxX2m2, minX3p2);
@@ -346,7 +350,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, maxX2m3, minX3p2);
         fillData(sdata, index, minX1p3, maxX2m2, minX3p3);
         fillData(sdata, index, minX1p2, maxX2m3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         fillData(sdata, index, maxX1m3, maxX2m3, minX3p3);
         fillData(sdata, index, maxX1m2, maxX2m2, minX3p2);
         fillData(sdata, index, maxX1m3, maxX2m2, minX3p2);
@@ -367,6 +371,8 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeReceiveVec
 ////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
 {
+    using namespace vf::lbm::dir;
+
     vector_type &rdata = receiver->getData();
 
     int index = 0;
@@ -400,7 +406,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
     int maxX3m2 = maxX3 - 2;
     //int maxX3m3 = maxX3 - 3;
 
-    if (sendDir == D3Q27System::DIR_M00) {
+    if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 distributeData(rdata, index, minX1, x2, x3);
@@ -408,7 +414,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_P00) {
+    else if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 distributeData(rdata, index, maxX1, x2, x3);
@@ -416,7 +422,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, minX2, x3);
@@ -424,7 +430,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, maxX2, x3);
@@ -432,7 +438,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, x2, minX3);
@@ -440,7 +446,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, x2, maxX3);
@@ -448,7 +454,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, minX1, minX2, x3);
             distributeData(rdata, index, minX1p1, minX2p1, x3);
@@ -456,7 +462,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, minX1p1, minX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, maxX1, minX2, x3);
             distributeData(rdata, index, maxX1m1, minX2p1, x3);
@@ -464,7 +470,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, maxX1m1, minX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, maxX1, maxX2, x3);
             distributeData(rdata, index, maxX1m1, maxX2m1, x3);
@@ -472,70 +478,70 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, maxX1m1, maxX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, minX1, maxX2, x3);
             distributeData(rdata, index, minX1p1, maxX2m1, x3);
             distributeData(rdata, index, minX1, maxX2m1, x3);
             distributeData(rdata, index, minX1p1, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_M0M)
+    } else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, minX1, x2, minX3);
             distributeData(rdata, index, minX1p1, x2, minX3p1);
             distributeData(rdata, index, minX1, x2, minX3p1);
             distributeData(rdata, index, minX1p1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0P)
+    else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, maxX1, x2, maxX3);
             distributeData(rdata, index, maxX1m1, x2, maxX3m1);
             distributeData(rdata, index, maxX1, x2, maxX3m1);
             distributeData(rdata, index, maxX1m1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, minX1, x2, maxX3);
             distributeData(rdata, index, minX1p1, x2, maxX3m1);
             distributeData(rdata, index, minX1, x2, maxX3m1);
             distributeData(rdata, index, minX1p1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, maxX1, x2, minX3);
             distributeData(rdata, index, maxX1m1, x2, minX3p1);
             distributeData(rdata, index, maxX1, x2, minX3p1);
             distributeData(rdata, index, maxX1m1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, minX2, minX3);
             distributeData(rdata, index, x1, minX2p1, minX3p1);
             distributeData(rdata, index, x1, minX2, minX3p1);
             distributeData(rdata, index, x1, minX2p1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, maxX2, maxX3);
             distributeData(rdata, index, x1, maxX2m1, maxX3m1);
             distributeData(rdata, index, x1, maxX2, maxX3m1);
             distributeData(rdata, index, x1, maxX2m1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, minX2, maxX3);
             distributeData(rdata, index, x1, minX2p1, maxX3m1);
             distributeData(rdata, index, x1, minX2, maxX3m1);
             distributeData(rdata, index, x1, minX2p1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, maxX2, minX3);
             distributeData(rdata, index, x1, maxX2m1, minX3p1);
             distributeData(rdata, index, x1, maxX2, minX3p1);
             distributeData(rdata, index, x1, maxX2m1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_PPM) {
+    else if (sendDir == DIR_PPM) {
         distributeData(rdata, index, maxX1, maxX2, minX3);
         distributeData(rdata, index, maxX1m1, maxX2m1, minX3p1);
         distributeData(rdata, index, maxX1, maxX2m1, minX3p1);
@@ -544,7 +550,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, maxX2, minX3p1);
         distributeData(rdata, index, maxX1, maxX2m1, minX3);
         distributeData(rdata, index, maxX1m1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         distributeData(rdata, index, minX1, maxX2, minX3);
         distributeData(rdata, index, minX1p1, maxX2m1, minX3p1);
         distributeData(rdata, index, minX1, maxX2m1, minX3p1);
@@ -553,7 +559,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, maxX2, minX3p1);
         distributeData(rdata, index, minX1, maxX2m1, minX3);
         distributeData(rdata, index, minX1p1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         distributeData(rdata, index, maxX1, minX2, minX3);
         distributeData(rdata, index, maxX1m1, minX2p1, minX3p1);
         distributeData(rdata, index, maxX1, minX2p1, minX3p1);
@@ -562,7 +568,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, minX2, minX3p1);
         distributeData(rdata, index, maxX1, minX2p1, minX3);
         distributeData(rdata, index, maxX1m1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         distributeData(rdata, index, minX1, minX2, minX3);
         distributeData(rdata, index, minX1p1, minX2p1, minX3p1);
         distributeData(rdata, index, minX1, minX2p1, minX3p1);
@@ -571,7 +577,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, minX2, minX3p1);
         distributeData(rdata, index, minX1, minX2p1, minX3);
         distributeData(rdata, index, minX1p1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         distributeData(rdata, index, maxX1, maxX2, maxX3);
         distributeData(rdata, index, maxX1m1, maxX2m1, maxX3m1);
         distributeData(rdata, index, maxX1, maxX2m1, maxX3m1);
@@ -580,7 +586,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, maxX2, maxX3m1);
         distributeData(rdata, index, maxX1, maxX2m1, maxX3);
         distributeData(rdata, index, maxX1m1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         distributeData(rdata, index, minX1, maxX2, maxX3);
         distributeData(rdata, index, minX1p1, maxX2m1, maxX3m1);
         distributeData(rdata, index, minX1, maxX2m1, maxX3m1);
@@ -589,7 +595,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, maxX2, maxX3m1);
         distributeData(rdata, index, minX1, maxX2m1, maxX3);
         distributeData(rdata, index, minX1p1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         distributeData(rdata, index, maxX1, minX2, maxX3);
         distributeData(rdata, index, maxX1m1, minX2p1, maxX3m1);
         distributeData(rdata, index, maxX1, minX2p1, maxX3m1);
@@ -598,7 +604,7 @@ void ThreeDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, minX2, maxX3m1);
         distributeData(rdata, index, maxX1, minX2p1, maxX3);
         distributeData(rdata, index, maxX1m1, minX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MMP) {
+    } else if (sendDir == DIR_MMP) {
         distributeData(rdata, index, minX1, minX2, maxX3);
         distributeData(rdata, index, minX1p1, minX2p1, maxX3m1);
         distributeData(rdata, index, minX1, minX2p1, maxX3m1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.h
index 408a8e79d8a22ae71f0f03d51205b6c01a391aae..e124251d8f8be21aa33ccb8dc91f7e9b40356827 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsDoubleGhostLayerFullVectorConnector.h
@@ -68,21 +68,21 @@ protected:
    inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    SPtr<EsoTwist3D>  fDis;
 
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
 
    SPtr<EsoTwist3D>  hDis;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localH2distributions;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalH2distributions;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroH2distributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localH2distributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalH2distributions;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroH2distributions;
 
    SPtr<EsoTwist3D> h2Dis;
 
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullDirectConnector.h
index 6ccac29f41a297581b263164c3a2fc491022be00..3cb443a474d373552125ddd2626a4f797d040429 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullDirectConnector.h
@@ -55,29 +55,29 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFromh2;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsFromh2;
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsToh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsToh2;
 
 	SPtr<EsoTwist3D> fFrom, hFrom, hFrom2;
     SPtr<EsoTwist3D> fTo, hTo, hTo2;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.cpp
index 534076b7bfcb63386c75d6d8619b4b56bbd5c5ee..1b4f243eeccd39c8bdcc0ac3bf1c8b3510053d58 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.cpp
@@ -50,6 +50,8 @@ ThreeDistributionsFullVectorConnector::ThreeDistributionsFullVectorConnector(SPt
 //////////////////////////////////////////////////////////////////////////
 void ThreeDistributionsFullVectorConnector::init()
 {
+    using namespace vf::lbm::dir;
+
    FullVectorConnector::init();
 
    fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
@@ -59,37 +61,37 @@ void ThreeDistributionsFullVectorConnector::init()
    int anz = 3*27;
    switch (sendDir)
    {
-   case D3Q27System::DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
-   case D3Q27System::DIR_P00:
-   case D3Q27System::DIR_M00: sender->getData().resize(maxX2*maxX3*anz, 0.0);   break;
-   case D3Q27System::DIR_0P0:
-   case D3Q27System::DIR_0M0: sender->getData().resize(maxX1*maxX3*anz, 0.0);   break;
-   case D3Q27System::DIR_00P:
-   case D3Q27System::DIR_00M: sender->getData().resize(maxX1*maxX2*anz, 0.0);   break;
+   case DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
+   case DIR_P00:
+   case DIR_M00: sender->getData().resize(maxX2*maxX3*anz, 0.0);   break;
+   case DIR_0P0:
+   case DIR_0M0: sender->getData().resize(maxX1*maxX3*anz, 0.0);   break;
+   case DIR_00P:
+   case DIR_00M: sender->getData().resize(maxX1*maxX2*anz, 0.0);   break;
 
-   case D3Q27System::DIR_PP0:
-   case D3Q27System::DIR_MM0:
-   case D3Q27System::DIR_PM0:
-   case D3Q27System::DIR_MP0:  sender->getData().resize(maxX3*anz, 0.0);   break;
+   case DIR_PP0:
+   case DIR_MM0:
+   case DIR_PM0:
+   case DIR_MP0:  sender->getData().resize(maxX3*anz, 0.0);   break;
 
-   case D3Q27System::DIR_P0P:
-   case D3Q27System::DIR_M0M:
-   case D3Q27System::DIR_P0M:
-   case D3Q27System::DIR_M0P:  sender->getData().resize(maxX2*anz, 0.0);   break;
+   case DIR_P0P:
+   case DIR_M0M:
+   case DIR_P0M:
+   case DIR_M0P:  sender->getData().resize(maxX2*anz, 0.0);   break;
 
-   case D3Q27System::DIR_0PP:
-   case D3Q27System::DIR_0MM:
-   case D3Q27System::DIR_0PM:
-   case D3Q27System::DIR_0MP:  sender->getData().resize(maxX1*anz, 0.0);   break;
+   case DIR_0PP:
+   case DIR_0MM:
+   case DIR_0PM:
+   case DIR_0MP:  sender->getData().resize(maxX1*anz, 0.0);   break;
 
-   case D3Q27System::DIR_PPP:
-   case D3Q27System::DIR_MMM:
-   case D3Q27System::DIR_PPM:
-   case D3Q27System::DIR_MMP:
-   case D3Q27System::DIR_PMP:
-   case D3Q27System::DIR_MPM:
-   case D3Q27System::DIR_PMM:
-   case D3Q27System::DIR_MPP:  sender->getData().resize(anz, 0.0);   break;
+   case DIR_PPP:
+   case DIR_MMM:
+   case DIR_PPM:
+   case DIR_MMP:
+   case DIR_PMP:
+   case DIR_MPM:
+   case DIR_PMM:
+   case DIR_MPP:  sender->getData().resize(anz, 0.0);   break;
 
    default: UB_THROW(UbException(UB_EXARGS, "unknown sendDir"));
    }
diff --git a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.h
index c37ff06984e83950ed4edbe03da0f38dc6ffe190..794ba2d01d8015b347e8a1712da943b82d80b83c 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/ThreeDistributionsFullVectorConnector.h
@@ -62,21 +62,21 @@ protected:
    inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    SPtr<EsoTwist3D>  fDis;
 
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
 
    SPtr<EsoTwist3D>  hDis;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localH2distributions;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalH2distributions;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroH2distributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localH2distributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalH2distributions;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroH2distributions;
 
    SPtr<EsoTwist3D> h2Dis;
 
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TransmitterType.h b/src/cpu/VirtualFluidsCore/Connectors/TransmitterType.h
index b36da8850e958c72d519c85bf383c26a8880e5ee..f300f005e9e52e398b0d2131ad4cbba027170ede 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TransmitterType.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TransmitterType.h
@@ -39,8 +39,8 @@
 #include "basics/transmitter/TbTransmitterLocal.h"
 #include <PointerDefinitions.h>
 
-using VectorTransmitter    = TbTransmitter<CbVector<LBMReal>>;
+using VectorTransmitter    = TbTransmitter<CbVector<real>>;
 using vector_type          = VectorTransmitter::value_type;
-using VectorTransmitterPtr = SPtr<TbTransmitter<CbVector<LBMReal>>>;
+using VectorTransmitterPtr = SPtr<TbTransmitter<CbVector<real>>>;
 
 #endif // TransmitterType_h__
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.cpp
index b46ffebeb144569311272050893118f34e862398..121fef6b86040aab370e9ffd925bea0033d61446 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.cpp
@@ -61,6 +61,8 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::sendVectors()
 //////////////////////////////////////////////////////////////////////////
 void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
 {
+    using namespace vf::lbm::dir;
+
     ////////////////////////////////////////////////////////////
     // relation between ghost layer and regular nodes
     // maxX1m3 maxX1m2 ... minX1p2 minX1p3 - regular nodes
@@ -92,7 +94,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
     int maxX3m3 = maxX3 - 3;
 
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 exchangeData(maxX1m3, x2, x3, minX1, x2, x3);
@@ -101,7 +103,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 exchangeData(minX1p3, x2, x3, maxX1, x2, x3);
@@ -110,7 +112,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, maxX2m3, x3, x1, minX2, x3);
@@ -119,7 +121,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, minX2p3, x3, x1, maxX2, x3);
@@ -129,7 +131,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, x2, maxX3m3, x1, x2, minX3);
@@ -138,7 +140,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 exchangeData(x1, x2, minX3p3, x1, x2, maxX3);
@@ -147,7 +149,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(maxX1m3, maxX2m3, x3, minX1, minX2, x3);
             exchangeData(maxX1m2, maxX2m2, x3, minX1p1, minX2p1, x3);
@@ -156,7 +158,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(minX1p3, maxX2m3, x3, maxX1, minX2, x3);
             exchangeData(minX1p2, maxX2m2, x3, maxX1m1, minX2p1, x3);
@@ -165,7 +167,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(minX1p3, minX2p3, x3, maxX1, maxX2, x3);
             exchangeData(minX1p2, minX2p2, x3, maxX1m1, maxX2m1, x3);
@@ -174,70 +176,70 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             exchangeData(maxX1m3, minX2p3, x3, minX1, maxX2, x3);
             exchangeData(maxX1m2, minX2p2, x3, minX1p1, maxX2m1, x3);
             exchangeData(maxX1m3, minX2p2, x3, minX1, maxX2m1, x3);
             exchangeData(maxX1m2, minX2p3, x3, minX1p1, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(maxX1m3, x2, maxX3m3, minX1, x2, minX3);
             exchangeData(maxX1m2, x2, maxX3m2, minX1p1, x2, minX3p1);
             exchangeData(maxX1m3, x2, maxX3m2, minX1, x2, minX3p1);
             exchangeData(maxX1m2, x2, maxX3m3, minX1p1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(minX1p3, x2, minX3p3, maxX1, x2, maxX3);
             exchangeData(minX1p2, x2, minX3p2, maxX1m1, x2, maxX3m1);
             exchangeData(minX1p3, x2, minX3p2, maxX1, x2, maxX3m1);
             exchangeData(minX1p2, x2, minX3p3, maxX1m1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(maxX1m3, x2, minX3p3, minX1, x2, maxX3);
             exchangeData(maxX1m2, x2, minX3p2, minX1p1, x2, maxX3m1);
             exchangeData(maxX1m3, x2, minX3p2, minX1, x2, maxX3m1);
             exchangeData(maxX1m2, x2, minX3p3, minX1p1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             exchangeData(minX1p3, x2, maxX3m3, maxX1, x2, minX3);
             exchangeData(minX1p2, x2, maxX3m2, maxX1m1, x2, minX3p1);
             exchangeData(minX1p3, x2, maxX3m2, maxX1, x2, minX3p1);
             exchangeData(minX1p2, x2, maxX3m3, maxX1m1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, maxX2m3, maxX3m3, x1, minX2, minX3);
             exchangeData(x1, maxX2m2, maxX3m2, x1, minX2p1, minX3p1);
             exchangeData(x1, maxX2m3, maxX3m2, x1, minX2, minX3p1);
             exchangeData(x1, maxX2m2, maxX3m3, x1, minX2p1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, minX2p3, minX3p3, x1, maxX2, maxX3);
             exchangeData(x1, minX2p2, minX3p2, x1, maxX2m1, maxX3m1);
             exchangeData(x1, minX2p3, minX3p2, x1, maxX2, maxX3m1);
             exchangeData(x1, minX2p2, minX3p3, x1, maxX2m1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, maxX2m3, minX3p3, x1, minX2, maxX3);
             exchangeData(x1, maxX2m2, minX3p2, x1, minX2p1, maxX3m1);
             exchangeData(x1, maxX2m3, minX3p2, x1, minX2, maxX3m1);
             exchangeData(x1, maxX2m2, minX3p3, x1, minX2p1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             exchangeData(x1, minX2p3, maxX3m3, x1, maxX2, minX3);
             exchangeData(x1, minX2p2, maxX3m2, x1, maxX2m1, minX3p1);
             exchangeData(x1, minX2p3, maxX3m2, x1, maxX2, minX3p1);
             exchangeData(x1, minX2p2, maxX3m3, x1, maxX2m1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         exchangeData(minX1p3, minX2p3, maxX3m3, maxX1, maxX2, minX3);
         exchangeData(minX1p2, minX2p2, maxX3m2, maxX1m1, maxX2m1, minX3p1);
         exchangeData(minX1p3, minX2p2, maxX3m2, maxX1, maxX2m1, minX3p1);
@@ -246,7 +248,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, minX2p3, maxX3m2, maxX1, maxX2, minX3p1);
         exchangeData(minX1p3, minX2p2, maxX3m3, maxX1, maxX2m1, minX3);
         exchangeData(minX1p2, minX2p3, maxX3m3, maxX1m1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         exchangeData(maxX1m3, minX1p3, maxX3m3, minX1, maxX2, minX3);
         exchangeData(maxX1m2, minX1p2, maxX3m2, minX1p1, maxX2m1, minX3p1);
         exchangeData(maxX1m3, minX1p2, maxX3m2, minX1, maxX2m1, minX3p1);
@@ -255,7 +257,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, minX1p3, maxX3m2, minX1, maxX2, minX3p1);
         exchangeData(maxX1m3, minX1p2, maxX3m3, minX1, maxX2m1, minX3);
         exchangeData(maxX1m2, minX1p3, maxX3m3, minX1p1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         exchangeData(minX1p3, maxX2m3, maxX3m3, maxX1, minX2, minX3);
         exchangeData(minX1p2, maxX2m2, maxX3m2, maxX1m1, minX2p1, minX3p1);
         exchangeData(minX1p3, maxX2m2, maxX3m2, maxX1, minX2p1, minX3p1);
@@ -264,7 +266,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, maxX2m3, maxX3m2, maxX1, minX2, minX3p1);
         exchangeData(minX1p3, maxX2m2, maxX3m3, maxX1, minX2p1, minX3);
         exchangeData(minX1p2, maxX2m3, maxX3m3, maxX1m1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         exchangeData(maxX1m3, maxX2m3, maxX3m3, minX1, minX2, minX3);
         exchangeData(maxX1m2, maxX2m2, maxX3m2, minX1p1, minX2p1, minX3p1);
         exchangeData(maxX1m3, maxX2m2, maxX3m2, minX1, minX2p1, minX3p1);
@@ -273,7 +275,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, maxX2m3, maxX3m2, minX1, minX2, minX3p1);
         exchangeData(maxX1m3, maxX2m2, maxX3m3, minX1, minX2p1, minX3);
         exchangeData(maxX1m2, maxX2m3, maxX3m3, minX1p1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         exchangeData(minX1p3, minX2p3, minX3p3, maxX1, maxX2, maxX3);
         exchangeData(minX1p2, minX2p2, minX3p2, maxX1m1, maxX2m1, maxX3m1);
         exchangeData(minX1p3, minX2p2, minX3p2, maxX1, maxX2m1, maxX3m1);
@@ -282,7 +284,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, minX2p3, minX3p2, maxX1, maxX2, maxX3m1);
         exchangeData(minX1p3, minX2p2, minX3p3, maxX1, maxX2m1, maxX3);
         exchangeData(minX1p2, minX2p3, minX3p3, maxX1m1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         exchangeData(maxX1m3, minX2p3, minX3p3, minX1, maxX2, maxX3);
         exchangeData(maxX1m2, minX2p2, minX3p2, minX1p1, maxX2m1, maxX3m1);
         exchangeData(maxX1m3, minX2p2, minX3p2, minX1, maxX2m1, maxX3m1);
@@ -291,7 +293,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(maxX1m3, minX2p3, minX3p2, minX1, maxX2, maxX3m1);
         exchangeData(maxX1m3, minX2p2, minX3p3, minX1, maxX2m1, maxX3);
         exchangeData(maxX1m2, minX2p3, minX3p3, minX1p1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         exchangeData(minX1p3, maxX2m3, minX3p3, maxX1, minX2, maxX3);
         exchangeData(minX1p2, maxX2m2, minX3p2, maxX1m1, minX2p1, maxX3m1);
         exchangeData(minX1p3, maxX2m2, minX3p2, maxX1, minX2p1, maxX3m1);
@@ -300,7 +302,7 @@ void TwoDistributionsDoubleGhostLayerFullDirectConnector::exchangeData()
         exchangeData(minX1p3, maxX2m3, minX3p2, maxX1, minX2, maxX3m1);
         exchangeData(minX1p3, maxX2m2, minX3p3, maxX1, minX2p1, maxX3);
         exchangeData(minX1p2, maxX2m3, minX3p3, maxX1m1, minX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         exchangeData(maxX1m3, maxX2m3, minX3p3, minX1, minX2, maxX3);
         exchangeData(maxX1m2, maxX2m2, minX3p2, minX1p1, minX2p1, maxX3m1);
         exchangeData(maxX1m3, maxX2m2, minX3p2, minX1, minX2p1, maxX3m1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.h
index bc431f9f3bbb16587df76355c395fff780137b22..bbd1c5a346ac50b08c78794ea3b00457ba4836b7 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullDirectConnector.h
@@ -58,21 +58,21 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
 
 	SPtr<EsoTwist3D> fFrom, hFrom;
     SPtr<EsoTwist3D> fTo, hTo;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.cpp
index 9dc8a99deb20f8f49f40f7d2e7c8a0c66b687fcb..8f6b88898a9da1cfca9aee49ae4cb084ee54217a 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.cpp
@@ -50,6 +50,8 @@ TwoDistributionsDoubleGhostLayerFullVectorConnector::TwoDistributionsDoubleGhost
 //////////////////////////////////////////////////////////////////////////
 void TwoDistributionsDoubleGhostLayerFullVectorConnector::init()
 {
+   using namespace vf::lbm::dir;
+
    FullVectorConnector::init();
 
    fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
@@ -59,37 +61,37 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::init()
    int anz = 2*27+1;
    switch (sendDir)
    {
-   case D3Q27System::DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
-   case D3Q27System::DIR_P00:
-   case D3Q27System::DIR_M00: sender->getData().resize(maxX2*maxX3*anz*2, 0.0);   break;
-   case D3Q27System::DIR_0P0:
-   case D3Q27System::DIR_0M0: sender->getData().resize(maxX1*maxX3*anz*2, 0.0);   break;
-   case D3Q27System::DIR_00P:
-   case D3Q27System::DIR_00M: sender->getData().resize(maxX1*maxX2*anz*2, 0.0);   break;
+   case DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
+   case DIR_P00:
+   case DIR_M00: sender->getData().resize(maxX2*maxX3*anz*2, 0.0);   break;
+   case DIR_0P0:
+   case DIR_0M0: sender->getData().resize(maxX1*maxX3*anz*2, 0.0);   break;
+   case DIR_00P:
+   case DIR_00M: sender->getData().resize(maxX1*maxX2*anz*2, 0.0);   break;
 
-   case D3Q27System::DIR_PP0:
-   case D3Q27System::DIR_MM0:
-   case D3Q27System::DIR_PM0:
-   case D3Q27System::DIR_MP0:  sender->getData().resize(maxX3*anz*4, 0.0);   break;
+   case DIR_PP0:
+   case DIR_MM0:
+   case DIR_PM0:
+   case DIR_MP0:  sender->getData().resize(maxX3*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_P0P:
-   case D3Q27System::DIR_M0M:
-   case D3Q27System::DIR_P0M:
-   case D3Q27System::DIR_M0P:  sender->getData().resize(maxX2*anz*4, 0.0);   break;
+   case DIR_P0P:
+   case DIR_M0M:
+   case DIR_P0M:
+   case DIR_M0P:  sender->getData().resize(maxX2*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_0PP:
-   case D3Q27System::DIR_0MM:
-   case D3Q27System::DIR_0PM:
-   case D3Q27System::DIR_0MP:  sender->getData().resize(maxX1*anz*4, 0.0);   break;
+   case DIR_0PP:
+   case DIR_0MM:
+   case DIR_0PM:
+   case DIR_0MP:  sender->getData().resize(maxX1*anz*4, 0.0);   break;
 
-   case D3Q27System::DIR_PPP:
-   case D3Q27System::DIR_MMM:
-   case D3Q27System::DIR_PPM:
-   case D3Q27System::DIR_MMP:
-   case D3Q27System::DIR_PMP:
-   case D3Q27System::DIR_MPM:
-   case D3Q27System::DIR_PMM:
-   case D3Q27System::DIR_MPP:  sender->getData().resize(anz*8, 0.0);   break;
+   case DIR_PPP:
+   case DIR_MMM:
+   case DIR_PPM:
+   case DIR_MMP:
+   case DIR_PMP:
+   case DIR_MPM:
+   case DIR_PMM:
+   case DIR_MPP:  sender->getData().resize(anz*8, 0.0);   break;
 
    default: UB_THROW(UbException(UB_EXARGS, "unknown sendDir"));
    }
@@ -103,6 +105,8 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillSendVectors()
 ////////////////////////////////////////////////////////////////////////
 void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
 {
+    using namespace vf::lbm::dir;
+
     ////////////////////////////////////////////////////////////
     // relation between ghost layer and regular nodes
     // maxX1m3 maxX1m2 ... minX1p2 minX1p3 - regular nodes
@@ -137,7 +141,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
 
     int index = 0;
     // EAST
-    if (sendDir == D3Q27System::DIR_P00) {
+    if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 fillData(sdata, index, maxX1m3, x2, x3);
@@ -146,7 +150,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // WEST
-    else if (sendDir == D3Q27System::DIR_M00) {
+    else if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 fillData(sdata, index, minX1p3, x2, x3);
@@ -155,7 +159,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTH
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, maxX2m3, x3);
@@ -164,7 +168,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTH
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, minX2p3, x3);
@@ -174,7 +178,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
     }
 
     // TOP
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, x2, maxX3m3);
@@ -183,7 +187,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // BOTTOM
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 fillData(sdata, index, x1, x2, minX3p3);
@@ -192,7 +196,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTHEAST
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, maxX1m3, maxX2m3, x3);
             fillData(sdata, index, maxX1m2, maxX2m2, x3);
@@ -201,7 +205,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // NORTHWEST
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, minX1p3, maxX2m3, x3);
             fillData(sdata, index, minX1p2, maxX2m2, x3);
@@ -210,7 +214,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTHWEST
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, minX1p3, minX2p3, x3);
             fillData(sdata, index, minX1p2, minX2p2, x3);
@@ -219,70 +223,70 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         }
     }
     // SOUTHEAST
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             fillData(sdata, index, maxX1m3, minX2p3, x3);
             fillData(sdata, index, maxX1m2, minX2p2, x3);
             fillData(sdata, index, maxX1m3, minX2p2, x3);
             fillData(sdata, index, maxX1m2, minX2p3, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_P0P)
+    } else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, maxX1m3, x2, maxX3m3);
             fillData(sdata, index, maxX1m2, x2, maxX3m2);
             fillData(sdata, index, maxX1m3, x2, maxX3m2);
             fillData(sdata, index, maxX1m2, x2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_M0M)
+    else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, minX1p3, x2, minX3p3);
             fillData(sdata, index, minX1p2, x2, minX3p2);
             fillData(sdata, index, minX1p3, x2, minX3p2);
             fillData(sdata, index, minX1p2, x2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, maxX1m3, x2, minX3p3);
             fillData(sdata, index, maxX1m2, x2, minX3p2);
             fillData(sdata, index, maxX1m3, x2, minX3p2);
             fillData(sdata, index, maxX1m2, x2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             fillData(sdata, index, minX1p3, x2, maxX3m3);
             fillData(sdata, index, minX1p2, x2, maxX3m2);
             fillData(sdata, index, minX1p3, x2, maxX3m2);
             fillData(sdata, index, minX1p2, x2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, maxX2m3, maxX3m3);
             fillData(sdata, index, x1, maxX2m2, maxX3m2);
             fillData(sdata, index, x1, maxX2m3, maxX3m2);
             fillData(sdata, index, x1, maxX2m2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, minX2p3, minX3p3);
             fillData(sdata, index, x1, minX2p2, minX3p2);
             fillData(sdata, index, x1, minX2p3, minX3p2);
             fillData(sdata, index, x1, minX2p2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, maxX2m3, minX3p3);
             fillData(sdata, index, x1, maxX2m2, minX3p2);
             fillData(sdata, index, x1, maxX2m3, minX3p2);
             fillData(sdata, index, x1, maxX2m2, minX3p3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             fillData(sdata, index, x1, minX2p3, maxX3m3);
             fillData(sdata, index, x1, minX2p2, maxX3m2);
             fillData(sdata, index, x1, minX2p3, maxX3m2);
             fillData(sdata, index, x1, minX2p2, maxX3m3);
         }
-    else if (sendDir == D3Q27System::DIR_MMP) {
+    else if (sendDir == DIR_MMP) {
         fillData(sdata, index, minX1p3, minX2p3, maxX3m3);
         fillData(sdata, index, minX1p2, minX2p2, maxX3m2);
         fillData(sdata, index, minX1p3, minX2p2, maxX3m2);
@@ -291,7 +295,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, minX2p3, maxX3m2);
         fillData(sdata, index, minX1p3, minX2p2, maxX3m3);
         fillData(sdata, index, minX1p2, minX2p3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         fillData(sdata, index, maxX1m3, minX1p3, maxX3m3);
         fillData(sdata, index, maxX1m2, minX1p2, maxX3m2);
         fillData(sdata, index, maxX1m3, minX1p2, maxX3m2);
@@ -300,7 +304,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, minX1p3, maxX3m2);
         fillData(sdata, index, maxX1m3, minX1p2, maxX3m3);
         fillData(sdata, index, maxX1m2, minX1p3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         fillData(sdata, index, minX1p3, maxX2m3, maxX3m3);
         fillData(sdata, index, minX1p2, maxX2m2, maxX3m2);
         fillData(sdata, index, minX1p3, maxX2m2, maxX3m2);
@@ -309,7 +313,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, maxX2m3, maxX3m2);
         fillData(sdata, index, minX1p3, maxX2m2, maxX3m3);
         fillData(sdata, index, minX1p2, maxX2m3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         fillData(sdata, index, maxX1m3, maxX2m3, maxX3m3);
         fillData(sdata, index, maxX1m2, maxX2m2, maxX3m2);
         fillData(sdata, index, maxX1m3, maxX2m2, maxX3m2);
@@ -318,7 +322,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, maxX2m3, maxX3m2);
         fillData(sdata, index, maxX1m3, maxX2m2, maxX3m3);
         fillData(sdata, index, maxX1m2, maxX2m3, maxX3m3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         fillData(sdata, index, minX1p3, minX2p3, minX3p3);
         fillData(sdata, index, minX1p2, minX2p2, minX3p2);
         fillData(sdata, index, minX1p3, minX2p2, minX3p2);
@@ -327,7 +331,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, minX2p3, minX3p2);
         fillData(sdata, index, minX1p3, minX2p2, minX3p3);
         fillData(sdata, index, minX1p2, minX2p3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         fillData(sdata, index, maxX1m3, minX2p3, minX3p3);
         fillData(sdata, index, maxX1m2, minX2p2, minX3p2);
         fillData(sdata, index, maxX1m3, minX2p2, minX3p2);
@@ -336,7 +340,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, maxX1m3, minX2p3, minX3p2);
         fillData(sdata, index, maxX1m3, minX2p2, minX3p3);
         fillData(sdata, index, maxX1m2, minX2p3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         fillData(sdata, index, minX1p3, maxX2m3, minX3p3);
         fillData(sdata, index, minX1p2, maxX2m2, minX3p2);
         fillData(sdata, index, minX1p3, maxX2m2, minX3p2);
@@ -345,7 +349,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::fillData()
         fillData(sdata, index, minX1p3, maxX2m3, minX3p2);
         fillData(sdata, index, minX1p3, maxX2m2, minX3p3);
         fillData(sdata, index, minX1p2, maxX2m3, minX3p3);
-    } else if (sendDir == D3Q27System::DIR_PPM) {
+    } else if (sendDir == DIR_PPM) {
         fillData(sdata, index, maxX1m3, maxX2m3, minX3p3);
         fillData(sdata, index, maxX1m2, maxX2m2, minX3p2);
         fillData(sdata, index, maxX1m3, maxX2m2, minX3p2);
@@ -366,6 +370,8 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeReceiveVecto
 ////////////////////////////////////////////////////////////////////////
 void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
 {
+    using namespace vf::lbm::dir;
+
     vector_type &rdata = receiver->getData();
 
     int index = 0;
@@ -399,7 +405,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
     int maxX3m2 = maxX3 - 2;
     //int maxX3m3 = maxX3 - 3;
 
-    if (sendDir == D3Q27System::DIR_M00) {
+    if (sendDir == DIR_M00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 distributeData(rdata, index, minX1, x2, x3);
@@ -407,7 +413,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_P00) {
+    else if (sendDir == DIR_P00) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
                 distributeData(rdata, index, maxX1, x2, x3);
@@ -415,7 +421,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_0M0) {
+    else if (sendDir == DIR_0M0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, minX2, x3);
@@ -423,7 +429,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_0P0) {
+    else if (sendDir == DIR_0P0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, maxX2, x3);
@@ -431,7 +437,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_00M) {
+    else if (sendDir == DIR_00M) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, x2, minX3);
@@ -439,7 +445,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_00P) {
+    else if (sendDir == DIR_00P) {
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
                 distributeData(rdata, index, x1, x2, maxX3);
@@ -447,7 +453,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             }
         }
     }
-    else if (sendDir == D3Q27System::DIR_MM0) {
+    else if (sendDir == DIR_MM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, minX1, minX2, x3);
             distributeData(rdata, index, minX1p1, minX2p1, x3);
@@ -455,7 +461,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, minX1p1, minX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_PM0) {
+    else if (sendDir == DIR_PM0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, maxX1, minX2, x3);
             distributeData(rdata, index, maxX1m1, minX2p1, x3);
@@ -463,7 +469,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, maxX1m1, minX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_PP0) {
+    else if (sendDir == DIR_PP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, maxX1, maxX2, x3);
             distributeData(rdata, index, maxX1m1, maxX2m1, x3);
@@ -471,70 +477,70 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
             distributeData(rdata, index, maxX1m1, maxX2, x3);
         }
     }
-    else if (sendDir == D3Q27System::DIR_MP0) {
+    else if (sendDir == DIR_MP0) {
         for (int x3 = minX3p2; x3 <= maxX3m2; x3++) {
             distributeData(rdata, index, minX1, maxX2, x3);
             distributeData(rdata, index, minX1p1, maxX2m1, x3);
             distributeData(rdata, index, minX1, maxX2m1, x3);
             distributeData(rdata, index, minX1p1, maxX2, x3);
         }
-    } else if (sendDir == D3Q27System::DIR_M0M)
+    } else if (sendDir == DIR_M0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, minX1, x2, minX3);
             distributeData(rdata, index, minX1p1, x2, minX3p1);
             distributeData(rdata, index, minX1, x2, minX3p1);
             distributeData(rdata, index, minX1p1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0P)
+    else if (sendDir == DIR_P0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, maxX1, x2, maxX3);
             distributeData(rdata, index, maxX1m1, x2, maxX3m1);
             distributeData(rdata, index, maxX1, x2, maxX3m1);
             distributeData(rdata, index, maxX1m1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_M0P)
+    else if (sendDir == DIR_M0P)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, minX1, x2, maxX3);
             distributeData(rdata, index, minX1p1, x2, maxX3m1);
             distributeData(rdata, index, minX1, x2, maxX3m1);
             distributeData(rdata, index, minX1p1, x2, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_P0M)
+    else if (sendDir == DIR_P0M)
         for (int x2 = minX2p2; x2 <= maxX2m2; x2++) {
             distributeData(rdata, index, maxX1, x2, minX3);
             distributeData(rdata, index, maxX1m1, x2, minX3p1);
             distributeData(rdata, index, maxX1, x2, minX3p1);
             distributeData(rdata, index, maxX1m1, x2, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MM)
+    else if (sendDir == DIR_0MM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, minX2, minX3);
             distributeData(rdata, index, x1, minX2p1, minX3p1);
             distributeData(rdata, index, x1, minX2, minX3p1);
             distributeData(rdata, index, x1, minX2p1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PP)
+    else if (sendDir == DIR_0PP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, maxX2, maxX3);
             distributeData(rdata, index, x1, maxX2m1, maxX3m1);
             distributeData(rdata, index, x1, maxX2, maxX3m1);
             distributeData(rdata, index, x1, maxX2m1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0MP)
+    else if (sendDir == DIR_0MP)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, minX2, maxX3);
             distributeData(rdata, index, x1, minX2p1, maxX3m1);
             distributeData(rdata, index, x1, minX2, maxX3m1);
             distributeData(rdata, index, x1, minX2p1, maxX3);
         }
-    else if (sendDir == D3Q27System::DIR_0PM)
+    else if (sendDir == DIR_0PM)
         for (int x1 = minX1p2; x1 <= maxX1m2; x1++) {
             distributeData(rdata, index, x1, maxX2, minX3);
             distributeData(rdata, index, x1, maxX2m1, minX3p1);
             distributeData(rdata, index, x1, maxX2, minX3p1);
             distributeData(rdata, index, x1, maxX2m1, minX3);
         }
-    else if (sendDir == D3Q27System::DIR_PPM) {
+    else if (sendDir == DIR_PPM) {
         distributeData(rdata, index, maxX1, maxX2, minX3);
         distributeData(rdata, index, maxX1m1, maxX2m1, minX3p1);
         distributeData(rdata, index, maxX1, maxX2m1, minX3p1);
@@ -543,7 +549,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, maxX2, minX3p1);
         distributeData(rdata, index, maxX1, maxX2m1, minX3);
         distributeData(rdata, index, maxX1m1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MPM) {
+    } else if (sendDir == DIR_MPM) {
         distributeData(rdata, index, minX1, maxX2, minX3);
         distributeData(rdata, index, minX1p1, maxX2m1, minX3p1);
         distributeData(rdata, index, minX1, maxX2m1, minX3p1);
@@ -552,7 +558,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, maxX2, minX3p1);
         distributeData(rdata, index, minX1, maxX2m1, minX3);
         distributeData(rdata, index, minX1p1, maxX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PMM) {
+    } else if (sendDir == DIR_PMM) {
         distributeData(rdata, index, maxX1, minX2, minX3);
         distributeData(rdata, index, maxX1m1, minX2p1, minX3p1);
         distributeData(rdata, index, maxX1, minX2p1, minX3p1);
@@ -561,7 +567,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, minX2, minX3p1);
         distributeData(rdata, index, maxX1, minX2p1, minX3);
         distributeData(rdata, index, maxX1m1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_MMM) {
+    } else if (sendDir == DIR_MMM) {
         distributeData(rdata, index, minX1, minX2, minX3);
         distributeData(rdata, index, minX1p1, minX2p1, minX3p1);
         distributeData(rdata, index, minX1, minX2p1, minX3p1);
@@ -570,7 +576,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, minX2, minX3p1);
         distributeData(rdata, index, minX1, minX2p1, minX3);
         distributeData(rdata, index, minX1p1, minX2, minX3);
-    } else if (sendDir == D3Q27System::DIR_PPP) {
+    } else if (sendDir == DIR_PPP) {
         distributeData(rdata, index, maxX1, maxX2, maxX3);
         distributeData(rdata, index, maxX1m1, maxX2m1, maxX3m1);
         distributeData(rdata, index, maxX1, maxX2m1, maxX3m1);
@@ -579,7 +585,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, maxX2, maxX3m1);
         distributeData(rdata, index, maxX1, maxX2m1, maxX3);
         distributeData(rdata, index, maxX1m1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MPP) {
+    } else if (sendDir == DIR_MPP) {
         distributeData(rdata, index, minX1, maxX2, maxX3);
         distributeData(rdata, index, minX1p1, maxX2m1, maxX3m1);
         distributeData(rdata, index, minX1, maxX2m1, maxX3m1);
@@ -588,7 +594,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, minX1, maxX2, maxX3m1);
         distributeData(rdata, index, minX1, maxX2m1, maxX3);
         distributeData(rdata, index, minX1p1, maxX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_PMP) {
+    } else if (sendDir == DIR_PMP) {
         distributeData(rdata, index, maxX1, minX2, maxX3);
         distributeData(rdata, index, maxX1m1, minX2p1, maxX3m1);
         distributeData(rdata, index, maxX1, minX2p1, maxX3m1);
@@ -597,7 +603,7 @@ void TwoDistributionsDoubleGhostLayerFullVectorConnector::distributeData()
         distributeData(rdata, index, maxX1, minX2, maxX3m1);
         distributeData(rdata, index, maxX1, minX2p1, maxX3);
         distributeData(rdata, index, maxX1m1, minX2, maxX3);
-    } else if (sendDir == D3Q27System::DIR_MMP) {
+    } else if (sendDir == DIR_MMP) {
         distributeData(rdata, index, minX1, minX2, maxX3);
         distributeData(rdata, index, minX1p1, minX2p1, maxX3m1);
         distributeData(rdata, index, minX1, minX2p1, maxX3m1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.h
index 508c9e90a2bced8560dcda6098d0fb4aea8b4d9a..d5769c726eda127ca603d6984744274abaf2edae 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsDoubleGhostLayerFullVectorConnector.h
@@ -68,15 +68,15 @@ protected:
    inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    SPtr<EsoTwist3D>  fDis;
 
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
 
    SPtr<EsoTwist3D>  hDis;
 
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullDirectConnector.h
index fbcfd9830db4e4c08d222471bbdb65e8524eaed5..625b8f6e3292cf4f56eab91536e5e9c8069b238d 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullDirectConnector.h
@@ -55,21 +55,21 @@ protected:
     inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) override;
 
 private:
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromf;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromf;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromf;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsTof;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsTof;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsTof;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsFromh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsFromh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsFromh;
 
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
-	CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
-	CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsToh;
+	CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsToh;
+	CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsToh;
 
 	SPtr<EsoTwist3D>  fFrom, hFrom;
 	SPtr<EsoTwist3D>  fTo, hTo;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.cpp b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.cpp
index bab931d93dd99cf89f4517159cef1d6efc000eff..7987c2f6c8af52fbf897ff6bbcee47add3fc0056 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.cpp
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.cpp
@@ -50,6 +50,8 @@ TwoDistributionsFullVectorConnector::TwoDistributionsFullVectorConnector(SPtr<Bl
 //////////////////////////////////////////////////////////////////////////
 void TwoDistributionsFullVectorConnector::init()
 {
+   using namespace vf::lbm::dir;
+
    FullVectorConnector::init();
 
    fDis = dynamicPointerCast<EsoTwist3D>(block.lock()->getKernel()->getDataSet()->getFdistributions());
@@ -58,37 +60,37 @@ void TwoDistributionsFullVectorConnector::init()
    int anz = 2*27;
    switch (sendDir)
    {
-   case D3Q27System::DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
-   case D3Q27System::DIR_P00:
-   case D3Q27System::DIR_M00: sender->getData().resize(maxX2*maxX3*anz, 0.0);   break;
-   case D3Q27System::DIR_0P0:
-   case D3Q27System::DIR_0M0: sender->getData().resize(maxX1*maxX3*anz, 0.0);   break;
-   case D3Q27System::DIR_00P:
-   case D3Q27System::DIR_00M: sender->getData().resize(maxX1*maxX2*anz, 0.0);   break;
+   case DIR_000: UB_THROW(UbException(UB_EXARGS, "ZERO not allowed")); break;
+   case DIR_P00:
+   case DIR_M00: sender->getData().resize(maxX2*maxX3*anz, 0.0);   break;
+   case DIR_0P0:
+   case DIR_0M0: sender->getData().resize(maxX1*maxX3*anz, 0.0);   break;
+   case DIR_00P:
+   case DIR_00M: sender->getData().resize(maxX1*maxX2*anz, 0.0);   break;
 
-   case D3Q27System::DIR_PP0:
-   case D3Q27System::DIR_MM0:
-   case D3Q27System::DIR_PM0:
-   case D3Q27System::DIR_MP0:  sender->getData().resize(maxX3*anz, 0.0);   break;
+   case DIR_PP0:
+   case DIR_MM0:
+   case DIR_PM0:
+   case DIR_MP0:  sender->getData().resize(maxX3*anz, 0.0);   break;
 
-   case D3Q27System::DIR_P0P:
-   case D3Q27System::DIR_M0M:
-   case D3Q27System::DIR_P0M:
-   case D3Q27System::DIR_M0P:  sender->getData().resize(maxX2*anz, 0.0);   break;
+   case DIR_P0P:
+   case DIR_M0M:
+   case DIR_P0M:
+   case DIR_M0P:  sender->getData().resize(maxX2*anz, 0.0);   break;
 
-   case D3Q27System::DIR_0PP:
-   case D3Q27System::DIR_0MM:
-   case D3Q27System::DIR_0PM:
-   case D3Q27System::DIR_0MP:  sender->getData().resize(maxX1*anz, 0.0);   break;
+   case DIR_0PP:
+   case DIR_0MM:
+   case DIR_0PM:
+   case DIR_0MP:  sender->getData().resize(maxX1*anz, 0.0);   break;
 
-   case D3Q27System::DIR_PPP:
-   case D3Q27System::DIR_MMM:
-   case D3Q27System::DIR_PPM:
-   case D3Q27System::DIR_MMP:
-   case D3Q27System::DIR_PMP:
-   case D3Q27System::DIR_MPM:
-   case D3Q27System::DIR_PMM:
-   case D3Q27System::DIR_MPP:  sender->getData().resize(anz, 0.0);   break;
+   case DIR_PPP:
+   case DIR_MMM:
+   case DIR_PPM:
+   case DIR_MMP:
+   case DIR_PMP:
+   case DIR_MPM:
+   case DIR_PMM:
+   case DIR_MPP:  sender->getData().resize(anz, 0.0);   break;
 
    default: UB_THROW(UbException(UB_EXARGS, "unknown sendDir"));
    }
diff --git a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.h
index 406acb5b8707609811d35da46034db4cfec7c9c3..b2cb384d652273aee82c992c50d4df9b1e46a4e9 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/TwoDistributionsFullVectorConnector.h
@@ -62,15 +62,15 @@ protected:
    inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) override;
 
 private:
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    SPtr<EsoTwist3D>  fDis;
 
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
-   CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
-   CbArray3D <LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr localHdistributions;
+   CbArray4D <real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalHdistributions;
+   CbArray3D <real, IndexerX3X2X1>::CbArray3DPtr   zeroHdistributions;
 
 
    SPtr<EsoTwist3D>  hDis;
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.cpp b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.cpp
index 0585947928e4fdc626659ed1a1d0e956fdd62de6..07e62e78a57fa8817ec8f4b7c20bce693697788d 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.cpp
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.cpp
@@ -4,66 +4,66 @@
 
 D3Q27EsoTwist3DSoA::D3Q27EsoTwist3DSoA() = default;
 //////////////////////////////////////////////////////////////////////////
-D3Q27EsoTwist3DSoA::D3Q27EsoTwist3DSoA(const size_t &nx1, const size_t &nx2, const size_t &nx3, LBMReal value)
+D3Q27EsoTwist3DSoA::D3Q27EsoTwist3DSoA(const size_t &nx1, const size_t &nx2, const size_t &nx3, real value)
 {
     this->NX1 = nx1;
     this->NX2 = nx2;
     this->NX3 = nx3;
 
-    d.E = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.W = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.N = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.S = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.T = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.B = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.NE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.SW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.SE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.NW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TN = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BS = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BN = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TS = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TNE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TNW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TSE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.TSW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BNE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BNW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BSE = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
-    d.BSW = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.E = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.W = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.N = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.S = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.T = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.B = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.NE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.SW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.SE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.NW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TN = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BS = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BN = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TS = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TNE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TNW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TSE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.TSW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BNE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BNW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BSE = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
+    d.BSW = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(
+        new CbArray3D<real, IndexerX3X2X1>(nx1 + 1, nx2 + 1, nx3 + 1, value));
     d.REST =
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx1, nx2, nx3, value));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx1, nx2, nx3, value));
 }
 //////////////////////////////////////////////////////////////////////////
 D3Q27EsoTwist3DSoA::~D3Q27EsoTwist3DSoA() = default;
@@ -85,114 +85,120 @@ void D3Q27EsoTwist3DSoA::swap()
     std::swap(d.TSW, d.BNE);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSoA::getDistribution(real *const f, size_t x1, size_t x2, size_t x3)
 {
+    using namespace vf::lbm::dir;
+
     size_t x1p = x1 + 1;
     size_t x2p = x2 + 1;
     size_t x3p = x3 + 1;
 
-    f[D3Q27System::DIR_P00]   = (*d.E)(x1, x2, x3);
-    f[D3Q27System::DIR_0P0]   = (*d.N)(x1, x2, x3);
-    f[D3Q27System::DIR_00P]   = (*d.T)(x1, x2, x3);
-    f[D3Q27System::DIR_PP0]  = (*d.NE)(x1, x2, x3);
-    f[D3Q27System::DIR_MP0]  = (*d.NW)(x1p, x2, x3);
-    f[D3Q27System::DIR_P0P]  = (*d.TE)(x1, x2, x3);
-    f[D3Q27System::DIR_M0P]  = (*d.TW)(x1p, x2, x3);
-    f[D3Q27System::DIR_0PP]  = (*d.TN)(x1, x2, x3);
-    f[D3Q27System::DIR_0MP]  = (*d.TS)(x1, x2p, x3);
-    f[D3Q27System::DIR_PPP] = (*d.TNE)(x1, x2, x3);
-    f[D3Q27System::DIR_MPP] = (*d.TNW)(x1p, x2, x3);
-    f[D3Q27System::DIR_PMP] = (*d.TSE)(x1, x2p, x3);
-    f[D3Q27System::DIR_MMP] = (*d.TSW)(x1p, x2p, x3);
+    f[DIR_P00]   = (*d.E)(x1, x2, x3);
+    f[DIR_0P0]   = (*d.N)(x1, x2, x3);
+    f[DIR_00P]   = (*d.T)(x1, x2, x3);
+    f[DIR_PP0]  = (*d.NE)(x1, x2, x3);
+    f[DIR_MP0]  = (*d.NW)(x1p, x2, x3);
+    f[DIR_P0P]  = (*d.TE)(x1, x2, x3);
+    f[DIR_M0P]  = (*d.TW)(x1p, x2, x3);
+    f[DIR_0PP]  = (*d.TN)(x1, x2, x3);
+    f[DIR_0MP]  = (*d.TS)(x1, x2p, x3);
+    f[DIR_PPP] = (*d.TNE)(x1, x2, x3);
+    f[DIR_MPP] = (*d.TNW)(x1p, x2, x3);
+    f[DIR_PMP] = (*d.TSE)(x1, x2p, x3);
+    f[DIR_MMP] = (*d.TSW)(x1p, x2p, x3);
 
-    f[D3Q27System::DIR_M00]   = (*d.W)(x1p, x2, x3);
-    f[D3Q27System::DIR_0M0]   = (*d.S)(x1, x2p, x3);
-    f[D3Q27System::DIR_00M]   = (*d.B)(x1, x2, x3p);
-    f[D3Q27System::DIR_MM0]  = (*d.SW)(x1p, x2p, x3);
-    f[D3Q27System::DIR_PM0]  = (*d.SE)(x1, x2p, x3);
-    f[D3Q27System::DIR_M0M]  = (*d.BW)(x1p, x2, x3p);
-    f[D3Q27System::DIR_P0M]  = (*d.BE)(x1, x2, x3p);
-    f[D3Q27System::DIR_0MM]  = (*d.BS)(x1, x2p, x3p);
-    f[D3Q27System::DIR_0PM]  = (*d.BN)(x1, x2, x3p);
-    f[D3Q27System::DIR_MMM] = (*d.BSW)(x1p, x2p, x3p);
-    f[D3Q27System::DIR_PMM] = (*d.BSE)(x1, x2p, x3p);
-    f[D3Q27System::DIR_MPM] = (*d.BNW)(x1p, x2, x3p);
-    f[D3Q27System::DIR_PPM] = (*d.BNE)(x1, x2, x3p);
+    f[DIR_M00]   = (*d.W)(x1p, x2, x3);
+    f[DIR_0M0]   = (*d.S)(x1, x2p, x3);
+    f[DIR_00M]   = (*d.B)(x1, x2, x3p);
+    f[DIR_MM0]  = (*d.SW)(x1p, x2p, x3);
+    f[DIR_PM0]  = (*d.SE)(x1, x2p, x3);
+    f[DIR_M0M]  = (*d.BW)(x1p, x2, x3p);
+    f[DIR_P0M]  = (*d.BE)(x1, x2, x3p);
+    f[DIR_0MM]  = (*d.BS)(x1, x2p, x3p);
+    f[DIR_0PM]  = (*d.BN)(x1, x2, x3p);
+    f[DIR_MMM] = (*d.BSW)(x1p, x2p, x3p);
+    f[DIR_PMM] = (*d.BSE)(x1, x2p, x3p);
+    f[DIR_MPM] = (*d.BNW)(x1p, x2, x3p);
+    f[DIR_PPM] = (*d.BNE)(x1, x2, x3p);
 
-    f[D3Q27System::DIR_000] = (*d.REST)(x1, x2, x3);
+    f[DIR_000] = (*d.REST)(x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSoA::setDistribution(const real *const f, size_t x1, size_t x2, size_t x3)
 {
+    using namespace vf::lbm::dir;
+
     size_t x1p = x1 + 1;
     size_t x2p = x2 + 1;
     size_t x3p = x3 + 1;
 
-    (*d.E)(x1, x2, x3)     = f[D3Q27System::INV_P00];
-    (*d.N)(x1, x2, x3)     = f[D3Q27System::INV_0P0];
-    (*d.T)(x1, x2, x3)     = f[D3Q27System::INV_00P];
-    (*d.NE)(x1, x2, x3)    = f[D3Q27System::INV_PP0];
-    (*d.NW)(x1p, x2, x3)   = f[D3Q27System::INV_MP0];
-    (*d.TE)(x1, x2, x3)    = f[D3Q27System::INV_P0P];
-    (*d.TW)(x1p, x2, x3)   = f[D3Q27System::INV_M0P];
-    (*d.TN)(x1, x2, x3)    = f[D3Q27System::INV_0PP];
-    (*d.TS)(x1, x2p, x3)   = f[D3Q27System::INV_0MP];
-    (*d.TNE)(x1, x2, x3)   = f[D3Q27System::INV_PPP];
-    (*d.TNW)(x1p, x2, x3)  = f[D3Q27System::INV_MPP];
-    (*d.TSE)(x1, x2p, x3)  = f[D3Q27System::INV_PMP];
-    (*d.TSW)(x1p, x2p, x3) = f[D3Q27System::INV_MMP];
+    (*d.E)(x1, x2, x3)     = f[INV_P00];
+    (*d.N)(x1, x2, x3)     = f[INV_0P0];
+    (*d.T)(x1, x2, x3)     = f[INV_00P];
+    (*d.NE)(x1, x2, x3)    = f[INV_PP0];
+    (*d.NW)(x1p, x2, x3)   = f[INV_MP0];
+    (*d.TE)(x1, x2, x3)    = f[INV_P0P];
+    (*d.TW)(x1p, x2, x3)   = f[INV_M0P];
+    (*d.TN)(x1, x2, x3)    = f[INV_0PP];
+    (*d.TS)(x1, x2p, x3)   = f[INV_0MP];
+    (*d.TNE)(x1, x2, x3)   = f[INV_PPP];
+    (*d.TNW)(x1p, x2, x3)  = f[INV_MPP];
+    (*d.TSE)(x1, x2p, x3)  = f[INV_PMP];
+    (*d.TSW)(x1p, x2p, x3) = f[INV_MMP];
 
-    (*d.W)(x1p, x2, x3)     = f[D3Q27System::INV_M00];
-    (*d.S)(x1, x2p, x3)     = f[D3Q27System::INV_0M0];
-    (*d.B)(x1, x2, x3p)     = f[D3Q27System::INV_00M];
-    (*d.SW)(x1p, x2p, x3)   = f[D3Q27System::INV_MM0];
-    (*d.SE)(x1, x2p, x3)    = f[D3Q27System::INV_PM0];
-    (*d.BW)(x1p, x2, x3p)   = f[D3Q27System::INV_M0M];
-    (*d.BE)(x1, x2, x3p)    = f[D3Q27System::INV_P0M];
-    (*d.BS)(x1, x2p, x3p)   = f[D3Q27System::INV_0MM];
-    (*d.BN)(x1, x2, x3p)    = f[D3Q27System::INV_0PM];
-    (*d.BSW)(x1p, x2p, x3p) = f[D3Q27System::INV_MMM];
-    (*d.BSE)(x1, x2p, x3p)  = f[D3Q27System::INV_PMM];
-    (*d.BNW)(x1p, x2, x3p)  = f[D3Q27System::INV_MPM];
-    (*d.BNE)(x1, x2, x3p)   = f[D3Q27System::INV_PPM];
+    (*d.W)(x1p, x2, x3)     = f[INV_M00];
+    (*d.S)(x1, x2p, x3)     = f[INV_0M0];
+    (*d.B)(x1, x2, x3p)     = f[INV_00M];
+    (*d.SW)(x1p, x2p, x3)   = f[INV_MM0];
+    (*d.SE)(x1, x2p, x3)    = f[INV_PM0];
+    (*d.BW)(x1p, x2, x3p)   = f[INV_M0M];
+    (*d.BE)(x1, x2, x3p)    = f[INV_P0M];
+    (*d.BS)(x1, x2p, x3p)   = f[INV_0MM];
+    (*d.BN)(x1, x2, x3p)    = f[INV_0PM];
+    (*d.BSW)(x1p, x2p, x3p) = f[INV_MMM];
+    (*d.BSE)(x1, x2p, x3p)  = f[INV_PMM];
+    (*d.BNW)(x1p, x2, x3p)  = f[INV_MPM];
+    (*d.BNE)(x1, x2, x3p)   = f[INV_PPM];
 
-    (*d.REST)(x1, x2, x3) = f[D3Q27System::DIR_000];
+    (*d.REST)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSoA::getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3)
 {
-    f[D3Q27System::INV_P00]   = (*d.E)(x1, x2, x3);
-    f[D3Q27System::INV_0P0]   = (*d.N)(x1, x2, x3);
-    f[D3Q27System::INV_00P]   = (*d.T)(x1, x2, x3);
-    f[D3Q27System::INV_PP0]  = (*d.NE)(x1, x2, x3);
-    f[D3Q27System::INV_MP0]  = (*d.NW)(x1 + 1, x2, x3);
-    f[D3Q27System::INV_P0P]  = (*d.TE)(x1, x2, x3);
-    f[D3Q27System::INV_M0P]  = (*d.TW)(x1 + 1, x2, x3);
-    f[D3Q27System::INV_0PP]  = (*d.TN)(x1, x2, x3);
-    f[D3Q27System::INV_0MP]  = (*d.TS)(x1, x2 + 1, x3);
-    f[D3Q27System::INV_PPP] = (*d.TNE)(x1, x2, x3);
-    f[D3Q27System::INV_MPP] = (*d.TNW)(x1 + 1, x2, x3);
-    f[D3Q27System::INV_PMP] = (*d.TSE)(x1, x2 + 1, x3);
-    f[D3Q27System::INV_MMP] = (*d.TSW)(x1 + 1, x2 + 1, x3);
+    using namespace vf::lbm::dir;
+
+    f[INV_P00]   = (*d.E)(x1, x2, x3);
+    f[INV_0P0]   = (*d.N)(x1, x2, x3);
+    f[INV_00P]   = (*d.T)(x1, x2, x3);
+    f[INV_PP0]  = (*d.NE)(x1, x2, x3);
+    f[INV_MP0]  = (*d.NW)(x1 + 1, x2, x3);
+    f[INV_P0P]  = (*d.TE)(x1, x2, x3);
+    f[INV_M0P]  = (*d.TW)(x1 + 1, x2, x3);
+    f[INV_0PP]  = (*d.TN)(x1, x2, x3);
+    f[INV_0MP]  = (*d.TS)(x1, x2 + 1, x3);
+    f[INV_PPP] = (*d.TNE)(x1, x2, x3);
+    f[INV_MPP] = (*d.TNW)(x1 + 1, x2, x3);
+    f[INV_PMP] = (*d.TSE)(x1, x2 + 1, x3);
+    f[INV_MMP] = (*d.TSW)(x1 + 1, x2 + 1, x3);
 
-    f[D3Q27System::INV_M00]   = (*d.W)(x1 + 1, x2, x3);
-    f[D3Q27System::INV_0M0]   = (*d.S)(x1, x2 + 1, x3);
-    f[D3Q27System::INV_00M]   = (*d.B)(x1, x2, x3 + 1);
-    f[D3Q27System::INV_MM0]  = (*d.SW)(x1 + 1, x2 + 1, x3);
-    f[D3Q27System::INV_PM0]  = (*d.SE)(x1, x2 + 1, x3);
-    f[D3Q27System::INV_M0M]  = (*d.BW)(x1 + 1, x2, x3 + 1);
-    f[D3Q27System::INV_P0M]  = (*d.BE)(x1, x2, x3 + 1);
-    f[D3Q27System::INV_0MM]  = (*d.BS)(x1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_0PM]  = (*d.BN)(x1, x2, x3 + 1);
-    f[D3Q27System::INV_MMM] = (*d.BSW)(x1 + 1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_PMM] = (*d.BSE)(x1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_MPM] = (*d.BNW)(x1 + 1, x2, x3 + 1);
-    f[D3Q27System::INV_PPM] = (*d.BNE)(x1, x2, x3 + 1);
+    f[INV_M00]   = (*d.W)(x1 + 1, x2, x3);
+    f[INV_0M0]   = (*d.S)(x1, x2 + 1, x3);
+    f[INV_00M]   = (*d.B)(x1, x2, x3 + 1);
+    f[INV_MM0]  = (*d.SW)(x1 + 1, x2 + 1, x3);
+    f[INV_PM0]  = (*d.SE)(x1, x2 + 1, x3);
+    f[INV_M0M]  = (*d.BW)(x1 + 1, x2, x3 + 1);
+    f[INV_P0M]  = (*d.BE)(x1, x2, x3 + 1);
+    f[INV_0MM]  = (*d.BS)(x1, x2 + 1, x3 + 1);
+    f[INV_0PM]  = (*d.BN)(x1, x2, x3 + 1);
+    f[INV_MMM] = (*d.BSW)(x1 + 1, x2 + 1, x3 + 1);
+    f[INV_PMM] = (*d.BSE)(x1, x2 + 1, x3 + 1);
+    f[INV_MPM] = (*d.BNW)(x1 + 1, x2, x3 + 1);
+    f[INV_PPM] = (*d.BNE)(x1, x2, x3 + 1);
 
-    f[D3Q27System::DIR_000] = (*d.REST)(x1, x2, x3);
+    f[DIR_000] = (*d.REST)(x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSoA::setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3)
 {
     //(*this->localDistributions)(D3Q27System::ET_E,x1,  x2,  x3) = f[D3Q27System::DIR_P00];
     //(*this->localDistributions)(D3Q27System::ET_N,x1,  x2,  x3) = f[D3Q27System::DIR_0P0];
@@ -225,7 +231,7 @@ void D3Q27EsoTwist3DSoA::setDistributionInv(const LBMReal *const f, size_t x1, s
     //(*this->zeroDistributions)(x1,x2,x3) = f[D3Q27System::REST];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSoA::setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                                      unsigned long int direction)
 {
     // bool directionFlag = false;
@@ -288,7 +294,7 @@ void D3Q27EsoTwist3DSoA::setDistributionForDirection(const LBMReal *const f, siz
     //#endif //DEBUG
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction)
+void D3Q27EsoTwist3DSoA::setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction)
 {
     // switch (direction)
     //{
@@ -378,7 +384,7 @@ void D3Q27EsoTwist3DSoA::setDistributionForDirection(LBMReal f, size_t x1, size_
     //}
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                                         unsigned long int direction)
 {
     //   bool directionFlag = false;
@@ -444,7 +450,7 @@ void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(const LBMReal *const f,
     //#endif //DEBUG
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                                         unsigned long int direction)
 {
     // switch (direction)
@@ -535,7 +541,7 @@ void D3Q27EsoTwist3DSoA::setDistributionInvForDirection(LBMReal f, size_t x1, si
     //}
 }
 //////////////////////////////////////////////////////////////////////////
-LBMReal D3Q27EsoTwist3DSoA::getDistributionInvForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/,
+real D3Q27EsoTwist3DSoA::getDistributionInvForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/,
                                                            int /*direction*/)
 {
     // switch (direction)
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.h b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.h
index a3eb85e378051871a018066a5aabe58969790b73..204e6fe15f69a387c289ae8c60f63d59ef62ddc3 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.h
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSoA.h
@@ -9,67 +9,67 @@
 //#include <boost/serialization/base_object.hpp>
 
 struct Distributions {
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr E;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr W;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr N;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr S;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr T;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr B;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr NE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr SW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr SE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr NW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TN;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BS;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BN;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TS;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TNE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TNW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TSE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr TSW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BNE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BNW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BSE;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr BSW;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr REST;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr E;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr W;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr N;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr S;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr T;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr B;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr NE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr SW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr SE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr NW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TN;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BS;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BN;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TS;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TNE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TNW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TSE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr TSW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BNE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BNW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BSE;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr BSW;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr REST;
 };
 
 class D3Q27EsoTwist3DSoA : public EsoTwist3D
 {
 public:
     D3Q27EsoTwist3DSoA();
-    D3Q27EsoTwist3DSoA(const size_t &nx1, const size_t &nx2, const size_t &nx3, LBMReal value);
+    D3Q27EsoTwist3DSoA(const size_t &nx1, const size_t &nx2, const size_t &nx3, real value);
     //////////////////////////////////////////////////////////////////////////
     ~D3Q27EsoTwist3DSoA() override;
     //////////////////////////////////////////////////////////////////////////
     void swap() override;
     //////////////////////////////////////////////////////////////////////////
-    void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) override;
     ////////////////////////////////////////////////////////////////////////
-    void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                      unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) override;
+    void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
+    real getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
+    real getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
     size_t getNX1() const override;
     //////////////////////////////////////////////////////////////////////////
@@ -79,7 +79,7 @@ public:
     //////////////////////////////////////////////////////////////////////////
     Distributions getDistributions();
     //////////////////////////////////////////////////////////////////////////
-    void getDistributionAfterLastStep(LBMReal *const f, size_t x1, size_t x2, size_t x3);
+    void getDistributionAfterLastStep(real *const f, size_t x1, size_t x2, size_t x3);
 
 protected:
     Distributions d;
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp
index 684238baad1752ab3fad051666da459fd8e11095..4660e7b8397482683d67e6ba74b466b1857df10c 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.cpp
@@ -36,298 +36,310 @@
 
 D3Q27EsoTwist3DSplittedVector::D3Q27EsoTwist3DSplittedVector() = default;
 //////////////////////////////////////////////////////////////////////////
-D3Q27EsoTwist3DSplittedVector::D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t nx2, size_t nx3, LBMReal value)
+D3Q27EsoTwist3DSplittedVector::D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t nx2, size_t nx3, real value)
 {
     this->NX1 = nx1;
     this->NX2 = nx2;
     this->NX3 = nx3;
 
     this->localDistributions =
-        std::make_shared<CbArray4D<LBMReal, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value);
+        std::make_shared<CbArray4D<real, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value);
     this->nonLocalDistributions =
-        std::make_shared<CbArray4D<LBMReal, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value);
+        std::make_shared<CbArray4D<real, IndexerX4X3X2X1>>(13, nx1 + 1, nx2 + 1, nx3 + 1, value);
 
-    this->zeroDistributions = std::make_shared<CbArray3D<LBMReal, IndexerX3X2X1>>(nx1, nx2, nx3, value);
+    this->zeroDistributions = std::make_shared<CbArray3D<real, IndexerX3X2X1>>(nx1, nx2, nx3, value);
 }
 //////////////////////////////////////////////////////////////////////////
 D3Q27EsoTwist3DSplittedVector::~D3Q27EsoTwist3DSplittedVector() = default;
 //////////////////////////////////////////////////////////////////////////
 void D3Q27EsoTwist3DSplittedVector::swap() { std::swap(this->localDistributions, this->nonLocalDistributions); }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSplittedVector::getDistribution(real *const f, size_t x1, size_t x2, size_t x3)
 {
-    f[D3Q27System::DIR_P00]   = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-    f[D3Q27System::DIR_0P0]   = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-    f[D3Q27System::DIR_00P]   = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-    f[D3Q27System::DIR_PP0]  = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-    f[D3Q27System::DIR_MP0]  = (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
-    f[D3Q27System::DIR_P0P]  = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-    f[D3Q27System::DIR_M0P]  = (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
-    f[D3Q27System::DIR_0PP]  = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-    f[D3Q27System::DIR_0MP]  = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
-    f[D3Q27System::DIR_PPP] = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-    f[D3Q27System::DIR_MPP] = (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
-    f[D3Q27System::DIR_PMP] = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
-    f[D3Q27System::DIR_MMP] = (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
+    using namespace vf::lbm::dir;
 
-    f[D3Q27System::DIR_M00]   = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
-    f[D3Q27System::DIR_0M0]   = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
-    f[D3Q27System::DIR_00M]   = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
-    f[D3Q27System::DIR_MM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
-    f[D3Q27System::DIR_PM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
-    f[D3Q27System::DIR_M0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
-    f[D3Q27System::DIR_P0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
-    f[D3Q27System::DIR_0MM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
-    f[D3Q27System::DIR_0PM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
-    f[D3Q27System::DIR_MMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
-    f[D3Q27System::DIR_PMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
-    f[D3Q27System::DIR_MPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
-    f[D3Q27System::DIR_PPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
+    f[DIR_P00]   = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+    f[DIR_0P0]   = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+    f[DIR_00P]   = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+    f[DIR_PP0]  = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+    f[DIR_MP0]  = (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
+    f[DIR_P0P]  = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+    f[DIR_M0P]  = (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
+    f[DIR_0PP]  = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+    f[DIR_0MP]  = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
+    f[DIR_PPP] = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+    f[DIR_MPP] = (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
+    f[DIR_PMP] = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
+    f[DIR_MMP] = (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
 
-    f[D3Q27System::DIR_000] = (*this->zeroDistributions)(x1, x2, x3);
+    f[DIR_M00]   = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
+    f[DIR_0M0]   = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
+    f[DIR_00M]   = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
+    f[DIR_MM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
+    f[DIR_PM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
+    f[DIR_M0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
+    f[DIR_P0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
+    f[DIR_0MM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
+    f[DIR_0PM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
+    f[DIR_MMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
+    f[DIR_PMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
+    f[DIR_MPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
+    f[DIR_PPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
+
+    f[DIR_000] = (*this->zeroDistributions)(x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSplittedVector::setDistribution(const real *const f, size_t x1, size_t x2, size_t x3)
 {
-    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)           = f[D3Q27System::INV_P00];
-    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)           = f[D3Q27System::INV_0P0];
-    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)           = f[D3Q27System::INV_00P];
-    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)          = f[D3Q27System::INV_PP0];
-    (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3)      = f[D3Q27System::INV_MP0];
-    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)          = f[D3Q27System::INV_P0P];
-    (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3)      = f[D3Q27System::INV_M0P];
-    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)          = f[D3Q27System::INV_0PP];
-    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3)      = f[D3Q27System::INV_0MP];
-    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)         = f[D3Q27System::INV_PPP];
-    (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3)     = f[D3Q27System::INV_MPP];
-    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3)     = f[D3Q27System::INV_PMP];
-    (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::INV_MMP];
+    using namespace vf::lbm::dir;
+
+    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)           = f[INV_P00];
+    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)           = f[INV_0P0];
+    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)           = f[INV_00P];
+    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)          = f[INV_PP0];
+    (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3)      = f[INV_MP0];
+    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)          = f[INV_P0P];
+    (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3)      = f[INV_M0P];
+    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)          = f[INV_0PP];
+    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3)      = f[INV_0MP];
+    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)         = f[INV_PPP];
+    (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3)     = f[INV_MPP];
+    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3)     = f[INV_PMP];
+    (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[INV_MMP];
 
-    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3)           = f[D3Q27System::INV_M00];
-    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3)           = f[D3Q27System::INV_0M0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1)           = f[D3Q27System::INV_00M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3)      = f[D3Q27System::INV_MM0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3)          = f[D3Q27System::INV_PM0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1)      = f[D3Q27System::INV_M0M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1)          = f[D3Q27System::INV_P0M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1)      = f[D3Q27System::INV_0MM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1)          = f[D3Q27System::INV_0PM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[D3Q27System::INV_MMM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1)     = f[D3Q27System::INV_PMM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1)     = f[D3Q27System::INV_MPM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1)         = f[D3Q27System::INV_PPM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3)           = f[INV_M00];
+    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3)           = f[INV_0M0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1)           = f[INV_00M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3)      = f[INV_MM0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3)          = f[INV_PM0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1)      = f[INV_M0M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1)          = f[INV_P0M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1)      = f[INV_0MM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1)          = f[INV_0PM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[INV_MMM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1)     = f[INV_PMM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1)     = f[INV_MPM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1)         = f[INV_PPM];
 
-    (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+    (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSplittedVector::getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3)
 {
-    f[D3Q27System::INV_P00]   = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-    f[D3Q27System::INV_0P0]   = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-    f[D3Q27System::INV_00P]   = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-    f[D3Q27System::INV_PP0]  = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-    f[D3Q27System::INV_MP0]  = (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
-    f[D3Q27System::INV_P0P]  = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-    f[D3Q27System::INV_M0P]  = (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
-    f[D3Q27System::INV_0PP]  = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-    f[D3Q27System::INV_0MP]  = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
-    f[D3Q27System::INV_PPP] = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-    f[D3Q27System::INV_MPP] = (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
-    f[D3Q27System::INV_PMP] = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
-    f[D3Q27System::INV_MMP] = (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
+    using namespace vf::lbm::dir;
+
+    f[INV_P00]   = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+    f[INV_0P0]   = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+    f[INV_00P]   = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+    f[INV_PP0]  = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+    f[INV_MP0]  = (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
+    f[INV_P0P]  = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+    f[INV_M0P]  = (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
+    f[INV_0PP]  = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+    f[INV_0MP]  = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
+    f[INV_PPP] = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+    f[INV_MPP] = (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
+    f[INV_PMP] = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
+    f[INV_MMP] = (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
 
-    f[D3Q27System::INV_M00]   = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
-    f[D3Q27System::INV_0M0]   = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
-    f[D3Q27System::INV_00M]   = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
-    f[D3Q27System::INV_MM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
-    f[D3Q27System::INV_PM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
-    f[D3Q27System::INV_M0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
-    f[D3Q27System::INV_P0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
-    f[D3Q27System::INV_0MM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_0PM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
-    f[D3Q27System::INV_MMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_PMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
-    f[D3Q27System::INV_MPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
-    f[D3Q27System::INV_PPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
+    f[INV_M00]   = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
+    f[INV_0M0]   = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
+    f[INV_00M]   = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
+    f[INV_MM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
+    f[INV_PM0]  = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
+    f[INV_M0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
+    f[INV_P0M]  = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
+    f[INV_0MM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
+    f[INV_0PM]  = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
+    f[INV_MMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
+    f[INV_PMM] = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
+    f[INV_MPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
+    f[INV_PPM] = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
 
-    f[D3Q27System::DIR_000] = (*this->zeroDistributions)(x1, x2, x3);
+    f[DIR_000] = (*this->zeroDistributions)(x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3)
+void D3Q27EsoTwist3DSplittedVector::setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3)
 {
-    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)           = f[D3Q27System::DIR_P00];
-    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)           = f[D3Q27System::DIR_0P0];
-    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)           = f[D3Q27System::DIR_00P];
-    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)          = f[D3Q27System::DIR_PP0];
-    (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3)      = f[D3Q27System::DIR_MP0];
-    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)          = f[D3Q27System::DIR_P0P];
-    (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3)      = f[D3Q27System::DIR_M0P];
-    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)          = f[D3Q27System::DIR_0PP];
-    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3)      = f[D3Q27System::DIR_0MP];
-    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)         = f[D3Q27System::DIR_PPP];
-    (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3)     = f[D3Q27System::DIR_MPP];
-    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3)     = f[D3Q27System::DIR_PMP];
-    (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_MMP];
+    using namespace vf::lbm::dir;
 
-    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3)           = f[D3Q27System::DIR_M00];
-    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3)           = f[D3Q27System::DIR_0M0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1)           = f[D3Q27System::DIR_00M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3)      = f[D3Q27System::DIR_MM0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3)          = f[D3Q27System::DIR_PM0];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1)      = f[D3Q27System::DIR_M0M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1)          = f[D3Q27System::DIR_P0M];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1)      = f[D3Q27System::DIR_0MM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1)          = f[D3Q27System::DIR_0PM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_MMM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1)     = f[D3Q27System::DIR_PMM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1)     = f[D3Q27System::DIR_MPM];
-    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1)         = f[D3Q27System::DIR_PPM];
+    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)           = f[DIR_P00];
+    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)           = f[DIR_0P0];
+    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)           = f[DIR_00P];
+    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)          = f[DIR_PP0];
+    (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3)      = f[DIR_MP0];
+    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)          = f[DIR_P0P];
+    (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3)      = f[DIR_M0P];
+    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)          = f[DIR_0PP];
+    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3)      = f[DIR_0MP];
+    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)         = f[DIR_PPP];
+    (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3)     = f[DIR_MPP];
+    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3)     = f[DIR_PMP];
+    (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[DIR_MMP];
 
-    (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3)           = f[DIR_M00];
+    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3)           = f[DIR_0M0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1)           = f[DIR_00M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3)      = f[DIR_MM0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3)          = f[DIR_PM0];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1)      = f[DIR_M0M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1)          = f[DIR_P0M];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1)      = f[DIR_0MM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1)          = f[DIR_0PM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[DIR_MMM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1)     = f[DIR_PMM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1)     = f[DIR_MPM];
+    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1)         = f[DIR_PPM];
+
+    (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                                                 unsigned long int direction)
 {
+    using namespace vf::lbm::dir;
+
     if ((direction & EsoTwistD3Q27System::etE) == EsoTwistD3Q27System::etE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f[D3Q27System::DIR_P00];
+        (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f[DIR_P00];
     if ((direction & EsoTwistD3Q27System::etW) == EsoTwistD3Q27System::etW)
-        (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[D3Q27System::DIR_M00];
+        (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[DIR_M00];
     if ((direction & EsoTwistD3Q27System::etS) == EsoTwistD3Q27System::etS)
-        (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[D3Q27System::DIR_0M0];
+        (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[DIR_0M0];
     if ((direction & EsoTwistD3Q27System::etN) == EsoTwistD3Q27System::etN)
-        (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f[D3Q27System::DIR_0P0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f[DIR_0P0];
     if ((direction & EsoTwistD3Q27System::etB) == EsoTwistD3Q27System::etB)
-        (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[D3Q27System::DIR_00M];
+        (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[DIR_00M];
     if ((direction & EsoTwistD3Q27System::etT) == EsoTwistD3Q27System::etT)
-        (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f[D3Q27System::DIR_00P];
+        (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f[DIR_00P];
     if ((direction & EsoTwistD3Q27System::etSW) == EsoTwistD3Q27System::etSW)
-        (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[D3Q27System::DIR_MM0];
+        (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[DIR_MM0];
     if ((direction & EsoTwistD3Q27System::etNE) == EsoTwistD3Q27System::etNE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_PP0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f[DIR_PP0];
     if ((direction & EsoTwistD3Q27System::etNW) == EsoTwistD3Q27System::etNW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f[D3Q27System::DIR_MP0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f[DIR_MP0];
     if ((direction & EsoTwistD3Q27System::etSE) == EsoTwistD3Q27System::etSE)
-        (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f[D3Q27System::DIR_PM0];
+        (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f[DIR_PM0];
     if ((direction & EsoTwistD3Q27System::etBW) == EsoTwistD3Q27System::etBW)
-        (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[D3Q27System::DIR_M0M];
+        (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[DIR_M0M];
     if ((direction & EsoTwistD3Q27System::etTE) == EsoTwistD3Q27System::etTE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f[D3Q27System::DIR_P0P];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f[DIR_P0P];
     if ((direction & EsoTwistD3Q27System::etTW) == EsoTwistD3Q27System::etTW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f[D3Q27System::DIR_M0P];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f[DIR_M0P];
     if ((direction & EsoTwistD3Q27System::etBE) == EsoTwistD3Q27System::etBE)
-        (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f[D3Q27System::DIR_P0M];
+        (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f[DIR_P0M];
     if ((direction & EsoTwistD3Q27System::etBS) == EsoTwistD3Q27System::etBS)
-        (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[D3Q27System::DIR_0MM];
+        (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[DIR_0MM];
     if ((direction & EsoTwistD3Q27System::etTN) == EsoTwistD3Q27System::etTN)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_0PP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f[DIR_0PP];
     if ((direction & EsoTwistD3Q27System::etTS) == EsoTwistD3Q27System::etTS)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f[D3Q27System::DIR_0MP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f[DIR_0MP];
     if ((direction & EsoTwistD3Q27System::etBN) == EsoTwistD3Q27System::etBN)
-        (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f[D3Q27System::DIR_0PM];
+        (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f[DIR_0PM];
     if ((direction & EsoTwistD3Q27System::etBSW) == EsoTwistD3Q27System::etBSW)
-        (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[D3Q27System::DIR_MMM];
+        (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[DIR_MMM];
     if ((direction & EsoTwistD3Q27System::etTNE) == EsoTwistD3Q27System::etTNE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_PPP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[DIR_PPP];
     if ((direction & EsoTwistD3Q27System::etBSE) == EsoTwistD3Q27System::etBSE)
-        (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f[D3Q27System::DIR_PMM];
+        (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f[DIR_PMM];
     if ((direction & EsoTwistD3Q27System::etTNW) == EsoTwistD3Q27System::etTNW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_MPP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f[DIR_MPP];
     if ((direction & EsoTwistD3Q27System::etBNW) == EsoTwistD3Q27System::etBNW)
-        (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f[D3Q27System::DIR_MPM];
+        (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f[DIR_MPM];
     if ((direction & EsoTwistD3Q27System::etTSE) == EsoTwistD3Q27System::etTSE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[D3Q27System::DIR_PMP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[DIR_PMP];
     if ((direction & EsoTwistD3Q27System::etBNE) == EsoTwistD3Q27System::etBNE)
-        (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_PPM];
+        (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[DIR_PPM];
     if ((direction & EsoTwistD3Q27System::etTSW) == EsoTwistD3Q27System::etTSW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[D3Q27System::DIR_MMP];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[DIR_MMP];
     if ((direction & EsoTwistD3Q27System::REST) == EsoTwistD3Q27System::REST)
-        (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+        (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3,
                                                                 int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_000:
+        case DIR_000:
             (*this->zeroDistributions)(x1, x2, x3) = f;
             break;
         default:
@@ -335,148 +347,152 @@ void D3Q27EsoTwist3DSplittedVector::setDistributionForDirection(LBMReal f, size_
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2,
+void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(const real *const f, size_t x1, size_t x2,
                                                                    size_t x3, unsigned long int direction)
 {
+    using namespace vf::lbm::dir;
+
     if ((direction & EsoTwistD3Q27System::etE) == EsoTwistD3Q27System::etE)
-        (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[D3Q27System::DIR_P00];
+        (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[DIR_P00];
     if ((direction & EsoTwistD3Q27System::etW) == EsoTwistD3Q27System::etW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f[D3Q27System::DIR_M00];
+        (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f[DIR_M00];
     if ((direction & EsoTwistD3Q27System::etS) == EsoTwistD3Q27System::etS)
-        (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f[D3Q27System::DIR_0M0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f[DIR_0M0];
     if ((direction & EsoTwistD3Q27System::etN) == EsoTwistD3Q27System::etN)
-        (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[D3Q27System::DIR_0P0];
+        (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[DIR_0P0];
     if ((direction & EsoTwistD3Q27System::etB) == EsoTwistD3Q27System::etB)
-        (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f[D3Q27System::DIR_00M];
+        (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f[DIR_00M];
     if ((direction & EsoTwistD3Q27System::etT) == EsoTwistD3Q27System::etT)
-        (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[D3Q27System::DIR_00P];
+        (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[DIR_00P];
     if ((direction & EsoTwistD3Q27System::etSW) == EsoTwistD3Q27System::etSW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_MM0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f[DIR_MM0];
     if ((direction & EsoTwistD3Q27System::etNE) == EsoTwistD3Q27System::etNE)
-        (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[D3Q27System::DIR_PP0];
+        (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[DIR_PP0];
     if ((direction & EsoTwistD3Q27System::etNW) == EsoTwistD3Q27System::etNW)
-        (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f[D3Q27System::DIR_MP0];
+        (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f[DIR_MP0];
     if ((direction & EsoTwistD3Q27System::etSE) == EsoTwistD3Q27System::etSE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f[D3Q27System::DIR_PM0];
+        (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f[DIR_PM0];
     if ((direction & EsoTwistD3Q27System::etBW) == EsoTwistD3Q27System::etBW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f[D3Q27System::DIR_M0M];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f[DIR_M0M];
     if ((direction & EsoTwistD3Q27System::etTE) == EsoTwistD3Q27System::etTE)
-        (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[D3Q27System::DIR_P0P];
+        (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[DIR_P0P];
     if ((direction & EsoTwistD3Q27System::etTW) == EsoTwistD3Q27System::etTW)
-        (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f[D3Q27System::DIR_M0P];
+        (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f[DIR_M0P];
     if ((direction & EsoTwistD3Q27System::etBE) == EsoTwistD3Q27System::etBE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f[D3Q27System::DIR_P0M];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f[DIR_P0M];
     if ((direction & EsoTwistD3Q27System::etBS) == EsoTwistD3Q27System::etBS)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_0MM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f[DIR_0MM];
     if ((direction & EsoTwistD3Q27System::etTN) == EsoTwistD3Q27System::etTN)
-        (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[D3Q27System::DIR_0PP];
+        (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[DIR_0PP];
     if ((direction & EsoTwistD3Q27System::etTS) == EsoTwistD3Q27System::etTS)
-        (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f[D3Q27System::DIR_0MP];
+        (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f[DIR_0MP];
     if ((direction & EsoTwistD3Q27System::etBN) == EsoTwistD3Q27System::etBN)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f[D3Q27System::DIR_0PM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f[DIR_0PM];
     if ((direction & EsoTwistD3Q27System::etBSW) == EsoTwistD3Q27System::etBSW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_MMM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f[DIR_MMM];
     if ((direction & EsoTwistD3Q27System::etTNE) == EsoTwistD3Q27System::etTNE)
-        (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[D3Q27System::DIR_PPP];
+        (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[DIR_PPP];
     if ((direction & EsoTwistD3Q27System::etBSE) == EsoTwistD3Q27System::etBSE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f[D3Q27System::DIR_PMM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f[DIR_PMM];
     if ((direction & EsoTwistD3Q27System::etTNW) == EsoTwistD3Q27System::etTNW)
-        (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f[D3Q27System::DIR_MPP];
+        (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f[DIR_MPP];
     if ((direction & EsoTwistD3Q27System::etBNW) == EsoTwistD3Q27System::etBNW)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[D3Q27System::DIR_MPM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f[DIR_MPM];
     if ((direction & EsoTwistD3Q27System::etTSE) == EsoTwistD3Q27System::etTSE)
-        (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f[D3Q27System::DIR_PMP];
+        (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f[DIR_PMP];
     if ((direction & EsoTwistD3Q27System::etBNE) == EsoTwistD3Q27System::etBNE)
-        (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[D3Q27System::DIR_PPM];
+        (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f[DIR_PPM];
     if ((direction & EsoTwistD3Q27System::etTSW) == EsoTwistD3Q27System::etTSW)
-        (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[D3Q27System::DIR_MMP];
+        (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f[DIR_MMP];
     if ((direction & EsoTwistD3Q27System::REST) == EsoTwistD3Q27System::REST)
-        (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+        (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                                                    unsigned long int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3) = f;
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1) = f;
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3) = f;
             break;
-        case D3Q27System::DIR_000:
+        case DIR_000:
             (*this->zeroDistributions)(x1, x2, x3) = f;
             break;
         default:
@@ -484,124 +500,128 @@ void D3Q27EsoTwist3DSplittedVector::setDistributionInvForDirection(LBMReal f, si
     }
 }
 //////////////////////////////////////////////////////////////////////////
-LBMReal D3Q27EsoTwist3DSplittedVector::getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction)
+real D3Q27EsoTwist3DSplittedVector::getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             return (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             return (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             return (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             return (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             return (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             return (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             return (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             return (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             return (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             return (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             return (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             return (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             return (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             return (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             return (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
-        case D3Q27System::DIR_000:
+        case DIR_000:
             return (*this->zeroDistributions)(x1, x2, x3);
         default:
             UB_THROW(UbException(UB_EXARGS, "Direction didn't find"));
     }
 }
 //////////////////////////////////////////////////////////////////////////
-LBMReal D3Q27EsoTwist3DSplittedVector::getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction)
+real D3Q27EsoTwist3DSplittedVector::getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             return (*this->nonLocalDistributions)(D3Q27System::ET_W, x1 + 1, x2, x3);
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             return (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             return (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2 + 1, x3);
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             return (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             return (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3 + 1);
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             return (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1 + 1, x2 + 1, x3);
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             return (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2 + 1, x3);
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             return (*this->localDistributions)(D3Q27System::ET_NW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             return (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1 + 1, x2, x3 + 1);
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3 + 1);
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             return (*this->localDistributions)(D3Q27System::ET_TW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             return (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3 + 1);
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             return (*this->localDistributions)(D3Q27System::ET_TS, x1, x2 + 1, x3);
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             return (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1 + 1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             return (*this->localDistributions)(D3Q27System::ET_TNW, x1 + 1, x2, x3);
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2 + 1, x3 + 1);
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             return (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2 + 1, x3);
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1 + 1, x2, x3 + 1);
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             return (*this->localDistributions)(D3Q27System::ET_TSW, x1 + 1, x2 + 1, x3);
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             return (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3 + 1);
-        case D3Q27System::DIR_000:
+        case DIR_000:
             return (*this->zeroDistributions)(x1, x2, x3);
         default:
             UB_THROW(UbException(UB_EXARGS, "Direction didn't find"));
@@ -614,17 +634,17 @@ size_t D3Q27EsoTwist3DSplittedVector::getNX2() const { return NX2; }
 //////////////////////////////////////////////////////////////////////////
 size_t D3Q27EsoTwist3DSplittedVector::getNX3() const { return NX3; }
 //////////////////////////////////////////////////////////////////////////
-CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector::getLocalDistributions()
+CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector::getLocalDistributions()
 {
     return this->localDistributions;
 }
 //////////////////////////////////////////////////////////////////////////
-CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector::getNonLocalDistributions()
+CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr D3Q27EsoTwist3DSplittedVector::getNonLocalDistributions()
 {
     return this->nonLocalDistributions;
 }
 //////////////////////////////////////////////////////////////////////////
-CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr D3Q27EsoTwist3DSplittedVector::getZeroDistributions()
+CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr D3Q27EsoTwist3DSplittedVector::getZeroDistributions()
 {
     return this->zeroDistributions;
 }
@@ -635,17 +655,17 @@ void D3Q27EsoTwist3DSplittedVector::setNX2(size_t newNX2) { NX2 = newNX2; }
 //////////////////////////////////////////////////////////////////////////
 void D3Q27EsoTwist3DSplittedVector::setNX3(size_t newNX3) { NX3 = newNX3; }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr array)
+void D3Q27EsoTwist3DSplittedVector::setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr array)
 {
     localDistributions = array;
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr array)
+void D3Q27EsoTwist3DSplittedVector::setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr array)
 {
     nonLocalDistributions = array;
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27EsoTwist3DSplittedVector::setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr array)
+void D3Q27EsoTwist3DSplittedVector::setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr array)
 {
     zeroDistributions = array;
 }
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
index 1c0d7d05f1392c8c116863e9e0b41000c90ed15e..060e2cb3ad367d31d6b30577f370cd1b692daecd 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
@@ -49,34 +49,34 @@ public:
     //! \param nx2 number of nodes in x2 direction
     //! \param nx3 number of nodes in x3 direction
     //! \param value initialisation value
-    D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t nx2, size_t nx3, LBMReal value);
+    D3Q27EsoTwist3DSplittedVector(size_t nx1, size_t nx2, size_t nx3, real value);
     //////////////////////////////////////////////////////////////////////////
     ~D3Q27EsoTwist3DSplittedVector() override;
     //////////////////////////////////////////////////////////////////////////
     void swap() override;
     //////////////////////////////////////////////////////////////////////////
-    void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) override;
     ////////////////////////////////////////////////////////////////////////
-    void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override;
+    void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                      unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) override;
+    void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
+    real getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
+    real getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override;
     //////////////////////////////////////////////////////////////////////////
     size_t getNX1() const override;
     //////////////////////////////////////////////////////////////////////////
@@ -84,23 +84,23 @@ public:
     //////////////////////////////////////////////////////////////////////////
     size_t getNX3() const override;
     //////////////////////////////////////////////////////////////////////////
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr getLocalDistributions();
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr getLocalDistributions();
     //////////////////////////////////////////////////////////////////////////
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr getNonLocalDistributions();
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr getNonLocalDistributions();
     //////////////////////////////////////////////////////////////////////////
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr getZeroDistributions();
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr getZeroDistributions();
     //////////////////////////////////////////////////////////////////////////
     void setNX1(size_t newNX1);
     void setNX2(size_t newNX2);
     void setNX3(size_t newNX3);
-    void setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr array);
-    void setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr array);
-    void setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr array);
+    void setLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr array);
+    void setNonLocalDistributions(CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr array);
+    void setZeroDistributions(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr array);
 
 protected:
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
     size_t NX1, NX2, NX3;
 
     friend class MPIIORestartCoProcessor;
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.cpp b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.cpp
index d67341e1af96b3914df478994f3097b64bf78302..07b0abb6aafd34510eedb2df7829d39239ecb13f 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.cpp
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.cpp
@@ -1,16 +1,16 @@
 #include "D3Q27EsoTwist3DSplittedVectorEx.h"
 
-D3Q27EsoTwist3DSplittedVectorEx::D3Q27EsoTwist3DSplittedVectorEx(int nx1, int nx2, int nx3, LBMReal value)
+D3Q27EsoTwist3DSplittedVectorEx::D3Q27EsoTwist3DSplittedVectorEx(int nx1, int nx2, int nx3, real value)
 {
     this->NX1 = nx1;
     this->NX2 = nx2;
     this->NX3 = nx3;
 
-    this->localDistributions = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-        new CbArray4D<LBMReal, IndexerX4X3X2X1>(13, nx1, nx2, nx3, value));
-    this->nonLocalDistributions = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-        new CbArray4D<LBMReal, IndexerX4X3X2X1>(13, nx1, nx2, nx3, value));
+    this->localDistributions = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+        new CbArray4D<real, IndexerX4X3X2X1>(13, nx1, nx2, nx3, value));
+    this->nonLocalDistributions = CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr(
+        new CbArray4D<real, IndexerX4X3X2X1>(13, nx1, nx2, nx3, value));
 
     this->zeroDistributions =
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx1, nx2, nx3, value));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx1, nx2, nx3, value));
 }
diff --git a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.h b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.h
index 54f9d55e5c4df0891cf40cc058a4ceaae934626a..e5481f4c86c80c4c9d3f6d64c404b8c279268f9b 100644
--- a/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.h
+++ b/src/cpu/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVectorEx.h
@@ -6,7 +6,7 @@
 class D3Q27EsoTwist3DSplittedVectorEx : public D3Q27EsoTwist3DSplittedVector
 {
 public:
-    D3Q27EsoTwist3DSplittedVectorEx(int nx1, int nx2, int nx3, LBMReal value);
+    D3Q27EsoTwist3DSplittedVectorEx(int nx1, int nx2, int nx3, real value);
 
 protected:
 private:
diff --git a/src/cpu/VirtualFluidsCore/Data/DataSet3D.h b/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
index 12b9e57489e39c15e912b5b679b768c16b89a83b..65758b1051cfd70d8495cc8d0d4af811409887e7 100644
--- a/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
@@ -40,11 +40,11 @@
 #include "basics/container/CbArray3D.h"
 #include "basics/container/CbArray4D.h"
 
-using AverageValuesArray3D     = CbArray4D<LBMReal, IndexerX4X3X2X1>;
-using ShearStressValuesArray3D = CbArray4D<LBMReal, IndexerX4X3X2X1>;
-using RelaxationFactorArray3D  = CbArray3D<LBMReal, IndexerX3X2X1>;
-using PhaseFieldArray3D        = CbArray3D<LBMReal, IndexerX3X2X1>;
-using PressureFieldArray3D     = CbArray3D<LBMReal, IndexerX3X2X1>;
+using AverageValuesArray3D     = CbArray4D<real, IndexerX4X3X2X1>;
+using ShearStressValuesArray3D = CbArray4D<real, IndexerX4X3X2X1>;
+using RelaxationFactorArray3D  = CbArray3D<real, IndexerX3X2X1>;
+using PhaseFieldArray3D        = CbArray3D<real, IndexerX3X2X1>;
+using PressureFieldArray3D     = CbArray3D<real, IndexerX3X2X1>;
 
 //! A class provides an interface for data structures in the kernel.
 class DataSet3D
diff --git a/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h b/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h
index 8fe4dccea1b53da0513a093e8a741cd0071caf48..fff57191d5172e2f3c085b6f8753018c58fae42a 100644
--- a/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/DistributionArray3D.h
@@ -55,39 +55,39 @@ public:
     //! \param x1 coordinate x1
     //! \param x2 coordinate x2
     //! \param x3 coordinate x3
-    virtual void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) = 0;
+    virtual void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) = 0;
     //! set distribution
     //! \param f distribution
     //! \param x1 coordinate x1
     //! \param x2 coordinate x2
     //! \param x3 coordinate x3
-    virtual void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) = 0;
+    virtual void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) = 0;
     //! get distribution in inverse order
     //! \param f distribution
     //! \param x1 coordinate x1
     //! \param x2 coordinate x2
     //! \param x3 coordinate x3
-    virtual void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) = 0;
+    virtual void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) = 0;
     //! set distribution in inverse order
     //! \param f distribution
     //! \param x1 coordinate x1
-    //! \param x1 coordinate x2
-    //! \param x1 coordinate x3
+    //! \param x2 coordinate x2
+    //! \param x3 coordinate x3
-    virtual void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) = 0;
+    virtual void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) = 0;
     //! set distribution in inverse order
     //! \param f distribution
     //! \param x1 coordinate x1
-    //! \param x1 coordinate x2
-    //! \param x1 coordinate x3
+    //! \param x2 coordinate x2
+    //! \param x3 coordinate x3
-    virtual void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    virtual void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                              unsigned long int direction)                               = 0;
-    virtual void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) = 0;
-    virtual LBMReal getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction)      = 0;
-    virtual void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    virtual void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) = 0;
+    virtual real getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction)      = 0;
+    virtual void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                                 unsigned long int direction)                            = 0;
-    virtual void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    virtual void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                                 unsigned long int direction)                            = 0;
-    virtual LBMReal getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction)         = 0;
+    virtual real getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction)         = 0;
     virtual void swap()                                                                                 = 0;
 
 protected:
diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h b/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h
index 319a9200cc204b0f9b869b2e52353e717a89d783..6a65255adfaf48d415c76b66364f3f34966572c0 100644
--- a/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/EsoTwist3D.h
@@ -63,31 +63,31 @@ public:
     //////////////////////////////////////////////////////////////////////////
     void swap() override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) override = 0;
+    void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override = 0;
+    void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) override = 0;
     ////////////////////////////////////////////////////////////////////////
-    void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) override = 0;
+    void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override = 0;
+    void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                      unsigned long int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) override = 0;
+    void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    // virtual void getDistributionInvForDirection(LBMReal* const& f, const size_t& x1, const size_t& x2, const size_t&
+    // virtual void getDistributionInvForDirection(real* const& f, const size_t& x1, const size_t& x2, const size_t&
     // x3, const unsigned long int& direction) = 0;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override = 0;
+    real getDistributionInvForDirection(size_t x1, size_t x2, size_t x3, int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
-    LBMReal getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override = 0;
+    real getDistributionForDirection(size_t x1, size_t x2, size_t x3, int direction) override = 0;
     //////////////////////////////////////////////////////////////////////////
     size_t getNX1() const override = 0;
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp
index b9a2e069edea6fa3e514b64e4e4174f0964a3a20..c28f1d4896619d72f27d7cd99579295fa46c68a5 100644
--- a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp
+++ b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.cpp
@@ -44,12 +44,13 @@ const int EsoTwistD3Q27System::ETX3[EsoTwistD3Q27System::ENDF + 1] = { 0, 0, 0,
                                                                        0, 1, 1, 0, 0, -1, 0, -1, 0, 1, 0, 1, 0 };
 
 const int EsoTwistD3Q27System::etINVDIR[EsoTwistD3Q27System::ENDF + 1] = {
-    D3Q27System::INV_P00,   D3Q27System::INV_M00,   D3Q27System::INV_0P0,   D3Q27System::INV_0M0,   D3Q27System::INV_00P,
-    D3Q27System::INV_00M,   D3Q27System::INV_PP0,  D3Q27System::INV_MM0,  D3Q27System::INV_PM0,  D3Q27System::INV_MP0,
-    D3Q27System::INV_P0P,  D3Q27System::INV_M0M,  D3Q27System::INV_P0M,  D3Q27System::INV_M0P,  D3Q27System::INV_0PP,
-    D3Q27System::INV_0MM,  D3Q27System::INV_0PM,  D3Q27System::INV_0MP,  D3Q27System::INV_PPP, D3Q27System::INV_MPP,
-    D3Q27System::INV_PMP, D3Q27System::INV_MMP, D3Q27System::INV_PPM, D3Q27System::INV_MPM, D3Q27System::INV_PMM,
-    D3Q27System::INV_MMM, D3Q27System::DIR_000
+
+    vf::lbm::dir::INV_P00,   vf::lbm::dir::INV_M00,   vf::lbm::dir::INV_0P0,   vf::lbm::dir::INV_0M0,   vf::lbm::dir::INV_00P,
+    vf::lbm::dir::INV_00M,   vf::lbm::dir::INV_PP0,  vf::lbm::dir::INV_MM0,  vf::lbm::dir::INV_PM0,  vf::lbm::dir::INV_MP0,
+    vf::lbm::dir::INV_P0P,  vf::lbm::dir::INV_M0M,  vf::lbm::dir::INV_P0M,  vf::lbm::dir::INV_M0P,  vf::lbm::dir::INV_0PP,
+    vf::lbm::dir::INV_0MM,  vf::lbm::dir::INV_0PM,  vf::lbm::dir::INV_0MP,  vf::lbm::dir::INV_PPP, vf::lbm::dir::INV_MPP,
+    vf::lbm::dir::INV_PMP, vf::lbm::dir::INV_MMP, vf::lbm::dir::INV_PPM, vf::lbm::dir::INV_MPM, vf::lbm::dir::INV_PMM,
+    vf::lbm::dir::INV_MMM, vf::lbm::dir::DIR_000
 };
 
 const unsigned long int EsoTwistD3Q27System::etDIR[EsoTwistD3Q27System::ENDF + 1] = {
diff --git a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h
index 7ccd413c662206ab3a12b3a1c88fcc81450f7a75..74cd5b9b8fe0aeb58fad65c34c5231abb8eb4b8c 100644
--- a/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h
+++ b/src/cpu/VirtualFluidsCore/Data/EsoTwistD3Q27System.h
@@ -44,63 +44,63 @@ struct EsoTwistD3Q27System {
     const static int STARTF = D3Q27System::STARTF;
     const static int ENDF   = D3Q27System::ENDF;
 
-    const static int STARTDIR = D3Q27System::STARTDIR;
+ //   const static int STARTDIR = D3Q27System::STARTDIR;
     const static int ENDDIR   = D3Q27System::ENDDIR;
 
-    static const int REST = D3Q27System::DIR_000; /*f0 */
-    static const int E    = D3Q27System::DIR_P00;    /*f1 */
-    static const int W    = D3Q27System::DIR_M00;    /*f2 */
-    static const int N    = D3Q27System::DIR_0P0;    /*f3 */
-    static const int S    = D3Q27System::DIR_0M0;    /*f4 */
-    static const int T    = D3Q27System::DIR_00P;    /*f5 */
-    static const int B    = D3Q27System::DIR_00M;    /*f6 */
-    static const int NE   = D3Q27System::DIR_PP0;   /*f7 */
-    static const int SW   = D3Q27System::DIR_MM0;   /*f8 */
-    static const int SE   = D3Q27System::DIR_PM0;   /*f9 */
-    static const int NW   = D3Q27System::DIR_MP0;   /*f10*/
-    static const int TE   = D3Q27System::DIR_P0P;   /*f11*/
-    static const int BW   = D3Q27System::DIR_M0M;   /*f12*/
-    static const int BE   = D3Q27System::DIR_P0M;   /*f13*/
-    static const int TW   = D3Q27System::DIR_M0P;   /*f14*/
-    static const int TN   = D3Q27System::DIR_0PP;   /*f15*/
-    static const int BS   = D3Q27System::DIR_0MM;   /*f16*/
-    static const int BN   = D3Q27System::DIR_0PM;   /*f17*/
-    static const int TS   = D3Q27System::DIR_0MP;   /*f18*/
-    static const int TNE  = D3Q27System::DIR_PPP;
-    static const int TNW  = D3Q27System::DIR_MPP;
-    static const int TSE  = D3Q27System::DIR_PMP;
-    static const int TSW  = D3Q27System::DIR_MMP;
-    static const int BNE  = D3Q27System::DIR_PPM;
-    static const int BNW  = D3Q27System::DIR_MPM;
-    static const int BSE  = D3Q27System::DIR_PMM;
-    static const int BSW  = D3Q27System::DIR_MMM;
+    static const int REST = vf::lbm::dir::DIR_000; /*f0 */
+    static const int E    = vf::lbm::dir::DIR_P00;    /*f1 */
+    static const int W    = vf::lbm::dir::DIR_M00;    /*f2 */
+    static const int N    = vf::lbm::dir::DIR_0P0;    /*f3 */
+    static const int S    = vf::lbm::dir::DIR_0M0;    /*f4 */
+    static const int T    = vf::lbm::dir::DIR_00P;    /*f5 */
+    static const int B    = vf::lbm::dir::DIR_00M;    /*f6 */
+    static const int NE   = vf::lbm::dir::DIR_PP0;   /*f7 */
+    static const int SW   = vf::lbm::dir::DIR_MM0;   /*f8 */
+    static const int SE   = vf::lbm::dir::DIR_PM0;   /*f9 */
+    static const int NW   = vf::lbm::dir::DIR_MP0;   /*f10*/
+    static const int TE   = vf::lbm::dir::DIR_P0P;   /*f11*/
+    static const int BW   = vf::lbm::dir::DIR_M0M;   /*f12*/
+    static const int BE   = vf::lbm::dir::DIR_P0M;   /*f13*/
+    static const int TW   = vf::lbm::dir::DIR_M0P;   /*f14*/
+    static const int TN   = vf::lbm::dir::DIR_0PP;   /*f15*/
+    static const int BS   = vf::lbm::dir::DIR_0MM;   /*f16*/
+    static const int BN   = vf::lbm::dir::DIR_0PM;   /*f17*/
+    static const int TS   = vf::lbm::dir::DIR_0MP;   /*f18*/
+    static const int TNE  = vf::lbm::dir::DIR_PPP;
+    static const int TNW  = vf::lbm::dir::DIR_MPP;
+    static const int TSE  = vf::lbm::dir::DIR_PMP;
+    static const int TSW  = vf::lbm::dir::DIR_MMP;
+    static const int BNE  = vf::lbm::dir::DIR_PPM;
+    static const int BNW  = vf::lbm::dir::DIR_MPM;
+    static const int BSE  = vf::lbm::dir::DIR_PMM;
+    static const int BSW  = vf::lbm::dir::DIR_MMM;
 
-    static const int INV_E   = D3Q27System::DIR_M00;
-    static const int INV_W   = D3Q27System::DIR_P00;
-    static const int INV_N   = D3Q27System::DIR_0M0;
-    static const int INV_S   = D3Q27System::DIR_0P0;
-    static const int INV_T   = D3Q27System::DIR_00M;
-    static const int INV_B   = D3Q27System::DIR_00P;
-    static const int INV_NE  = D3Q27System::DIR_MM0;
-    static const int INV_SW  = D3Q27System::DIR_PP0;
-    static const int INV_SE  = D3Q27System::DIR_MP0;
-    static const int INV_NW  = D3Q27System::DIR_PM0;
-    static const int INV_TE  = D3Q27System::DIR_M0M;
-    static const int INV_BW  = D3Q27System::DIR_P0P;
-    static const int INV_BE  = D3Q27System::DIR_M0P;
-    static const int INV_TW  = D3Q27System::DIR_P0M;
-    static const int INV_TN  = D3Q27System::DIR_0MM;
-    static const int INV_BS  = D3Q27System::DIR_0PP;
-    static const int INV_BN  = D3Q27System::DIR_0MP;
-    static const int INV_TS  = D3Q27System::DIR_0PM;
-    static const int INV_TNE = D3Q27System::DIR_MMM;
-    static const int INV_TNW = D3Q27System::DIR_PMM;
-    static const int INV_TSE = D3Q27System::DIR_MPM;
-    static const int INV_TSW = D3Q27System::DIR_PPM;
-    static const int INV_BNE = D3Q27System::DIR_MMP;
-    static const int INV_BNW = D3Q27System::DIR_PMP;
-    static const int INV_BSE = D3Q27System::DIR_MPP;
-    static const int INV_BSW = D3Q27System::DIR_PPP;
+    static const int INV_E   = vf::lbm::dir::DIR_M00;
+    static const int INV_W   = vf::lbm::dir::DIR_P00;
+    static const int INV_N   = vf::lbm::dir::DIR_0M0;
+    static const int INV_S   = vf::lbm::dir::DIR_0P0;
+    static const int INV_T   = vf::lbm::dir::DIR_00M;
+    static const int INV_B   = vf::lbm::dir::DIR_00P;
+    static const int INV_NE  = vf::lbm::dir::DIR_MM0;
+    static const int INV_SW  = vf::lbm::dir::DIR_PP0;
+    static const int INV_SE  = vf::lbm::dir::DIR_MP0;
+    static const int INV_NW  = vf::lbm::dir::DIR_PM0;
+    static const int INV_TE  = vf::lbm::dir::DIR_M0M;
+    static const int INV_BW  = vf::lbm::dir::DIR_P0P;
+    static const int INV_BE  = vf::lbm::dir::DIR_M0P;
+    static const int INV_TW  = vf::lbm::dir::DIR_P0M;
+    static const int INV_TN  = vf::lbm::dir::DIR_0MM;
+    static const int INV_BS  = vf::lbm::dir::DIR_0PP;
+    static const int INV_BN  = vf::lbm::dir::DIR_0MP;
+    static const int INV_TS  = vf::lbm::dir::DIR_0PM;
+    static const int INV_TNE = vf::lbm::dir::DIR_MMM;
+    static const int INV_TNW = vf::lbm::dir::DIR_PMM;
+    static const int INV_TSE = vf::lbm::dir::DIR_MPM;
+    static const int INV_TSW = vf::lbm::dir::DIR_PPM;
+    static const int INV_BNE = vf::lbm::dir::DIR_MMP;
+    static const int INV_BNW = vf::lbm::dir::DIR_PMP;
+    static const int INV_BSE = vf::lbm::dir::DIR_MPP;
+    static const int INV_BSW = vf::lbm::dir::DIR_PPP;
 
     static const unsigned long int etZERO; // 1;/*f0 */
     static const unsigned long int etE;    //  2;    /*f1 */
diff --git a/src/cpu/VirtualFluidsCore/Data/VoidData3D.h b/src/cpu/VirtualFluidsCore/Data/VoidData3D.h
index 12afae57d0f88d01963b4694aca881dd18691f87..25fe5dde2a5a874fdefe0eaf2502c86df29faa95 100644
--- a/src/cpu/VirtualFluidsCore/Data/VoidData3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/VoidData3D.h
@@ -8,7 +8,7 @@ class VoidData3D : public EsoTwist3D
 public:
     VoidData3D() = default;
     
-    VoidData3D(size_t nx1, size_t nx2, size_t nx3, LBMReal /*value*/)
+    VoidData3D(size_t nx1, size_t nx2, size_t nx3, real /*value*/)
     {
         this->NX1 = nx1;
         this->NX2 = nx2;
@@ -19,28 +19,28 @@ public:
     size_t getNX1() const override { return NX1; }
     size_t getNX2() const override { return NX2; }
     size_t getNX3() const override { return NX3; }
-    void getDistribution(LBMReal *const f, size_t x1, size_t x2, size_t x3) override {}
-    void setDistribution(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override {}
-    void getDistributionInv(LBMReal *const f, size_t x1, size_t x2, size_t x3) override {}
-    void setDistributionInv(const LBMReal *const f, size_t x1, size_t x2, size_t x3) override {}
-    void setDistributionForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void getDistribution(real *const f, size_t x1, size_t x2, size_t x3) override {}
+    void setDistribution(const real *const f, size_t x1, size_t x2, size_t x3) override {}
+    void getDistributionInv(real *const f, size_t x1, size_t x2, size_t x3) override {}
+    void setDistributionInv(const real *const f, size_t x1, size_t x2, size_t x3) override {}
+    void setDistributionForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                      unsigned long int direction) override
     {
     }
-    void setDistributionForDirection(LBMReal f, size_t x1, size_t x2, size_t x3, int direction) override {}
-    LBMReal getDistributionInvForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/, int /*direction*/) override
+    void setDistributionForDirection(real f, size_t x1, size_t x2, size_t x3, int direction) override {}
+    real getDistributionInvForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/, int /*direction*/) override
     {
         return 0.0;
     }
-    void setDistributionInvForDirection(const LBMReal *const f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(const real *const f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override
     {
     }
-    void setDistributionInvForDirection(LBMReal f, size_t x1, size_t x2, size_t x3,
+    void setDistributionInvForDirection(real f, size_t x1, size_t x2, size_t x3,
                                         unsigned long int direction) override
     {
     }
-    LBMReal getDistributionForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/, int /*direction*/) override
+    real getDistributionForDirection(size_t /*x1*/, size_t /*x2*/, size_t /*x3*/, int /*direction*/) override
     {
         return 0.0;
     }
diff --git a/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp b/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp
index 85a01cd1f85cf3efc556dd176cb748fc947e972b..92559f5f37d0b39a086b6bab0e63157b79c9f718 100644
--- a/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/BasicCalculator.cpp
@@ -67,7 +67,7 @@ void BasicCalculator::calculate()
 
 #ifdef TIMING
         UbTimer timer;
-        double time[6];
+        real time[6];
 #endif
 
         for (calcStep = startTimeStep; calcStep <= numberOfTimeSteps; calcStep++) {
@@ -147,7 +147,7 @@ void BasicCalculator::calculate()
             if (additionalGhostLayerUpdateScheduler->isDue(calcStep)) {
                 exchangeBlockData(straightStartLevel, maxInitLevel);
             }
-            coProcess((double)(calcStep));
+            coProcess((real)(calcStep));
             // now ghost nodes have actual values
         }
         UBLOG(logDEBUG1, "OMPCalculator::calculate() - stoped");
diff --git a/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp b/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp
index c6a75187047f1b507389642baaa8471abecd1683..a6b7127bc4ecd2049790cb2bef98c1d506f7f513 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/Block3D.cpp
@@ -257,6 +257,8 @@ int Block3D::getNumberOfRemoteConnectors()
 //////////////////////////////////////////////////////////////////////////
 int Block3D::getNumberOfLocalConnectorsForSurfaces()
 {
+    using namespace vf::lbm::dir;
+
     int count = 0;
 
     if (connectors.size() < 6)
@@ -264,7 +266,7 @@ int Block3D::getNumberOfLocalConnectorsForSurfaces()
 
     for (SPtr<Block3DConnector> c : connectors) {
         if (c) {
-            if (c->getSendDir() >= D3Q27System::DIR_P00 && c->getSendDir() <= D3Q27System ::DIR_00M && c->isLocalConnector())
+            if (c->getSendDir() >= (int)DIR_P00 && c->getSendDir() <= (int)DIR_00M && c->isLocalConnector())
                 count++;
         }
     }
@@ -274,11 +276,13 @@ int Block3D::getNumberOfLocalConnectorsForSurfaces()
 //////////////////////////////////////////////////////////////////////////
 int Block3D::getNumberOfRemoteConnectorsForSurfaces()
 {
+    using namespace vf::lbm::dir;
+
     int count = 0;
 
     for (SPtr<Block3DConnector> c : connectors) {
         if (c) {
-            if (c->getSendDir() >= D3Q27System::DIR_P00 && c->getSendDir() <= D3Q27System ::DIR_00M && c->isRemoteConnector())
+            if (c->getSendDir() >= (int)DIR_P00 && c->getSendDir() <= (int)DIR_00M && c->isRemoteConnector())
                 count++;
         }
     }
@@ -317,10 +321,10 @@ void Block3D::deleteInterpolationFlag()
     interpolationFlagCF = 0;
 }
 //////////////////////////////////////////////////////////////////////////
-double Block3D::getWorkLoad()
+real Block3D::getWorkLoad()
 {
-    double l = kernel->getCalculationTime();
-    l *= static_cast<double>(1 << level);
+    real l = kernel->getCalculationTime(); // start from the calculation time reported by this block's kernel
+    l *= static_cast<real>(1 << level); // weight that time by 2^level of this block
     return l;
 }
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/Grid/Block3D.h b/src/cpu/VirtualFluidsCore/Grid/Block3D.h
index b2279b069e6ee322023d30419f8eed5c587f95e8..686d8df0f9a4dda1b8d744b11b18a7bddf7e3f77 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Block3D.h
+++ b/src/cpu/VirtualFluidsCore/Grid/Block3D.h
@@ -38,6 +38,7 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "lbm/constants/D3Q27.h"
 
 class Block3DConnector;
 class LBMKernel;
@@ -134,7 +135,7 @@ public:
     bool hasInterpolationFlagFC(int dir);
     bool hasInterpolationFlagFC();
 
-    double getWorkLoad();
+    real getWorkLoad();
 
     std::string toString();
 
diff --git a/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp b/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp
index fbeb2de979bb31dfb87441b5cfcfdf3393f0043c..85235867a3352c8277e6e279b7dbb6058d03020a 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/Calculator.cpp
@@ -77,7 +77,7 @@ Calculator::~Calculator() = default;
 //////////////////////////////////////////////////////////////////////////
 void Calculator::addCoProcessor(SPtr<CoProcessor> coProcessor) { coProcessors.push_back(coProcessor); }
 //////////////////////////////////////////////////////////////////////////
-void Calculator::coProcess(double step)
+void Calculator::coProcess(real step)
 {
     for (SPtr<CoProcessor> cp : coProcessors) {
         cp->process(step);
diff --git a/src/cpu/VirtualFluidsCore/Grid/Calculator.h b/src/cpu/VirtualFluidsCore/Grid/Calculator.h
index 377e6eee7d765ba6888b2aec6045cb14eac484dc..759541dd5c1a85cac727c9a714407d1fa10a1cb7 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Calculator.h
+++ b/src/cpu/VirtualFluidsCore/Grid/Calculator.h
@@ -36,6 +36,7 @@
 
 #include <PointerDefinitions.h>
 #include <vector>
+#include "lbm/constants/D3Q27.h"
 
 class Grid3D;
 class UbScheduler;
@@ -53,7 +54,7 @@ public:
     virtual ~Calculator();
     //! control of coProcessors
     void addCoProcessor(SPtr<CoProcessor> coProcessor);
-    void coProcess(double step);
+    void coProcess(real step);
 
     virtual void calculate() = 0;
 
diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp b/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
index 8c9ccab62fa18f96abeca389ab437e62cdc80d7b..a214b4bd0137b2bf319925b519f1dcb77fabded4 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
@@ -45,6 +45,7 @@
 #include "D3Q27System.h"
 #include <Block3D.h>
 #include <Communicator.h>
+#include "UbMath.h"
 
 using namespace std;
 
@@ -66,8 +67,8 @@ Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm, int blockNx1, int bl
     levelSet.resize(D3Q27System::MAXLEVEL + 1);
     bundle = comm->getBundleID();
     rank  = comm->getProcessID();
-    trafo = std::make_shared<CoordinateTransformation3D>(0.0, 0.0, 0.0, (double)blockNx1, (double)blockNx2,
-                                                         (double)blockNx3);
+    trafo = std::make_shared<CoordinateTransformation3D>(0.0, 0.0, 0.0, (real)blockNx1, (real)blockNx2,
+                                                         (real)blockNx3);
     UbTupleInt3 minInd(0, 0, 0);
     UbTupleInt3 maxInd(gridNx1, gridNx2, gridNx3);
     this->fillExtentWithBlocks(minInd, maxInd);
@@ -75,7 +76,7 @@ Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm, int blockNx1, int bl
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::addInteractor(SPtr<Interactor3D> interactor) { interactors.push_back(interactor); }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::addAndInitInteractor(SPtr<Interactor3D> interactor, double timestep)
+void Grid3D::addAndInitInteractor(SPtr<Interactor3D> interactor, real timestep)
 {
     interactors.push_back(interactor);
     interactor->initInteractor(timestep);
@@ -432,7 +433,7 @@ void Grid3D::setPeriodicX2(bool value) { this->periodicX2 = value; }
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::setPeriodicX3(bool value) { this->periodicX3 = value; }
 //////////////////////////////////////////////////////////////////////////
-UbTupleInt3 Grid3D::getBlockIndexes(double blockX1Coord, double blockX2Coord, double blockX3Coord) const
+UbTupleInt3 Grid3D::getBlockIndexes(real blockX1Coord, real blockX2Coord, real blockX3Coord) const
 {
     if (!trafo) {
         return makeUbTuple((int)blockX1Coord, (int)blockX2Coord, (int)blockX3Coord);
@@ -443,14 +444,14 @@ UbTupleInt3 Grid3D::getBlockIndexes(double blockX1Coord, double blockX2Coord, do
                        (int)trafo->transformForwardToX3Coordinate(blockX1Coord, blockX2Coord, blockX3Coord));
 }
 //////////////////////////////////////////////////////////////////////////
-UbTupleInt3 Grid3D::getBlockIndexes(double blockX1Coord, double blockX2Coord, double blockX3Coord, int level) const
+UbTupleInt3 Grid3D::getBlockIndexes(real blockX1Coord, real blockX2Coord, real blockX3Coord, int level) const
 {
     if (!trafo) {
         return makeUbTuple((int)blockX1Coord, (int)blockX2Coord, (int)blockX3Coord);
     }
 
-    double dx = getDeltaX(level);
-    double blockLentghX1, blockLentghX2, blockLentghX3;
+    real dx = getDeltaX(level);
+    real blockLentghX1, blockLentghX2, blockLentghX3;
     blockLentghX1      = blockNx1 * dx;
     blockLentghX2      = blockNx2 * dx;
     blockLentghX3      = blockNx3 * dx;
@@ -471,10 +472,10 @@ UbTupleInt3 Grid3D::getBlockIndexes(double blockX1Coord, double blockX2Coord, do
 UbTupleDouble3 Grid3D::getBlockLengths(const SPtr<Block3D> block) const
 {
     int level    = block->getLevel();
-    double delta = 1.0 / (double)(1 << level);
+    real delta = 1.0 / (real)(1 << level);
 
     if (!trafo)
-        makeUbTuple<double, double, double>(delta, delta, delta);
+        return { delta, delta, delta }; // no transformation set: return the unscaled lengths here; the previous statement discarded its tuple and fell through to the trafo dereference below
 
     return makeUbTuple(trafo->getX1CoordinateScaling() * delta, trafo->getX2CoordinateScaling() * delta,
                        trafo->getX3CoordinateScaling() * delta);
@@ -486,21 +487,21 @@ void Grid3D::setCoordinateTransformator(SPtr<CoordinateTransformation3D> trafo)
 //////////////////////////////////////////////////////////////////////////
 const SPtr<CoordinateTransformation3D> Grid3D::getCoordinateTransformator() const { return this->trafo; }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::setDeltaX(double dx) { this->orgDeltaX = dx; }
+void Grid3D::setDeltaX(real dx) { this->orgDeltaX = dx; } // stores dx as orgDeltaX, the value getDeltaX(level) divides by 2^level
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::setDeltaX(double worldUnit, double gridUnit) { this->orgDeltaX = worldUnit / gridUnit; }
+void Grid3D::setDeltaX(real worldUnit, real gridUnit) { this->orgDeltaX = worldUnit / gridUnit; } // orgDeltaX = worldUnit / gridUnit; gridUnit == 0 is not guarded here
 //////////////////////////////////////////////////////////////////////////
-double Grid3D::getDeltaX(int level) const
+real Grid3D::getDeltaX(int level) const
 {
-    double delta = this->orgDeltaX / (double)(1 << level);
+    real delta = this->orgDeltaX / (real)(1 << level); // orgDeltaX divided by 2^level, i.e. halved once per level
     return delta;
 }
 //////////////////////////////////////////////////////////////////////////
-double Grid3D::getDeltaX(SPtr<Block3D> block) const { return getDeltaX(block->getLevel()); }
+real Grid3D::getDeltaX(SPtr<Block3D> block) const { return getDeltaX(block->getLevel()); } // convenience overload: forwards the block's level to getDeltaX(int)
 //////////////////////////////////////////////////////////////////////////
 UbTupleDouble3 Grid3D::getNodeOffset(SPtr<Block3D> block) const
 {
-    double delta = this->getDeltaX(block);
+    real delta = this->getDeltaX(block); // delta of the block's level; every component of the returned tuple is offset * delta. NOTE(review): the return type is still UbTupleDouble3 while delta is now real - confirm the conversion is intended
     return makeUbTuple(offset * delta, offset * delta, offset * delta);
 }
 ////////////////////////////////////////////////////////////////////////////
@@ -508,26 +509,26 @@ Vector3D Grid3D::getNodeCoordinates(SPtr<Block3D> block, int ix1, int ix2, int i
 {
     UbTupleDouble3 org        = this->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset = this->getNodeOffset(block);
-    double deltaX             = getDeltaX(block);
+    real deltaX             = getDeltaX(block);
 
-    double x1 = val<1>(org) - val<1>(nodeOffset) + (double)ix1 * deltaX;
-    double x2 = val<2>(org) - val<2>(nodeOffset) + (double)ix2 * deltaX;
-    double x3 = val<3>(org) - val<3>(nodeOffset) + (double)ix3 * deltaX;
+    real x1 = val<1>(org) - val<1>(nodeOffset) + (real)ix1 * deltaX;
+    real x2 = val<2>(org) - val<2>(nodeOffset) + (real)ix2 * deltaX;
+    real x3 = val<3>(org) - val<3>(nodeOffset) + (real)ix3 * deltaX;
 
     return Vector3D(x1, x2, x3);
 }
 ////////////////////////////////////////////////////////////////////////////
-UbTupleInt3 Grid3D::getNodeIndexes(SPtr<Block3D> block, double nodeX1Coord, double nodeX2Coord,
-                                   double nodeX3Coord) const
+UbTupleInt3 Grid3D::getNodeIndexes(SPtr<Block3D> block, real nodeX1Coord, real nodeX2Coord,
+                                   real nodeX3Coord) const
 {
     UbTupleDouble3 org        = this->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset = this->getNodeOffset(block);
-    double deltaX             = getDeltaX(block);
+    real deltaX             = getDeltaX(block);
 
     int ix1, ix2, ix3;
-    double ixx1 = (abs(nodeX1Coord - val<1>(org) + val<1>(nodeOffset)) / deltaX);
-    double ixx2 = (abs(nodeX2Coord - val<2>(org) + val<2>(nodeOffset)) / deltaX);
-    double ixx3 = (abs(nodeX3Coord - val<3>(org) + val<3>(nodeOffset)) / deltaX);
+    real ixx1 = (abs(nodeX1Coord - val<1>(org) + val<1>(nodeOffset)) / deltaX); // NOTE(review): if real is single precision, the fractional part ixx1 - (int)ixx1 is at most about 0.99999994, so the "> .9999999999" round-up checks below would never trigger - confirm the threshold for float builds
+    real ixx2 = (abs(nodeX2Coord - val<2>(org) + val<2>(nodeOffset)) / deltaX);
+    real ixx3 = (abs(nodeX3Coord - val<3>(org) + val<3>(nodeOffset)) / deltaX);
     if (ixx1 - (int)ixx1 > .9999999999)
         ix1 = (int)ixx1 + 1;
     else
@@ -560,10 +561,10 @@ UbTupleDouble3 Grid3D::getBlockWorldCoordinates(SPtr<Block3D> block) const
 //////////////////////////////////////////////////////////////////////////
 UbTupleDouble3 Grid3D::getBlockWorldCoordinates(int blockX1Index, int blockX2Index, int blockX3Index, int level) const
 {
-    double c1oShiftedLevel = 1.0 / (double)(1 << level);
-    double x1              = (double)blockX1Index * c1oShiftedLevel;
-    double x2              = (double)blockX2Index * c1oShiftedLevel;
-    double x3              = (double)blockX3Index * c1oShiftedLevel;
+    real c1oShiftedLevel = 1.0 / (real)(1 << level);
+    real x1              = (real)blockX1Index * c1oShiftedLevel;
+    real x2              = (real)blockX2Index * c1oShiftedLevel;
+    real x3              = (real)blockX3Index * c1oShiftedLevel;
 
     if (!trafo)
         return { x1, x2, x3 };
@@ -631,8 +632,9 @@ SPtr<Block3D> Grid3D::getNeighborBlock(int dir, SPtr<Block3D> block) const
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::getAllNeighbors(int ix1, int ix2, int ix3, int level, int levelDepth, std::vector<SPtr<Block3D>> &blocks)
 {
-    for (int dir = D3Q27System::STARTDIR; dir <= D3Q27System::ENDDIR; dir++)
-    {
+    // Collect the neighbor blocks for every direction from FSTARTDIR to FENDDIR inclusive (this loop previously ran over STARTDIR..ENDDIR).
+    for (int dir = D3Q27System::FSTARTDIR; dir <= D3Q27System::FENDDIR; dir++)
+    {
         this->getNeighborBlocksForDirection(dir, ix1, ix2, ix3, level, levelDepth, blocks);
     }
 }
@@ -1098,83 +1100,85 @@ void Grid3D::getNeighborsBottomSouthWest(int ix1, int ix2, int ix3, int level, i
 void Grid3D::getNeighborBlocksForDirection(int dir, int ix1, int ix2, int ix3, int level, int levelDepth,
                                            std::vector<SPtr<Block3D>> &blocks)
 {
+    using namespace vf::lbm::dir;
+
     switch (dir) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             this->getNeighborsEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             this->getNeighborsWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             this->getNeighborsNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             this->getNeighborsSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             this->getNeighborsTop(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             this->getNeighborsBottom(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             this->getNeighborsNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             this->getNeighborsSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             this->getNeighborsSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             this->getNeighborsNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             this->getNeighborsTopEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             this->getNeighborsBottomWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             this->getNeighborsBottomEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             this->getNeighborsTopWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             this->getNeighborsTopNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             this->getNeighborsBottomSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             this->getNeighborsBottomNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             this->getNeighborsTopSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             this->getNeighborsTopNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             this->getNeighborsTopNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             this->getNeighborsTopSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             this->getNeighborsTopSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             this->getNeighborsBottomNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             this->getNeighborsBottomNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             this->getNeighborsBottomSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             this->getNeighborsBottomSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
         default:
@@ -1261,86 +1265,88 @@ void Grid3D::getSubBlocksZero(int ix1, int ix2, int ix3, int level, vector<SPtr<
 void Grid3D::getNeighborBlocksForDirectionWithREST(int dir, int ix1, int ix2, int ix3, int level, int levelDepth,
                                                       std::vector<SPtr<Block3D>> &blocks)
 {
+    using namespace vf::lbm::dir;
+
     switch (dir) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             this->getNeighborsEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             this->getNeighborsWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             this->getNeighborsNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             this->getNeighborsSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             this->getNeighborsTop(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             this->getNeighborsBottom(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             this->getNeighborsNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             this->getNeighborsSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             this->getNeighborsSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             this->getNeighborsNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             this->getNeighborsTopEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             this->getNeighborsBottomWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             this->getNeighborsBottomEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             this->getNeighborsTopWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             this->getNeighborsTopNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             this->getNeighborsBottomSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             this->getNeighborsBottomNorth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             this->getNeighborsTopSouth(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             this->getNeighborsTopNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             this->getNeighborsTopNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             this->getNeighborsTopSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             this->getNeighborsTopSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             this->getNeighborsBottomNorthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             this->getNeighborsBottomNorthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             this->getNeighborsBottomSouthEast(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             this->getNeighborsBottomSouthWest(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
-        case D3Q27System::DIR_000:
+        case DIR_000:
             this->getNeighborsZero(ix1, ix2, ix3, level, levelDepth, blocks);
             break;
         default:
@@ -2025,7 +2031,7 @@ int Grid3D::getNumberOfBlocks()
 //////////////////////////////////////////////////////////////////////////
 int Grid3D::getNumberOfBlocks(int level) { return (int)levelSet[level].size(); }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::getBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
+void Grid3D::getBlocksByCuboid(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
                                std::vector<SPtr<Block3D>> &blocks)
 {
     int coarsestLevel = this->getCoarsestInitializedLevel();
@@ -2035,9 +2041,9 @@ void Grid3D::getBlocksByCuboid(double minX1, double minX2, double minX3, double
     // MINIMALE BLOCK-INDIZES BESTIMMEN
     //
     // min:
-    double dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
-    double dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
-    double dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
 
     // Achtung, wenn minX1 genau auf grenze zwischen zwei bloecken -> der "kleinere" muss genommen werden,
     // da beim Transformieren der "groessere" Index rauskommt
@@ -2062,9 +2068,9 @@ void Grid3D::getBlocksByCuboid(double minX1, double minX2, double minX3, double
     std::set<SPtr<Block3D>> blockset;
     for (int level = coarsestLevel; level <= finestLevel; level++) {
         // damit bei negativen werten auch der "kleinere" genommen wird -> floor!
-        int minx1 = (int)std::floor((double)iMinX1 / (1 << (finestLevel - level)));
-        int minx2 = (int)std::floor((double)iMinX2 / (1 << (finestLevel - level)));
-        int minx3 = (int)std::floor((double)iMinX3 / (1 << (finestLevel - level)));
+        int minx1 = (int)std::floor((real)iMinX1 / (1 << (finestLevel - level)));
+        int minx2 = (int)std::floor((real)iMinX2 / (1 << (finestLevel - level)));
+        int minx3 = (int)std::floor((real)iMinX3 / (1 << (finestLevel - level)));
 
         int maxx1 = iMaxX1 / (1 << (finestLevel - level));
         int maxx2 = iMaxX2 / (1 << (finestLevel - level));
@@ -2084,16 +2090,16 @@ void Grid3D::getBlocksByCuboid(double minX1, double minX2, double minX3, double
     std::copy(blockset.begin(), blockset.end(), blocks.begin());
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::getBlocksByCuboid(int level, double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                               double maxX3, std::vector<SPtr<Block3D>> &blocks)
+void Grid3D::getBlocksByCuboid(int level, real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                               real maxX3, std::vector<SPtr<Block3D>> &blocks)
 {
     //////////////////////////////////////////////////////////////////////////
     // MINIMALE BLOCK-INDIZES BESTIMMEN
     //
     // min:
-    double dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << level);
-    double dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << level);
-    double dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << level);
+    real dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << level);
+    real dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << level);
+    real dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << level);
 
     // Achtung, wenn minX1 genau auf grenze zwischen zwei bloecken -> der "kleinere" muss genommen werden:
     int iMinX1 = (int)dMinX1;
@@ -2128,7 +2134,7 @@ void Grid3D::getBlocksByCuboid(int level, double minX1, double minX2, double min
     std::copy(blockset.begin(), blockset.end(), blocks.begin());
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::getAllBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
+void Grid3D::getAllBlocksByCuboid(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
                                   std::vector<SPtr<Block3D>> &blocks)
 {
     int coarsestLevel = this->getCoarsestInitializedLevel();
@@ -2138,9 +2144,9 @@ void Grid3D::getAllBlocksByCuboid(double minX1, double minX2, double minX3, doub
     // MINIMALE BLOCK-INDIZES BESTIMMEN
     //
     // min:
-    double dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
-    double dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
-    double dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX1 = trafo->transformForwardToX1Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX2 = trafo->transformForwardToX2Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
+    real dMinX3 = trafo->transformForwardToX3Coordinate(minX1, minX2, minX3) * (1 << finestLevel);
 
     // Achtung, wenn minX1 genau auf grenze zwischen zwei bloecken -> der "kleinere" muss genommen werden,
     // da beim Transformieren der "groessere" Index rauskommt
@@ -2165,9 +2171,9 @@ void Grid3D::getAllBlocksByCuboid(double minX1, double minX2, double minX3, doub
     std::set<SPtr<Block3D>> blockset;
     for (int level = coarsestLevel; level <= finestLevel; level++) {
         // damit bei negativen werten auch der "kleinere" genommen wird -> floor!
-        int minx1 = (int)std::floor((double)iMinX1 / (1 << (finestLevel - level)));
-        int minx2 = (int)std::floor((double)iMinX2 / (1 << (finestLevel - level)));
-        int minx3 = (int)std::floor((double)iMinX3 / (1 << (finestLevel - level)));
+        int minx1 = (int)std::floor((real)iMinX1 / (1 << (finestLevel - level)));
+        int minx2 = (int)std::floor((real)iMinX2 / (1 << (finestLevel - level)));
+        int minx3 = (int)std::floor((real)iMinX3 / (1 << (finestLevel - level)));
 
         int maxx1 = iMaxX1 / (1 << (finestLevel - level));
         int maxx2 = iMaxX2 / (1 << (finestLevel - level));
@@ -2187,25 +2193,25 @@ void Grid3D::getAllBlocksByCuboid(double minX1, double minX2, double minX3, doub
     std::copy(blockset.begin(), blockset.end(), blocks.begin());
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::calcStartCoordinatesAndDelta(SPtr<Block3D> block, double &worldX1, double &worldX2, double &worldX3,
-                                          double &deltaX)
+void Grid3D::calcStartCoordinatesAndDelta(SPtr<Block3D> block, real &worldX1, real &worldX2, real &worldX3,
+                                          real &deltaX)
 {
     int blocklevel = block->getLevel();
     worldX1        = block->getX1() / (float)(1 << blocklevel);
     worldX2        = block->getX2() / (float)(1 << blocklevel);
     worldX3        = block->getX3() / (float)(1 << blocklevel);
-    deltaX         = (double)1.0 / (double)(this->blockNx1 * (double)(1 << blocklevel));
+    deltaX         = (real)1.0 / (real)(this->blockNx1 * (real)(1 << blocklevel)); // spacing when no trafo is set: 1 / (blockNx1 * 2^blocklevel). NOTE(review): the worldX1..3 divisions above still cast to (float) rather than (real) - confirm that is intended
 
     if (this->trafo) {
-        double x1tmp = worldX1, x2tmp = worldX2, x3tmp = worldX3;
+        real x1tmp = worldX1, x2tmp = worldX2, x3tmp = worldX3; // snapshot first: worldX1..3 are overwritten one at a time below, but each backward transform takes all three untransformed values
         worldX1 = this->trafo->transformBackwardToX1Coordinate(x1tmp, x2tmp, x3tmp);
         worldX2 = this->trafo->transformBackwardToX2Coordinate(x1tmp, x2tmp, x3tmp);
         worldX3 = this->trafo->transformBackwardToX3Coordinate(x1tmp, x2tmp, x3tmp);
-        deltaX  = this->trafo->getX1CoordinateScaling() / (double)(this->blockNx1 * (double)(1 << blocklevel));
+        deltaX  = this->trafo->getX1CoordinateScaling() / (real)(this->blockNx1 * (real)(1 << blocklevel)); // with a trafo, the spacing is multiplied by the X1 coordinate scaling (only X1 is used here)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, double &worldX1, double &worldX2, double &worldX3)
+void Grid3D::calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, real &worldX1, real &worldX2, real &worldX3)
 {
     int blocklevel = block->getLevel();
     worldX1        = block->getX1() / (float)(1 << blocklevel);
@@ -2213,7 +2219,7 @@ void Grid3D::calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, double &wor
     worldX3        = block->getX3() / (float)(1 << blocklevel);
 
     if (this->trafo) {
-        double x1tmp = worldX1, x2tmp = worldX2, x3tmp = worldX3;
+        real x1tmp = worldX1, x2tmp = worldX2, x3tmp = worldX3;
         worldX1 = this->trafo->transformBackwardToX1Coordinate(x1tmp, x2tmp, x3tmp);
         worldX2 = this->trafo->transformBackwardToX2Coordinate(x1tmp, x2tmp, x3tmp);
         worldX3 = this->trafo->transformBackwardToX3Coordinate(x1tmp, x2tmp, x3tmp);
@@ -2227,12 +2233,12 @@ int Grid3D::getGhostLayerWidth() const
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::setGhostLayerWidth(int ghostLayerWidth)
 {
-    this->offset = static_cast<double>(ghostLayerWidth) - 0.5;
+    this->offset = static_cast<real>(ghostLayerWidth) - 0.5;
 }
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::setTimeStep(double step) { timeStep = step; }
+void Grid3D::setTimeStep(real step) { timeStep = step; }
 //////////////////////////////////////////////////////////////////////////
-double Grid3D::getTimeStep() const { return timeStep; }
+real Grid3D::getTimeStep() const { return timeStep; }
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::fillExtentWithBlocks(UbTupleInt3 minInd, UbTupleInt3 maxInd)
 {
diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3D.h b/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
index fabaaa655e2b63201256802473ec037279f0ea3b..41a99d6cc7be5177cc0f3ff2e89591b28317fede 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
+++ b/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
@@ -41,6 +41,7 @@
 #include <basics/utilities/UbKeys.h>
 #include <basics/utilities/UbTuple.h>
 #include <basics/utilities/Vector3D.h>
+#include "lbm/constants/D3Q27.h"
 
 class CoordinateTransformation3D;
 
@@ -77,11 +78,11 @@ public:
     void replaceBlock(SPtr<Block3D> block);
     SPtr<Block3D> getBlock(int ix1, int ix2, int ix3, int level) const;
     SPtr<Block3D> getBlock(int id) const;
-    void getBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
+    void getBlocksByCuboid(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
                            std::vector<SPtr<Block3D>> &blocks);
-    void getBlocksByCuboid(int level, double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                           double maxX3, std::vector<SPtr<Block3D>> &blocks);
-    void getAllBlocksByCuboid(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
+    void getBlocksByCuboid(int level, real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                           real maxX3, std::vector<SPtr<Block3D>> &blocks);
+    void getAllBlocksByCuboid(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
                               std::vector<SPtr<Block3D>> &blocks);
     //! get blocks for level
     void getBlocks(int level, std::vector<SPtr<Block3D>> &blockVector);
@@ -166,7 +167,7 @@ public:
     //////////////////////////////////////////////////////////////////////////
     // interactors control
     void addInteractor(SPtr<Interactor3D> interactor);
-    void addAndInitInteractor(SPtr<Interactor3D> interactor, double timestep = 0);
+    void addAndInitInteractor(SPtr<Interactor3D> interactor, real timestep = 0);
     Interactor3DSet getInteractors();
     //////////////////////////////////////////////////////////////////////////
     // visitors
@@ -189,19 +190,19 @@ public:
     void setPeriodicX3(bool value);
     //////////////////////////////////////////////////////////////////////////
     // Topology
-    UbTupleInt3 getBlockIndexes(double blockX1Coord, double blockX2Coord, double blockX3Coord) const;
-    UbTupleInt3 getBlockIndexes(double blockX1Coord, double blockX2Coord, double blockX3Coord, int level) const;
+    UbTupleInt3 getBlockIndexes(real blockX1Coord, real blockX2Coord, real blockX3Coord) const;
+    UbTupleInt3 getBlockIndexes(real blockX1Coord, real blockX2Coord, real blockX3Coord, int level) const;
     UbTupleDouble3 getBlockLengths(SPtr<Block3D> block) const;
     UbTupleDouble6 getBlockOversize() const;
     void setCoordinateTransformator(SPtr<CoordinateTransformation3D> trafo);
     const SPtr<CoordinateTransformation3D> getCoordinateTransformator() const;
-    void setDeltaX(double dx);
-    void setDeltaX(double worldUnit, double gridUnit);
-    double getDeltaX(int level) const;
-    double getDeltaX(SPtr<Block3D> block) const;
+    void setDeltaX(real dx);
+    void setDeltaX(real worldUnit, real gridUnit);
+    real getDeltaX(int level) const;
+    real getDeltaX(SPtr<Block3D> block) const;
     UbTupleDouble3 getNodeOffset(SPtr<Block3D> block) const;
     Vector3D getNodeCoordinates(SPtr<Block3D> block, int ix1, int ix2, int ix3) const;
-    UbTupleInt3 getNodeIndexes(SPtr<Block3D> block, double nodeX1Coord, double nodeX2Coord, double nodeX3Coord) const;
+    UbTupleInt3 getNodeIndexes(SPtr<Block3D> block, real nodeX1Coord, real nodeX2Coord, real nodeX3Coord) const;
     void setBlockNX(int nx1, int nx2, int nx3);
     UbTupleInt3 getBlockNX() const;
     UbTupleDouble3 getBlockWorldCoordinates(SPtr<Block3D> block) const;
@@ -212,16 +213,16 @@ public:
     int getNX1() const;
     int getNX2() const;
     int getNX3() const;
-    void calcStartCoordinatesAndDelta(SPtr<Block3D> block, double &worldX1, double &worldX2, double &worldX3, double &deltaX);
-    void calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, double &worldX1, double &worldX2, double &worldX3);
+    void calcStartCoordinatesAndDelta(SPtr<Block3D> block, real &worldX1, real &worldX2, real &worldX3, real &deltaX);
+    void calcStartCoordinatesWithOutOverlap(SPtr<Block3D> block, real &worldX1, real &worldX2, real &worldX3);
     int getGhostLayerWidth() const;
     void setGhostLayerWidth(int ghostLayerWidth);
     //////////////////////////////////////////////////////////////////////////
     // LBM
     // double getDeltaT(SPtr<Block3D>) const;
     //////////////////////////////////////////////////////////////////////////
-    void setTimeStep(double step);
-    double getTimeStep() const;
+    void setTimeStep(real step);
+    real getTimeStep() const;
 
 protected:
     void checkLevel(int level);
@@ -309,11 +310,11 @@ private:
     int nx3{ 0 };
 
     SPtr<CoordinateTransformation3D> trafo;
-    double orgDeltaX{ 1.0 };
+    real orgDeltaX{ 1.0 };
 
-    double timeStep{ 0.0 };
+    real timeStep{ 0.0 };
 
-    double offset{ 0.5 };
+    real offset{ 0.5 };
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp
index a834466f85b85890bf7c1a5a264807e6a0b13ee1..c4a1ab11ef476891a7339a8ab2ae3c63556c8cd1 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.cpp
@@ -87,117 +87,119 @@ D3Q27Interactor::~D3Q27Interactor() = default;
 //////////////////////////////////////////////////////////////////////////
 void D3Q27Interactor::initRayVectors()
 {
+    using namespace vf::lbm::dir;
+
     int fdir;
-    double c1oS2 = UbMath::one_over_sqrt2;
-    double c1oS3 = UbMath::one_over_sqrt3;
-    fdir         = D3Q27System::DIR_P00;
+    real c1oS2 = vf::basics::constant::one_over_sqrt2;
+    real c1oS3 = vf::basics::constant::one_over_sqrt3;
+    fdir         = DIR_P00;
     rayX1[fdir]  = 1.0;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_M00;
+    fdir         = DIR_M00;
     rayX1[fdir]  = -1.0;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_0P0;
+    fdir         = DIR_0P0;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = 1.0;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_0M0;
+    fdir         = DIR_0M0;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = -1.0;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_00P;
+    fdir         = DIR_00P;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = 1.0;
-    fdir         = D3Q27System::DIR_00M;
+    fdir         = DIR_00M;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = -1.0;
-    fdir         = D3Q27System::DIR_PP0;
+    fdir         = DIR_PP0;
     rayX1[fdir]  = c1oS2;
     rayX2[fdir]  = c1oS2;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_MM0;
+    fdir         = DIR_MM0;
     rayX1[fdir]  = -c1oS2;
     rayX2[fdir]  = -c1oS2;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_PM0;
+    fdir         = DIR_PM0;
     rayX1[fdir]  = c1oS2;
     rayX2[fdir]  = -c1oS2;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_MP0;
+    fdir         = DIR_MP0;
     rayX1[fdir]  = -c1oS2;
     rayX2[fdir]  = c1oS2;
     rayX3[fdir]  = 0.0;
-    fdir         = D3Q27System::DIR_P0P;
+    fdir         = DIR_P0P;
     rayX1[fdir]  = c1oS2;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = c1oS2;
-    fdir         = D3Q27System::DIR_M0M;
+    fdir         = DIR_M0M;
     rayX1[fdir]  = -c1oS2;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = -c1oS2;
-    fdir         = D3Q27System::DIR_P0M;
+    fdir         = DIR_P0M;
     rayX1[fdir]  = c1oS2;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = -c1oS2;
-    fdir         = D3Q27System::DIR_M0P;
+    fdir         = DIR_M0P;
     rayX1[fdir]  = -c1oS2;
     rayX2[fdir]  = 0.0;
     rayX3[fdir]  = c1oS2;
-    fdir         = D3Q27System::DIR_0PP;
+    fdir         = DIR_0PP;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = c1oS2;
     rayX3[fdir]  = c1oS2;
-    fdir         = D3Q27System::DIR_0MM;
+    fdir         = DIR_0MM;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = -c1oS2;
     rayX3[fdir]  = -c1oS2;
-    fdir         = D3Q27System::DIR_0PM;
+    fdir         = DIR_0PM;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = c1oS2;
     rayX3[fdir]  = -c1oS2;
-    fdir         = D3Q27System::DIR_0MP;
+    fdir         = DIR_0MP;
     rayX1[fdir]  = 0.0;
     rayX2[fdir]  = -c1oS2;
     rayX3[fdir]  = c1oS2;
 
-    fdir        = D3Q27System::DIR_MPP;
+    fdir        = DIR_MPP;
     rayX1[fdir] = -c1oS3;
     rayX2[fdir] = c1oS3;
     rayX3[fdir] = c1oS3;
-    fdir        = D3Q27System::DIR_PPP;
+    fdir        = DIR_PPP;
     rayX1[fdir] = c1oS3;
     rayX2[fdir] = c1oS3;
     rayX3[fdir] = c1oS3;
-    fdir        = D3Q27System::DIR_MMP;
+    fdir        = DIR_MMP;
     rayX1[fdir] = -c1oS3;
     rayX2[fdir] = -c1oS3;
     rayX3[fdir] = c1oS3;
-    fdir        = D3Q27System::DIR_PMP;
+    fdir        = DIR_PMP;
     rayX1[fdir] = c1oS3;
     rayX2[fdir] = -c1oS3;
     rayX3[fdir] = c1oS3;
-    fdir        = D3Q27System::DIR_MPM;
+    fdir        = DIR_MPM;
     rayX1[fdir] = -c1oS3;
     rayX2[fdir] = c1oS3;
     rayX3[fdir] = -c1oS3;
-    fdir        = D3Q27System::DIR_PPM;
+    fdir        = DIR_PPM;
     rayX1[fdir] = c1oS3;
     rayX2[fdir] = c1oS3;
     rayX3[fdir] = -c1oS3;
-    fdir        = D3Q27System::DIR_MMM;
+    fdir        = DIR_MMM;
     rayX1[fdir] = -c1oS3;
     rayX2[fdir] = -c1oS3;
     rayX3[fdir] = -c1oS3;
-    fdir        = D3Q27System::DIR_PMM;
+    fdir        = DIR_PMM;
     rayX1[fdir] = c1oS3;
     rayX2[fdir] = -c1oS3;
     rayX3[fdir] = -c1oS3;
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27Interactor::initInteractor(const double &timeStep)
+void D3Q27Interactor::initInteractor(const real &timeStep)
 {
     UBLOG(logDEBUG5, "D3Q27Interactor::initInteractor - "
                          << " for timestep = " << timeStep);
@@ -222,7 +224,7 @@ void D3Q27Interactor::initInteractor(const double &timeStep)
     updateBlocks();
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27Interactor::updateInteractor(const double &timestep)
+void D3Q27Interactor::updateInteractor(const real &timestep)
 {
     UBLOG(logDEBUG5, "D3Q27Interactor::updateInteractor - for timestep = " << timestep);
 
@@ -262,9 +264,9 @@ void D3Q27Interactor::updateInteractor(const double &timestep)
             int x2          = (*setPos)[1];
             int x3          = (*setPos)[2];
             Vector3D coords = grid.lock()->getNodeCoordinates(block, x1, x2, x3);
-            double worldX1  = coords[0];
-            double worldX2  = coords[1];
-            double worldX3  = coords[2];
+            real worldX1  = coords[0];
+            real worldX2  = coords[1];
+            real worldX3  = coords[2];
 
             SPtr<BoundaryConditions> bc = bcArray->getBC(x1, x2, x3);
             if (bc) // may be that the BC has been deleted by the solid setting of another interactor
@@ -282,6 +284,8 @@ void D3Q27Interactor::updateInteractor(const double &timestep)
 // extendedBoundingGeoOfGeoObject MUST already have been magnified by delta_x_level in each direction for SOLID
 bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     if (!block)
         return false;
 
@@ -293,7 +297,7 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
     solidNodeIndicesMap[block]              = set<UbTupleInt3>();
     set<UbTupleInt3> &solidNodeIndices      = solidNodeIndicesMap[block];
 
-    double timestep    = 0;
+    real timestep    = 0;
     bool oneEntryGotBC = false;
     bool gotQs         = false;
     SPtr<BoundaryConditions> bc;
@@ -301,7 +305,7 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
     SPtr<ILBMKernel> kernel = block->getKernel();
     SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
 
-    double internX1, internX2, internX3;
+    real internX1, internX2, internX3;
 
     int startIX1 = 0;
     int startIX2 = 0;
@@ -310,7 +314,7 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
     int stopIX2  = (int)bcArray->getNX2();
     int stopIX3  = (int)bcArray->getNX3();
 
-    double dx = grid.lock()->getDeltaX(block);
+    real dx = grid.lock()->getDeltaX(block);
 
     // other boundingRect than in init, because here the boundrect has to be increased by one dx
     GbCuboid3D extendedBoundingGeoOfGeoObject(
@@ -318,7 +322,7 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
         geoObject3D->getX3Minimum() - 1.02 * dx, geoObject3D->getX1Maximum() + 1.02 * dx,
         geoObject3D->getX2Maximum() + 1.02 * dx, geoObject3D->getX3Maximum() + 1.02 * dx);
 
-    double deltaX1 = dx, deltaX2 = dx, deltaX3 = dx;
+    real deltaX1 = dx, deltaX2 = dx, deltaX3 = dx;
 
     if (geoObject3D->hasRaytracing() || (this->isInverseSolid() && geoObject3D->raytracingSupportsPointsInside())) {
         // if deltaX1==deltaX2==deltaX3 (must for LB!!)
@@ -326,20 +330,20 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
             throw UbException(
                 UB_EXARGS, "fuer den bei LB nicht vorkommenden Fall deltaX1!=deltaX2!=deltaX3  nicht implementiert ");
 
-        vector<double> distNeigh(D3Q27System::FENDDIR + 1, UbMath::sqrt2 * deltaX1);
-        distNeigh[D3Q27System::DIR_P00] = distNeigh[D3Q27System::DIR_M00] = distNeigh[D3Q27System::DIR_0P0] = deltaX1;
-        distNeigh[D3Q27System::DIR_0M0] = distNeigh[D3Q27System::DIR_00P] = distNeigh[D3Q27System::DIR_00M] = deltaX1;
-        distNeigh[D3Q27System::DIR_PP0] = distNeigh[D3Q27System::DIR_MP0] = distNeigh[D3Q27System::DIR_MM0] =
-            distNeigh[D3Q27System::DIR_PM0]                          = UbMath::sqrt2 * deltaX1;
-        distNeigh[D3Q27System::DIR_P0P] = distNeigh[D3Q27System::DIR_0PP] = distNeigh[D3Q27System::DIR_M0P] =
-            distNeigh[D3Q27System::DIR_0MP]                          = UbMath::sqrt2 * deltaX1;
-        distNeigh[D3Q27System::DIR_P0M] = distNeigh[D3Q27System::DIR_0PM] = distNeigh[D3Q27System::DIR_M0M] =
-            distNeigh[D3Q27System::DIR_0MM]                          = UbMath::sqrt2 * deltaX1;
-        distNeigh[D3Q27System::DIR_PPP] = distNeigh[D3Q27System::DIR_MPP] = distNeigh[D3Q27System::DIR_PMP] =
-            distNeigh[D3Q27System::DIR_MMP]                           = UbMath::sqrt3 * deltaX1;
-        distNeigh[D3Q27System::DIR_PPM] = distNeigh[D3Q27System::DIR_MPM] = distNeigh[D3Q27System::DIR_PMM] =
-            distNeigh[D3Q27System::DIR_MMM]                           = UbMath::sqrt3 * deltaX1;
-        double q;
+        vector<real> distNeigh(D3Q27System::FENDDIR + 1, vf::basics::constant::sqrt2 * deltaX1);
+        distNeigh[DIR_P00] = distNeigh[DIR_M00] = distNeigh[DIR_0P0] = deltaX1;
+        distNeigh[DIR_0M0] = distNeigh[DIR_00P] = distNeigh[DIR_00M] = deltaX1;
+        distNeigh[DIR_PP0] = distNeigh[DIR_MP0] = distNeigh[DIR_MM0] =
+            distNeigh[DIR_PM0]             = vf::basics::constant::sqrt2 * deltaX1;
+        distNeigh[DIR_P0P] = distNeigh[DIR_0PP] = distNeigh[DIR_M0P] =
+            distNeigh[DIR_0MP]             = vf::basics::constant::sqrt2 * deltaX1;
+        distNeigh[DIR_P0M] = distNeigh[DIR_0PM] = distNeigh[DIR_M0M] =
+            distNeigh[DIR_0MM]             = vf::basics::constant::sqrt2 * deltaX1;
+        distNeigh[DIR_PPP] = distNeigh[DIR_MPP] = distNeigh[DIR_PMP] =
+            distNeigh[DIR_MMP]              = vf::basics::constant::sqrt3 * deltaX1;
+        distNeigh[DIR_PPM] = distNeigh[DIR_MPM] = distNeigh[DIR_PMM] =
+            distNeigh[DIR_MMM]              = vf::basics::constant::sqrt3 * deltaX1;
+        real q;
         bool pointOnBoundary = false;
 
         //#ifdef _OPENMP
@@ -487,19 +491,19 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
 
                         GbPoint3D pointA(internX1, internX2, internX3);
                         for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
-                            double x1B = internX1 + D3Q27System::DX1[fdir] * deltaX1;
-                            double x2B = internX2 + D3Q27System::DX2[fdir] * deltaX2;
-                            double x3B = internX3 + D3Q27System::DX3[fdir] * deltaX3;
+                            real x1B = internX1 + D3Q27System::DX1[fdir] * deltaX1;
+                            real x2B = internX2 + D3Q27System::DX2[fdir] * deltaX2;
+                            real x3B = internX3 + D3Q27System::DX3[fdir] * deltaX3;
 
                             GbPoint3D pointB(x1B, x2B, x3B);
                             GbLine3D *clippedLine = this->geoObject3D->createClippedLine3D(pointA, pointB);
 
                             if (clippedLine) {
-                                double q = 0.0;
+                                real q = 0.0;
                                 if (!this->isInverseSolid()) // A is outside
                                 {
-                                    double distanceAB = pointA.getDistance(&pointB); // pointA to B
-                                    double distanceAP = UbMath::min(pointA.getDistance(clippedLine->getPoint1()),
+                                    real distanceAB = pointA.getDistance(&pointB); // pointA to B
+                                    real distanceAP = UbMath::min(pointA.getDistance(clippedLine->getPoint1()),
                                                                     pointA.getDistance(clippedLine->getPoint2()));
                                     q                 = distanceAP / distanceAB;
                                 } else {
@@ -507,8 +511,8 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
                                     if (!clippedLine->getPoint1()->equals(&pointB) &&
                                         !clippedLine->getPoint2()->equals(&pointB)) {
                                         // A is inside, a clipped line must not contain B
-                                        double distanceAB = pointA.getDistance(&pointB); // pointA to B
-                                        double distanceAP = clippedLine->getLength();
+                                        real distanceAB = pointA.getDistance(&pointB); // pointA to B
+                                        real distanceAP = clippedLine->getLength();
                                         q                 = distanceAP / distanceAB;
                                     } else if (this->geoObject3D->isPointInGbObject3D(
                                                    pointB.getX1Coordinate(), pointB.getX2Coordinate(),
@@ -569,11 +573,13 @@ bool D3Q27Interactor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
 //////////////////////////////////////////////////////////////////////////
 void D3Q27Interactor::addQsLineSet(std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt2> &lines)
 {
+    using namespace vf::lbm::dir;
+
     for (SPtr<Block3D> block : bcBlocks) {
         if (!block)
             continue;
 
-        double dx               = grid.lock()->getDeltaX(block);
+        real dx               = grid.lock()->getDeltaX(block);
         UbTupleDouble3 orgDelta = grid.lock()->getNodeOffset(block);
 
         SPtr<ILBMKernel> kernel = block->getKernel();
@@ -603,142 +609,142 @@ void D3Q27Interactor::addQsLineSet(std::vector<UbTupleFloat3> &nodes, std::vecto
                     continue;
                 SPtr<BoundaryConditions> bc = bcArray->getBC(ix1, ix2, ix3);
 
-                double x1a = val<1>(blockOrg) - val<1>(orgDelta) + ix1 * dx;
-                double x2a = val<2>(blockOrg) - val<2>(orgDelta) + ix2 * dx;
-                double x3a = val<3>(blockOrg) - val<3>(orgDelta) + ix3 * dx;
+                real x1a = val<1>(blockOrg) - val<1>(orgDelta) + ix1 * dx;
+                real x2a = val<2>(blockOrg) - val<2>(orgDelta) + ix2 * dx;
+                real x3a = val<3>(blockOrg) - val<3>(orgDelta) + ix3 * dx;
                 nodes.push_back(makeUbTuple((float)x1a, (float)x2a, (float)x3a));
                 node1Index = nodes.size() - 1;
 
                 for (int dir = D3Q27System::FSTARTDIR; dir <= D3Q27System::FENDDIR; dir++) {
                     if (bc->hasBoundaryConditionFlag(D3Q27System::INVDIR[dir])) {
-                        double x1b, x2b, x3b, q = bc->getQ(dir);
+                        real x1b, x2b, x3b, q = bc->getQ(dir);
                         switch (dir) {
-                            case D3Q27System::DIR_P00:
+                            case DIR_P00:
                                 x1b = x1a + q * dx;
                                 x2b = x2a;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_0P0:
+                            case DIR_0P0:
                                 x1b = x1a;
                                 x2b = x2a + q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_M00:
+                            case DIR_M00:
                                 x1b = x1a - q * dx;
                                 x2b = x2a;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_0M0:
+                            case DIR_0M0:
                                 x1b = x1a;
                                 x2b = x2a - q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_PP0:
+                            case DIR_PP0:
                                 x1b = x1a + q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_MP0:
+                            case DIR_MP0:
                                 x1b = x1a - q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_MM0:
+                            case DIR_MM0:
                                 x1b = x1a - q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_PM0:
+                            case DIR_PM0:
                                 x1b = x1a + q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a;
                                 break;
-                            case D3Q27System::DIR_00P:
+                            case DIR_00P:
                                 x1b = x1a;
                                 x2b = x2a;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_P0P:
+                            case DIR_P0P:
                                 x1b = x1a + q * dx;
                                 x2b = x2a;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_0PP:
+                            case DIR_0PP:
                                 x1b = x1a;
                                 x2b = x2a + q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_M0P:
+                            case DIR_M0P:
                                 x1b = x1a - q * dx;
                                 x2b = x2a;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_0MP:
+                            case DIR_0MP:
                                 x1b = x1a;
                                 x2b = x2a - q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_00M:
+                            case DIR_00M:
                                 x1b = x1a;
                                 x2b = x2a;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_P0M:
+                            case DIR_P0M:
                                 x1b = x1a + q * dx;
                                 x2b = x2a;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_0PM:
+                            case DIR_0PM:
                                 x1b = x1a;
                                 x2b = x2a + q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_M0M:
+                            case DIR_M0M:
                                 x1b = x1a - q * dx;
                                 x2b = x2a;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_0MM:
+                            case DIR_0MM:
                                 x1b = x1a;
                                 x2b = x2a - q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_PPP:
+                            case DIR_PPP:
                                 x1b = x1a + q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_MMM:
+                            case DIR_MMM:
                                 x1b = x1a - q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_PPM:
+                            case DIR_PPM:
                                 x1b = x1a + q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_MMP:
+                            case DIR_MMP:
                                 x1b = x1a - q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_PMP:
+                            case DIR_PMP:
                                 x1b = x1a + q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a + q * dx;
                                 break;
-                            case D3Q27System::DIR_MPM:
+                            case DIR_MPM:
                                 x1b = x1a - q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_PMM:
+                            case DIR_PMM:
                                 x1b = x1a + q * dx;
                                 x2b = x2a - q * dx;
                                 x3b = x3a - q * dx;
                                 break;
-                            case D3Q27System::DIR_MPP:
+                            case DIR_MPP:
                                 x1b = x1a - q * dx;
                                 x2b = x2a + q * dx;
                                 x3b = x3a + q * dx;
@@ -760,6 +766,8 @@ void D3Q27Interactor::addQsLineSet(std::vector<UbTupleFloat3> &nodes, std::vecto
 ////////////////////////////////////////////////////////////////////////////
 vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
 {
+    using namespace vf::lbm::dir;
+
     vector<pair<GbPoint3D, GbPoint3D>> QsLineSet;
     pair<GbPoint3D, GbPoint3D> pointpair;
 
@@ -774,7 +782,7 @@ vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
         SPtr<BCArray3D> bcMatrix  = kernel->getBCProcessor()->getBCArray();
         UbTupleDouble3 nodeOffset = grid.lock()->getNodeOffset(block);
 
-        // Check whether top row is double in the system or not
+        // Check whether top row is duplicated in the system or not
         bool include_N_Face  = false; // x1=[0..blocknx1[ && x3=[0..blocknx3[
         bool include_E_Face  = false; // x2=[0..blocknx2[ && x3=[0..blocknx3[
         bool include_T_Face  = false; // x1=[0..blocknx1[ && x2=[0..blocknx2[
@@ -782,17 +790,17 @@ vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
         bool include_TN_Edge = false; //(x1/x2/x3)=([0..blocknx1[/blocknx2/blocknx1)
         bool include_TE_Edge = false; //(x1/x2/x3)=(blocknx1/[0..blocknx2[/blocknx2)
         if (block) {
-            if (!block->getConnector(D3Q27System::DIR_0P0))
+            if (!block->getConnector(DIR_0P0))
                 include_N_Face = true;
-            if (!block->getConnector(D3Q27System::DIR_P00))
+            if (!block->getConnector(DIR_P00))
                 include_E_Face = true;
-            if (!block->getConnector(D3Q27System::DIR_00P))
+            if (!block->getConnector(DIR_00P))
                 include_T_Face = true;
-            if (!block->getConnector(D3Q27System::DIR_PP0) && include_N_Face && include_E_Face)
+            if (!block->getConnector(DIR_PP0) && include_N_Face && include_E_Face)
                 include_NE_Edge = true;
-            if (!block->getConnector(D3Q27System::DIR_0PP) && include_T_Face && include_N_Face)
+            if (!block->getConnector(DIR_0PP) && include_T_Face && include_N_Face)
                 include_TN_Edge = true;
-            if (!block->getConnector(D3Q27System::DIR_P0P) && include_T_Face && include_E_Face)
+            if (!block->getConnector(DIR_P0P) && include_T_Face && include_E_Face)
                 include_TE_Edge = true;
         }
 
@@ -802,7 +810,7 @@ vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
         set<std::vector<int>> &transNodeIndicesSet = pos->second;
         set<std::vector<int>>::iterator setPos;
 
-        double x1, x2, x3, dx;
+        real x1, x2, x3, dx;
         grid.lock()->calcStartCoordinatesAndDelta(block, x1, x2, x3, dx);
 
         for (setPos = transNodeIndicesSet.begin(); setPos != transNodeIndicesSet.end(); ++setPos) {
@@ -824,142 +832,142 @@ vector<pair<GbPoint3D, GbPoint3D>> D3Q27Interactor::getQsLineSet()
                     if (!bcMatrix->hasBC(ix1, ix2, ix3))
                         continue;
                     SPtr<BoundaryConditions> bc = bcMatrix->getBC(ix1, ix2, ix3);
-                    double x1a                  = x1 - val<1>(nodeOffset) + dx * ix1;
-                    double x2a                  = x2 - val<2>(nodeOffset) + dx * ix2;
-                    double x3a                  = x3 - val<3>(nodeOffset) + dx * ix3;
+                    real x1a                  = x1 - val<1>(nodeOffset) + dx * ix1;
+                    real x2a                  = x2 - val<2>(nodeOffset) + dx * ix2;
+                    real x3a                  = x3 - val<3>(nodeOffset) + dx * ix3;
                     pointpair.first.setX1(x1a);
                     pointpair.first.setX2(x2a);
                     pointpair.first.setX3(x3a);
                     for (int dir = D3Q27System::FSTARTDIR; dir <= D3Q27System::FENDDIR; dir++) {
                         if (bc->hasBoundaryConditionFlag(D3Q27System::INVDIR[dir])) {
-                            double x1b, x2b, x3b, q = bc->getQ(dir);
+                            real x1b, x2b, x3b, q = bc->getQ(dir);
                             switch (dir) {
-                                case D3Q27System::DIR_P00:
+                                case DIR_P00:
                                     x1b = x1a + q * dx;
                                     x2b = x2a;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_0P0:
+                                case DIR_0P0:
                                     x1b = x1a;
                                     x2b = x2a + q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_M00:
+                                case DIR_M00:
                                     x1b = x1a - q * dx;
                                     x2b = x2a;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_0M0:
+                                case DIR_0M0:
                                     x1b = x1a;
                                     x2b = x2a - q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_PP0:
+                                case DIR_PP0:
                                     x1b = x1a + q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_MP0:
+                                case DIR_MP0:
                                     x1b = x1a - q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_MM0:
+                                case DIR_MM0:
                                     x1b = x1a - q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_PM0:
+                                case DIR_PM0:
                                     x1b = x1a + q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a;
                                     break;
-                                case D3Q27System::DIR_00P:
+                                case DIR_00P:
                                     x1b = x1a;
                                     x2b = x2a;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_P0P:
+                                case DIR_P0P:
                                     x1b = x1a + q * dx;
                                     x2b = x2a;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_0PP:
+                                case DIR_0PP:
                                     x1b = x1a;
                                     x2b = x2a + q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_M0P:
+                                case DIR_M0P:
                                     x1b = x1a - q * dx;
                                     x2b = x2a;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_0MP:
+                                case DIR_0MP:
                                     x1b = x1a;
                                     x2b = x2a - q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_00M:
+                                case DIR_00M:
                                     x1b = x1a;
                                     x2b = x2a;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_P0M:
+                                case DIR_P0M:
                                     x1b = x1a + q * dx;
                                     x2b = x2a;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_0PM:
+                                case DIR_0PM:
                                     x1b = x1a;
                                     x2b = x2a + q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_M0M:
+                                case DIR_M0M:
                                     x1b = x1a - q * dx;
                                     x2b = x2a;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_0MM:
+                                case DIR_0MM:
                                     x1b = x1a;
                                     x2b = x2a - q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_PPP:
+                                case DIR_PPP:
                                     x1b = x1a + q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_MMM:
+                                case DIR_MMM:
                                     x1b = x1a - q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_PPM:
+                                case DIR_PPM:
                                     x1b = x1a + q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_MMP:
+                                case DIR_MMP:
                                     x1b = x1a - q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_PMP:
+                                case DIR_PMP:
                                     x1b = x1a + q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a + q * dx;
                                     break;
-                                case D3Q27System::DIR_MPM:
+                                case DIR_MPM:
                                     x1b = x1a - q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_PMM:
+                                case DIR_PMM:
                                     x1b = x1a + q * dx;
                                     x2b = x2a - q * dx;
                                     x3b = x3a - q * dx;
                                     break;
-                                case D3Q27System::DIR_MPP:
+                                case DIR_MPP:
                                     x1b = x1a - q * dx;
                                     x2b = x2a + q * dx;
                                     x3b = x3a + q * dx;
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.h b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.h
index 4e588e96adbd42102a38cf3ee8ec27cd49e87dbf..80a58efccb36588111ddf5301f3fb68068e20958 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.h
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27Interactor.h
@@ -75,8 +75,8 @@ public:
     virtual void addBCAdapter(const SPtr<BCAdapter> bcAdapter) { bcAdapters.push_back(bcAdapter); }
     void deleteBCAdapter() { bcAdapters.clear(); }
 
-    void initInteractor(const double &timeStep = 0) override;
-    void updateInteractor(const double &timestep = 0) override;
+    void initInteractor(const real &timeStep = 0) override;
+    void updateInteractor(const real &timestep = 0) override;
 
     void setReinitWithStoredQs(bool reinitWithStoredQsFlag) { this->reinitWithStoredQsFlag = reinitWithStoredQsFlag; }
 
@@ -112,9 +112,9 @@ protected:
     BcNodeIndicesMap bcNodeIndicesMap;
 
     void initRayVectors();
-    double rayX1[D3Q27System::FENDDIR + 1];
-    double rayX2[D3Q27System::FENDDIR + 1];
-    double rayX3[D3Q27System::FENDDIR + 1];
+    real rayX1[D3Q27System::FENDDIR + 1];
+    real rayX2[D3Q27System::FENDDIR + 1];
+    real rayX3[D3Q27System::FENDDIR + 1];
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
index c7b13d3834a6bb822b945f9425ba21b8d5be399d..d569d3c0f16b24ea7592c09b26a06b8be69e126c 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
@@ -52,7 +52,7 @@ D3Q27TriFaceMeshInteractor::D3Q27TriFaceMeshInteractor(SPtr<GbTriFaceMesh3D> tri
 //////////////////////////////////////////////////////////////////////////
 D3Q27TriFaceMeshInteractor::~D3Q27TriFaceMeshInteractor() = default;
 //////////////////////////////////////////////////////////////////////////
-void D3Q27TriFaceMeshInteractor::initInteractor(const double &timeStep)
+void D3Q27TriFaceMeshInteractor::initInteractor(const real &timeStep)
 {
     updateBlocks(); 
     setQs(timeStep);
@@ -77,7 +77,7 @@ bool D3Q27TriFaceMeshInteractor::setDifferencesToGbObject3D(const SPtr<Block3D>
     SPtr<ILBMKernel> kernel = block->getKernel();
     SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
 
-    double internX1, internX2, internX3;
+    real internX1, internX2, internX3;
 
     int startIX1 = 0, startIX2 = 0, startIX3 = 0;
     int stopIX1 = (int)bcArray->getNX1(), stopIX2 = (int)bcArray->getNX2(), stopIX3 = (int)bcArray->getNX3();
@@ -120,8 +120,10 @@ bool D3Q27TriFaceMeshInteractor::setDifferencesToGbObject3D(const SPtr<Block3D>
 }
 //////////////////////////////////////////////////////////////////////////
 // E.F. /4/16/2013
-void D3Q27TriFaceMeshInteractor::setQs(const double &timeStep)
+void D3Q27TriFaceMeshInteractor::setQs(const real &timeStep)
 {
+    using namespace vf::lbm::dir;
+
     UBLOGML(logDEBUG1, "\nLBMTriFaceMeshInteractor - setQs start ");
     if (!this->grid.lock())
         throw UbException(UB_EXARGS, "ups, no grid.lock()!!");
@@ -364,9 +366,9 @@ void D3Q27TriFaceMeshInteractor::setQs(const double &timeStep)
                 //            tmpSolidNodesFromOtherInteractors[block];
                 double q, distance;
 
-                double &nodeDx1 = nodeDeltaToNeigh[level][D3Q27System::DIR_P00];
-                double &nodeDx2 = nodeDeltaToNeigh[level][D3Q27System::DIR_0P0];
-                double &nodeDx3 = nodeDeltaToNeigh[level][D3Q27System::DIR_00P];
+                double &nodeDx1 = nodeDeltaToNeigh[level][DIR_P00];
+                double &nodeDx2 = nodeDeltaToNeigh[level][DIR_0P0];
+                double &nodeDx3 = nodeDeltaToNeigh[level][DIR_00P];
 
                 // fuer OBB-Test
                 double qEinflussDelta = 1.1 * sqrt(nodeDx1 * nodeDx1 + nodeDx2 * nodeDx2 + nodeDx3 * nodeDx3);
@@ -591,8 +593,10 @@ void D3Q27TriFaceMeshInteractor::setQs(const double &timeStep)
 //  1. fuer nicht markierte Bloecke genuegt EIN pointInObject(Dreicksnetz)-Test um den gesamten Block bei Erfolg als
 //  „not active“ zu markieren
 //  2. fuer markiertre Bloecke wird ein rekursiver Fuellalgorithmus durchgefuehrt
-void D3Q27TriFaceMeshInteractor::initInteractor2(const double &timeStep)
+void D3Q27TriFaceMeshInteractor::initInteractor2(const real &timeStep)
 {
+    using namespace vf::lbm::dir;
+
     UBLOGML(logDEBUG1, "\nLBMTriFaceMeshInteractor - initInteractor start ");
     if (!this->grid.lock())
         throw UbException(UB_EXARGS, "ups, no grid.lock()!!");
@@ -736,7 +740,7 @@ void D3Q27TriFaceMeshInteractor::initInteractor2(const double &timeStep)
 
     // notwendige variablen initialisieren (u.a. blockDeltas des groben levels)
     float triPoints[3][3];
-    float vx1 = 0.0, vx2 = 0.0, vx3 = 0.0;
+    real vx1 = 0.0, vx2 = 0.0, vx3 = 0.0;
     unsigned counterTriBoxOverlap = 0, counterAABBTriFace = 0, counterHalfspace = 0, counterBilligOBB = 0;
     std::vector<GbTriFaceMesh3D::TriFace> &triangles = *mesh->getTriangles();
     std::vector<GbTriFaceMesh3D::Vertex> &nodes      = *mesh->getNodes();
@@ -880,9 +884,9 @@ void D3Q27TriFaceMeshInteractor::initInteractor2(const double &timeStep)
                 std::set<std::vector<int>> &solidsFromOtherInteractors = tmpSolidNodesFromOtherInteractors[block];
                 double q, internX1, internX2, internX3, distance;
 
-                double &nodeDx1 = nodeDeltaToNeigh[level][D3Q27System::DIR_P00];
-                double &nodeDx2 = nodeDeltaToNeigh[level][D3Q27System::DIR_0P0];
-                double &nodeDx3 = nodeDeltaToNeigh[level][D3Q27System::DIR_00P];
+                double &nodeDx1 = nodeDeltaToNeigh[level][DIR_P00];
+                double &nodeDx2 = nodeDeltaToNeigh[level][DIR_0P0];
+                double &nodeDx3 = nodeDeltaToNeigh[level][DIR_00P];
 
                 // fuer OBB-Test
                 double qEinflussDelta = 1.1 * sqrt(nodeDx1 * nodeDx1 + nodeDx2 * nodeDx2 + nodeDx3 * nodeDx3);
@@ -1181,9 +1185,9 @@ void D3Q27TriFaceMeshInteractor::initInteractor2(const double &timeStep)
 
                 std::set<UbTupleInt3> &solidNodeIndices = this->solidNodeIndicesMap[block];
 
-                float nodeDeltaX1 = (float)nodeDeltaToNeigh[level][D3Q27System::DIR_P00];
-                float nodeDeltaX2 = (float)nodeDeltaToNeigh[level][D3Q27System::DIR_0P0];
-                float nodeDeltaX3 = (float)nodeDeltaToNeigh[level][D3Q27System::DIR_00P];
+                float nodeDeltaX1 = (float)nodeDeltaToNeigh[level][DIR_P00];
+                float nodeDeltaX2 = (float)nodeDeltaToNeigh[level][DIR_0P0];
+                float nodeDeltaX3 = (float)nodeDeltaToNeigh[level][DIR_00P];
 
                 // flagfield matrix initialisieren
                 CbArray3D<FLAGS> flagField(blocknx1, blocknx2, blocknx3, UNDEF_FLAG);
@@ -1489,7 +1493,7 @@ void D3Q27TriFaceMeshInteractor::refineBlockGridToLevel(int level, double startD
     UBLOG(logDEBUG1, " - refine done");
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27TriFaceMeshInteractor::updateMovedGeometry(const double &timeStep) {}
+void D3Q27TriFaceMeshInteractor::updateMovedGeometry(const real &timeStep) {}
 ////////////////////////////////////////////////////////////////////////////
 void D3Q27TriFaceMeshInteractor::recursiveGridFill(CbArray3D<FLAGS> &flagfield, const short &xs, const short &ys,
                                                    const short &zs, const FLAGS &type)
@@ -1561,9 +1565,9 @@ UbTupleDouble3 D3Q27TriFaceMeshInteractor::getForces()
     ////return getForcesTriangle();
     // this->calculateForces();
 
-    double forceX1 = 0.0;
-    double forceX2 = 0.0;
-    double forceX3 = 0.0;
+    real forceX1 = 0.0;
+    real forceX2 = 0.0;
+    real forceX3 = 0.0;
 
     // double area = 0.0;
 
@@ -1582,9 +1586,9 @@ UbTupleDouble3 D3Q27TriFaceMeshInteractor::getForces()
 //////////////////////////////////////////////////////////////////////////
 UbTupleDouble3 D3Q27TriFaceMeshInteractor::getForcesTriangle()
 {
-    double forceX1 = 0.0;
-    double forceX2 = 0.0;
-    double forceX3 = 0.0;
+    real forceX1 = 0.0;
+    real forceX2 = 0.0;
+    real forceX3 = 0.0;
 
     // D3Q19BlockGrid& grid.lock() = dynamic_cast<D3Q19BlockGrid&>(*this->grid.lock());
     ////   CoordinateTransformation3D *trafo = this->grid.lock()->getTransformation();
@@ -1829,7 +1833,7 @@ string D3Q27TriFaceMeshInteractor::toString()
     return ss.str();
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27TriFaceMeshInteractor::reinitWithStoredQs(const double & /*timeStep*/)
+void D3Q27TriFaceMeshInteractor::reinitWithStoredQs(const real & /*timeStep*/)
 {
     // alle solid Bloecke wieder solid setzen
     std::vector<SPtr<Block3D>> &solidBlocks = this->getSolidBlockSet();
@@ -1902,7 +1906,7 @@ void D3Q27TriFaceMeshInteractor::reinitWithStoredQs(const double & /*timeStep*/)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void D3Q27TriFaceMeshInteractor::updateInteractor(const double &timestep)
+void D3Q27TriFaceMeshInteractor::updateInteractor(const real &timestep)
 {
     D3Q27Interactor::updateInteractor(timestep);
 }
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
index 9ac8bfc48a4fda3612b0781d93496cce723d2cd8..9e42de660502b33048abf31f5e831902134d3826 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
@@ -33,14 +33,14 @@ public:
 
     ~D3Q27TriFaceMeshInteractor() override;
 
-    void initInteractor(const double &timeStep = 0) override;
-    virtual void initInteractor2(const double &timeStep = 0);
+    void initInteractor(const real &timeStep = 0) override;
+    virtual void initInteractor2(const real &timeStep = 0);
 
-    void updateInteractor(const double &timestep = 0) override;
+    void updateInteractor(const real &timestep = 0) override;
 
-    void updateMovedGeometry(const double &timeStep = 0);
-    void setQs(const double &timeStep);
-    void refineBlockGridToLevel(int level, double startDistance, double stopDistance);
+    void updateMovedGeometry(const real &timeStep = 0);
+    void setQs(const real &timeStep);
+    void refineBlockGridToLevel(int level, real startDistance, real stopDistance);
 
     bool setDifferencesToGbObject3D(const SPtr<Block3D> block) override;
 
@@ -60,36 +60,36 @@ public:
     void calculateStresses();
     void calculateStressesAlternativ();
 
-    void calcStressesLine(UbTupleDouble6 &stresses, const double &weight, const UbTupleDouble6 &stvW,
+    void calcStressesLine(UbTupleDouble6 &stresses, const real &weight, const UbTupleDouble6 &stvW,
                           const UbTupleDouble6 &stvE);
-    void calcStressesFace(UbTupleDouble6 &stresses, const double &weightX, const double &weightY,
+    void calcStressesFace(UbTupleDouble6 &stresses, const real &weightX, const real &weightY,
                           const UbTupleDouble6 &stvSW, const UbTupleDouble6 &stvSE, const UbTupleDouble6 &stvNE,
                           const UbTupleDouble6 &stvNW);
-    void calcStressesCube(UbTupleDouble6 &stresses, const double &weightX, const double &weightY, const double &weightZ,
+    void calcStressesCube(UbTupleDouble6 &stresses, const real &weightX, const real &weightY, const real &weightZ,
                           const UbTupleDouble6 &stvBSW, const UbTupleDouble6 &stvBSE, const UbTupleDouble6 &stvBNE,
                           const UbTupleDouble6 &stvBNW, const UbTupleDouble6 &stvTSW, const UbTupleDouble6 &stvTSE,
                           const UbTupleDouble6 &stvTNE, const UbTupleDouble6 &stvTNW);
 
     void calculatePressure();
-    void calcPressureLine(double &p, const double &weight, const double &pW, const double &pE);
-    void calcPressureFace(double &p, const double &weightX, const double &weightY, const double &pSW, const double &pSE,
-                          const double &pNE, const double &pNW);
-    void calcPressureCube(double &p, const double &weightX, const double &weightY, const double &weightZ,
-                          const double &pBSW, const double &pBSE, const double &pBNE, const double &pBNW,
-                          const double &pTSW, const double &pTSE, const double &pTNE, const double &pTNW);
-
-    void setForceShift(double forceshift)
+    void calcPressureLine(real &p, const real &weight, const real &pW, const real &pE);
+    void calcPressureFace(real &p, const real &weightX, const real &weightY, const real &pSW, const real &pSE,
+                          const real &pNE, const real &pNW);
+    void calcPressureCube(real &p, const real &weightX, const real &weightY, const real &weightZ,
+                          const real &pBSW, const real &pBSE, const real &pBNE, const real &pBNW,
+                          const real &pTSW, const real &pTSE, const real &pTNE, const real &pTNW);
+
+    void setForceShift(real forceshift)
     {
         this->forceshift       = forceshift;
         this->forceshiftpolicy = true;
     }
-    void setVelocityShift(double velocityshift)
+    void setVelocityShift(real velocityshift)
     {
         this->velocityshift       = velocityshift;
         this->velocityshiftpolicy = true;
     }
-    double getForceShift() { return this->forceshift; }
-    double getVelocityShift() { return this->velocityshift; }
+    real getForceShift() { return this->forceshift; }
+    real getVelocityShift() { return this->velocityshift; }
     bool getForceShiftPolicy() { return forceshiftpolicy; }
     bool getVelocityShiftPolicy() { return velocityshiftpolicy; }
 
@@ -107,7 +107,7 @@ protected:
     bool useHalfSpace{ true };
     bool regardPIOTest{ true };
 
-    void reinitWithStoredQs(const double &timeStep);
+    void reinitWithStoredQs(const real &timeStep);
     //   bool reinitWithStoredQsFlag;
     std::map<SPtr<Block3D>, std::map<UbTupleInt3, std::vector<float>>>
         bcNodeIndicesAndQsMap; //!!! es kann sein, dass in diesem interactor
diff --git a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp
index 84526c62598b1d718b1f179228ae2a3f51839856..e08a0283339b6e5976c7439ea2b64142c00ef7c4 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.cpp
@@ -34,7 +34,7 @@
 #include "Interactor3D.h"
 
 #include "UbException.h"
-#include <basics/utilities/UbMath.h>
+//#include <basics/utilities/UbMath.h>
 #include <fstream>
 #include <geometry3d/GbCuboid3D.h>
 
@@ -68,44 +68,44 @@ Interactor3D::Interactor3D(SPtr<GbObject3D> geoObject3D, SPtr<Grid3D> grid, int
 //////////////////////////////////////////////////////////////////////////
 Interactor3D::~Interactor3D() = default;
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::arePointsInsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                            double maxX3, double delta)
+bool Interactor3D::arePointsInsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                            real maxX3, real delta)
 {
     bool result = true;
-    for (double ix3 = minX3; ix3 <= maxX3; ix3 += delta)
-        for (double ix2 = minX2; ix2 <= maxX2; ix2 += delta)
-            for (double ix1 = minX1; ix1 <= maxX1; ix1 += delta)
+    for (real ix3 = minX3; ix3 <= maxX3; ix3 += delta)
+        for (real ix2 = minX2; ix2 <= maxX2; ix2 += delta)
+            for (real ix1 = minX1; ix1 <= maxX1; ix1 += delta)
                 result = result && this->geoObject3D->isPointInGbObject3D(ix1, ix2, ix3);
 
     return result;
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::arePointsOutsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                             double maxX3, double delta)
+bool Interactor3D::arePointsOutsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                             real maxX3, real delta)
 {
     bool result = true;
-    for (double ix3 = minX3; ix3 <= maxX3; ix3 += delta)
-        for (double ix2 = minX2; ix2 <= maxX2; ix2 += delta)
-            for (double ix1 = minX1; ix1 <= maxX1; ix1 += delta)
+    for (real ix3 = minX3; ix3 <= maxX3; ix3 += delta)
+        for (real ix2 = minX2; ix2 <= maxX2; ix2 += delta)
+            for (real ix1 = minX1; ix1 <= maxX1; ix1 += delta)
                 result = result && (!this->geoObject3D->isPointInGbObject3D(ix1, ix2, ix3));
 
     return result;
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::arePointsCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                             double maxX3, double delta)
+bool Interactor3D::arePointsCuttingGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                             real maxX3, real delta)
 {
     bool result = true;
-    for (double ix3 = minX3; ix3 <= maxX3; ix3 += delta)
-        for (double ix2 = minX2; ix2 <= maxX2; ix2 += delta)
-            for (double ix1 = minX1; ix1 <= maxX1; ix1 += delta)
+    for (real ix3 = minX3; ix3 <= maxX3; ix3 += delta)
+        for (real ix2 = minX2; ix2 <= maxX2; ix2 += delta)
+            for (real ix1 = minX1; ix1 <= maxX1; ix1 += delta)
                 result = result || this->geoObject3D->isPointInGbObject3D(ix1, ix2, ix3);
 
     return result;
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::isBlockOutsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                           double maxX3, double delta)
+bool Interactor3D::isBlockOutsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                           real maxX3, real delta)
 {
     switch (accuracy) {
             // simple duff
@@ -144,8 +144,8 @@ bool Interactor3D::isBlockOutsideGeoObject(double minX1, double minX2, double mi
     }
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::isBlockInsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                          double maxX3, double delta)
+bool Interactor3D::isBlockInsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                          real maxX3, real delta)
 {
     switch (accuracy) {
             // simple duff
@@ -184,8 +184,8 @@ bool Interactor3D::isBlockInsideGeoObject(double minX1, double minX2, double min
     }
 }
 //////////////////////////////////////////////////////////////////////////
-bool Interactor3D::isBlockCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2,
-                                           double maxX3, double delta)
+bool Interactor3D::isBlockCuttingGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2,
+                                           real maxX3, real delta)
 {
     switch (accuracy) {
             // simple duff
@@ -226,9 +226,9 @@ bool Interactor3D::isBlockCuttingGeoObject(double minX1, double minX2, double mi
 //////////////////////////////////////////////////////////////////////////
 void Interactor3D::setSolidBlock(SPtr<Block3D> block)
 {
-    double minX1, minX2, minX3, maxX1, maxX2, maxX3;
+    real minX1, minX2, minX3, maxX1, maxX2, maxX3;
 
-    double deltaX               = grid.lock()->getDeltaX(block);
+    real deltaX               = grid.lock()->getDeltaX(block);
     UbTupleDouble3 blockLengths = grid.lock()->getBlockLengths(block);
     UbTupleDouble3 org          = grid.lock()->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset   = grid.lock()->getNodeOffset(block);
@@ -257,9 +257,9 @@ void Interactor3D::setSolidBlock(SPtr<Block3D> block)
 //////////////////////////////////////////////////////////////////////////
 void Interactor3D::setBCBlock(SPtr<Block3D> block)
 {
-    double minX1, minX2, minX3, maxX1, maxX2, maxX3;
+    real minX1, minX2, minX3, maxX1, maxX2, maxX3;
 
-    double deltaX               = grid.lock()->getDeltaX(block);
+    real deltaX               = grid.lock()->getDeltaX(block);
     UbTupleDouble3 blockLengths = grid.lock()->getBlockLengths(block);
     UbTupleDouble3 org          = grid.lock()->getBlockWorldCoordinates(block);
     UbTupleDouble3 nodeOffset   = grid.lock()->getNodeOffset(block);
@@ -298,7 +298,7 @@ void Interactor3D::updateBlocks()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-void Interactor3D::updateInteractor(const double & /*timeStep*/)
+void Interactor3D::updateInteractor(const real & /*timeStep*/)
 {
     UB_THROW(UbException("Interactor3D::updateInteractor - toDo"));
 }
diff --git a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
index 9727bf636085c7c0d24a9108acc71925af36e5d1..3da0ebbca0bca020b5cd0b2cdd9e8acc8564466a 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
+++ b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
@@ -39,6 +39,7 @@
 
 #include "UbSystem.h"
 #include "UbTuple.h"
+#include "lbm/constants/D3Q27.h"
 
 class Block3D;
 class Grid3D;
@@ -57,8 +58,8 @@ public:
     Interactor3D(SPtr<GbObject3D> geoObject3D, SPtr<Grid3D> grid, int type, Interactor3D::Accuracy a);
 
     virtual ~Interactor3D();
-    virtual void initInteractor(const double &timestep = 0) = 0;
-    virtual void updateInteractor(const double &timestep = 0) = 0;
+    virtual void initInteractor(const real &timestep = 0) = 0;
+    virtual void updateInteractor(const real &timestep = 0) = 0;
 
     void setSolidBlock(SPtr<Block3D> block);
     void setBCBlock(SPtr<Block3D> block);
@@ -96,27 +97,27 @@ protected:
     //! detect that points are inside object
     //! \param min/max coordinates of bounding box
     //! \param delta is delta x
-    bool arePointsInsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                  double delta);
+    bool arePointsInsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                  real delta);
 
     //! detect that points aren't inside object
     //! \param min/max coordinates of bounding box
     //! \param delta is delta x
-    bool arePointsOutsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                   double delta);
+    bool arePointsOutsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                   real delta);
 
     //! detect that points are cutting object
     //! \param min/max coordinates of bounding box
     //! \param delta is delta x
-    bool arePointsCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                   double delta);
-
-    bool isBlockOutsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                 double delta);
-    bool isBlockInsideGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                double delta);
-    bool isBlockCuttingGeoObject(double minX1, double minX2, double minX3, double maxX1, double maxX2, double maxX3,
-                                 double delta);
+    bool arePointsCuttingGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                   real delta);
+
+    bool isBlockOutsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                 real delta);
+    bool isBlockInsideGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                real delta);
+    bool isBlockCuttingGeoObject(real minX1, real minX2, real minX3, real maxX1, real maxX2, real maxX3,
+                                 real delta);
 
     void updateBlocks();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
index 50a5339ea01d70ff0076ef81bda16db22e56ffe7..d27564c49cd2b115b9e4b4609b0ecf8f1a8a941b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
@@ -6,6 +6,7 @@
 #include "D3Q27System.h"
 #include "DataSet3D.h"
 #include "Block3D.h"
+#include "basics/constants/NumericConstants.h"
 
 #define PROOF_CORRECTNESS
 
@@ -40,7 +41,9 @@ SPtr<LBMKernel> BGKLBMKernel::clone()
 void BGKLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+   // using namespace UbMath; // disabled: constants now come from vf::basics::constant
+   using namespace vf::basics::constant;
+   using namespace vf::lbm::dir;
 
     // initializing of forcing stuff
     if (withForcing) {
@@ -67,9 +70,9 @@ void BGKLBMKernel::calculate(int step)
         std::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getZeroDistributions();
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
-    LBMReal f[D3Q27System::ENDF + 1];
-    LBMReal feq[D3Q27System::ENDF + 1];
-    LBMReal drho, vx1, vx2, vx3;
+    real f[D3Q27System::ENDF + 1];
+    real feq[D3Q27System::ENDF + 1];
+    real drho, vx1, vx2, vx3;
     const int bcArrayMaxX1 = (int)bcArray->getNX1();
     const int bcArrayMaxX2 = (int)bcArray->getNX2();
     const int bcArrayMaxX3 = (int)bcArray->getNX3();
@@ -135,7 +138,7 @@ void BGKLBMKernel::calculate(int step)
                     vx3 = f[DIR_00P] - f[DIR_00M] + f[DIR_P0P] - f[DIR_M0M] - f[DIR_P0M] + f[DIR_M0P] + f[DIR_0PP] - f[DIR_0MM] - f[DIR_0PM] + f[DIR_0MP] + f[DIR_PPP] +
                           f[DIR_MMP] + f[DIR_PMP] + f[DIR_MPP] - f[DIR_PPM] - f[DIR_MMM] - f[DIR_PMM] - f[DIR_MPM];
 
-                    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+                    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
                     feq[DIR_000] = c8o27 * (drho - cu_sq);
                     feq[DIR_P00]    = c2o27 * (drho + 3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq);
@@ -244,10 +247,10 @@ void BGKLBMKernel::calculate(int step)
                     }
                     //////////////////////////////////////////////////////////////////////////
 #ifdef PROOF_CORRECTNESS
-                    LBMReal rho_post = f[DIR_000] + f[DIR_P00] + f[DIR_M00] + f[DIR_0P0] + f[DIR_0M0] + f[DIR_00P] + f[DIR_00M] + f[DIR_PP0] + f[DIR_MM0] + f[DIR_PM0] +
+                    real rho_post = f[DIR_000] + f[DIR_P00] + f[DIR_M00] + f[DIR_0P0] + f[DIR_0M0] + f[DIR_00P] + f[DIR_00M] + f[DIR_PP0] + f[DIR_MM0] + f[DIR_PM0] +
                                        f[DIR_MP0] + f[DIR_P0P] + f[DIR_M0M] + f[DIR_P0M] + f[DIR_M0P] + f[DIR_0PP] + f[DIR_0MM] + f[DIR_0PM] + f[DIR_0MP] + f[DIR_PPP] +
                                        f[DIR_MMP] + f[DIR_PMP] + f[DIR_MPP] + f[DIR_PPM] + f[DIR_MMM] + f[DIR_PMM] + f[DIR_MPM];
-                    LBMReal dif = drho - rho_post;
+                    real dif = drho - rho_post;
 #ifdef SINGLEPRECISION
                     if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -263,35 +266,35 @@ void BGKLBMKernel::calculate(int step)
                     //////////////////////////////////////////////////////////////////////////
                     // write distribution
                     //////////////////////////////////////////////////////////////////////////
-                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)     = f[D3Q27System::INV_P00];
-                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)     = f[D3Q27System::INV_0P0];
-                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)     = f[D3Q27System::INV_00P];
-                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)    = f[D3Q27System::INV_PP0];
-                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)   = f[D3Q27System::INV_MP0];
-                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)    = f[D3Q27System::INV_P0P];
-                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)   = f[D3Q27System::INV_M0P];
-                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)    = f[D3Q27System::INV_0PP];
-                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)   = f[D3Q27System::INV_0MP];
-                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)   = f[D3Q27System::INV_PPP];
-                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)  = f[D3Q27System::INV_MPP];
-                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)  = f[D3Q27System::INV_PMP];
-                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = f[D3Q27System::INV_MMP];
+                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)     = f[INV_P00];
+                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)     = f[INV_0P0];
+                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)     = f[INV_00P];
+                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)    = f[INV_PP0];
+                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)   = f[INV_MP0];
+                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)    = f[INV_P0P];
+                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)   = f[INV_M0P];
+                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)    = f[INV_0PP];
+                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)   = f[INV_0MP];
+                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)   = f[INV_PPP];
+                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)  = f[INV_MPP];
+                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)  = f[INV_PMP];
+                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = f[INV_MMP];
 
-                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3)     = f[D3Q27System::INV_M00];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3)     = f[D3Q27System::INV_0M0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p)     = f[D3Q27System::INV_00M];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3)   = f[D3Q27System::INV_MM0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3)    = f[D3Q27System::INV_PM0];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p)   = f[D3Q27System::INV_M0M];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p)    = f[D3Q27System::INV_P0M];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p)   = f[D3Q27System::INV_0MM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p)    = f[D3Q27System::INV_0PM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[D3Q27System::INV_MMM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p)  = f[D3Q27System::INV_PMM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p)  = f[D3Q27System::INV_MPM];
-                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p)   = f[D3Q27System::INV_PPM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3)     = f[INV_M00];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3)     = f[INV_0M0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p)     = f[INV_00M];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3)   = f[INV_MM0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3)    = f[INV_PM0];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p)   = f[INV_M0M];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p)    = f[INV_P0M];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p)   = f[INV_0MM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p)    = f[INV_0PM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[INV_MMM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p)  = f[INV_PMM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p)  = f[INV_MPM];
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p)   = f[INV_PPM];
 
-                    (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+                    (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
                     //////////////////////////////////////////////////////////////////////////
                 }
             }
@@ -299,4 +302,4 @@ void BGKLBMKernel::calculate(int step)
     }
 }
 //////////////////////////////////////////////////////////////////////////
-double BGKLBMKernel::getCalculationTime() { return 0.0; }
+real BGKLBMKernel::getCalculationTime() { return 0.0; }
diff --git a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
index 9d17a8cc7677db7a142f4340dcdeaf38e268d214..099e9c093a6681c4c511a0fb02f9f023dafa3253 100644
--- a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
@@ -12,21 +12,21 @@ public:
     ~BGKLBMKernel() override;
     void calculate(int step) override;
     SPtr<LBMKernel> clone() override;
-    double getCalculationTime() override;
+    real getCalculationTime() override;
 
 private:
     void initDataSet();
     // void collideAllCompressible();
     // void collideAllIncompressible();
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions;
 
     mu::value_type muX1, muX2, muX3;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.cpp
index b1e48abd9ed3c0a2b4bff26090c20512d94eff7f..4f5ce06af1a85555933b7b3cd9413e1daee727a0 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.cpp
@@ -9,7 +9,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+// using namespace UbMath; // disabled: constants now come from vf::basics::constant
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CompressibleCumulant4thOrderViscosityLBMKernel::CompressibleCumulant4thOrderViscosityLBMKernel()
@@ -49,7 +50,7 @@ SPtr<LBMKernel> CompressibleCumulant4thOrderViscosityLBMKernel::clone()
    } 
    else
    {
-      OxxPyyPzz = one;
+      OxxPyyPzz = c1o1;
    }
 
    dynamicPointerCast<CompressibleCumulant4thOrderViscosityLBMKernel>(kernel)->OxxPyyPzz = this->OxxPyyPzz;
@@ -105,20 +106,20 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
    int maxX2 = bcArrayMaxX2-ghostLayerWidth;
    int maxX3 = bcArrayMaxX3-ghostLayerWidth;
 
-   LBMReal omega = collFactor;
+   real omega = collFactor;
    //LBMReal OxyyPxzz  = eight*(-two+omega)*(one+two*omega)/(-eight-fourteen*omega+seven*omega*omega);//one;
    //LBMReal OxyyMxzz  = eight*(-two+omega)*(-seven+four*omega)/(fiftysix-fifty*omega+nine*omega*omega);//one;
    //LBMReal Oxyz      = twentyfour*(-two+omega)*(-two-seven*omega+three*omega*omega)/(fourtyeight+c152*omega-c130*omega*omega+twentynine*omega*omega*omega);
-   LBMReal OxyyPxzz  = 8.0*(omega-2.0)*(OxxPyyPzz*(3.0*omega-1.0)-5.0*omega)/(8.0*(5.0-2.0*omega)*omega+OxxPyyPzz*(8.0+omega*(9.0*omega-26.0)));
-   LBMReal OxyyMxzz  = 8.0*(omega-2.0)*(omega+OxxPyyPzz*(3.0*omega-7.0))/(OxxPyyPzz*(56.0-42.0*omega+9.0*omega*omega)-8.0*omega);
-   LBMReal Oxyz      = 24.0*(omega-2.0)*(4.0*omega*omega+omega*OxxPyyPzz*(18.0-13.0*omega)+OxxPyyPzz*OxxPyyPzz*(2.0+omega*(6.0*omega-11.0)))/(16.0*omega*omega*(omega-6.0)-2.0*omega*OxxPyyPzz*(216.0+5.0*omega*(9.0*omega-46.0))+OxxPyyPzz*OxxPyyPzz*(omega*(3.0*omega-10.0)*(15.0*omega-28.0)-48.0));
+   real OxyyPxzz  = 8.0*(omega-2.0)*(OxxPyyPzz*(3.0*omega-1.0)-5.0*omega)/(8.0*(5.0-2.0*omega)*omega+OxxPyyPzz*(8.0+omega*(9.0*omega-26.0)));
+   real OxyyMxzz  = 8.0*(omega-2.0)*(omega+OxxPyyPzz*(3.0*omega-7.0))/(OxxPyyPzz*(56.0-42.0*omega+9.0*omega*omega)-8.0*omega);
+   real Oxyz      = 24.0*(omega-2.0)*(4.0*omega*omega+omega*OxxPyyPzz*(18.0-13.0*omega)+OxxPyyPzz*OxxPyyPzz*(2.0+omega*(6.0*omega-11.0)))/(16.0*omega*omega*(omega-6.0)-2.0*omega*OxxPyyPzz*(216.0+5.0*omega*(9.0*omega-46.0))+OxxPyyPzz*OxxPyyPzz*(omega*(3.0*omega-10.0)*(15.0*omega-28.0)-48.0));
 
    //LBMReal A = (four + two*omega - three*omega*omega) / (two - seven*omega + five*omega*omega);
    //LBMReal B = (four + twentyeight*omega - fourteen*omega*omega) / (six - twentyone*omega + fiveteen*omega*omega);
 
-   LBMReal A = (4.0*omega*omega+2.0*omega*OxxPyyPzz*(omega-6.0)+OxxPyyPzz*OxxPyyPzz*(omega*(10.0-3.0*omega)-4.0))/((omega-OxxPyyPzz)*(OxxPyyPzz*(2.0+3.0*omega)-8.0*omega));
+   real A = (4.0*omega*omega+2.0*omega*OxxPyyPzz*(omega-6.0)+OxxPyyPzz*OxxPyyPzz*(omega*(10.0-3.0*omega)-4.0))/((omega-OxxPyyPzz)*(OxxPyyPzz*(2.0+3.0*omega)-8.0*omega));
    //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-   LBMReal B = (4.0*omega*OxxPyyPzz*(9.0*omega-16.0)-4.0*omega*omega-2.0*OxxPyyPzz*OxxPyyPzz*(2.0+9.0*omega*(omega-2.0)))/(3.0*(omega-OxxPyyPzz)*(OxxPyyPzz*(2.0+3.0*omega)-8.0*omega));
+   real B = (4.0*omega*OxxPyyPzz*(9.0*omega-16.0)-4.0*omega*omega-2.0*OxxPyyPzz*OxxPyyPzz*(2.0+9.0*omega*(omega-2.0)))/(3.0*(omega-OxxPyyPzz)*(OxxPyyPzz*(2.0+3.0*omega)-8.0*omega));
 
    for (int x3 = minX3; x3 < maxX3; x3++)
    {
@@ -152,50 +153,50 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                // a b c
                //-1 0 1
 
-               LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-               LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-               LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-               LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-               LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-               LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-               LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-               LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-               LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-               LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-               LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-               LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-               LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-               LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-               LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-               LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-               LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-               LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-               LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-               LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-               LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-               LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-               LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-               LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-               LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-               LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-               LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-               ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
+               real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+               real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+               real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+               real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+               real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+               real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+               real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+               real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+               real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+               real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+               real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+               real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+               real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+               real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+               real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+               real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+               real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+               real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+               real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+               real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+               real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+               real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+               real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+               real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+               real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+               real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+               real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+               ////////////////////////////////////////////////////////////////////////////////////
+               real drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
                   (((mfbac+mfbca)+(mfbaa+mfbcc))+((mfabc+mfcba)+(mfaba+mfcbc))+((mfacb+mfcab)+(mfaab+mfccb)))+
                   ((mfabb+mfcbb)+(mfbab+mfbcb))+(mfbba+mfbbc))+mfbbb;
 
-               LBMReal rho = one+drho;
+               real rho = c1o1 +drho;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
+               real vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
                   (((mfcba-mfabc)+(mfcbc-mfaba))+((mfcab-mfacb)+(mfccb-mfaab)))+
                   (mfcbb-mfabb))/rho;
-               LBMReal vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
+               real vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
                   (((mfbca-mfbac)+(mfbcc-mfbaa))+((mfacb-mfcab)+(mfccb-mfaab)))+
                   (mfbcb-mfbab))/rho;
-               LBMReal vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
+               real vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
                   (((mfbac-mfbca)+(mfbcc-mfbaa))+((mfabc-mfcba)+(mfcbc-mfaba)))+
                   (mfbbc-mfbba))/rho;
                ////////////////////////////////////////////////////////////////////////////////////
@@ -204,9 +205,9 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ///////////////////////////////////////////////////////////////////////////////////////////
                if (withForcing)
                {
-                  muX1 = static_cast<double>(x1-1+ix1*maxX1);
-                  muX2 = static_cast<double>(x2-1+ix2*maxX2);
-                  muX3 = static_cast<double>(x3-1+ix3*maxX3);
+                  muX1 = static_cast<real>(x1-1+ix1*maxX1);
+                  muX2 = static_cast<real>(x2-1+ix2*maxX2);
+                  muX3 = static_cast<real>(x3-1+ix3*maxX3);
 
                   forcingX1 = muForcingX1.Eval();
                   forcingX2 = muForcingX2.Eval();
@@ -218,20 +219,20 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                }
                ///////////////////////////////////////////////////////////////////////////////////////////               
          ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal oMdrho = one; // comp special
+               real oMdrho = c1o1; // comp special
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal m0, m1, m2;
-               LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+               real m0, m1, m2;
+               real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx*vvx;
                vy2 = vvy*vvy;
                vz2 = vvz*vvz;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal wadjust;
-               LBMReal qudricLimitP = 0.01;// * 0.0001f;
-               LBMReal qudricLimitM = 0.01;// * 0.0001f;
-               LBMReal qudricLimitD = 0.01;// * 0.001f;
+               real wadjust;
+               real qudricLimitP = 0.01;// * 0.0001f;
+               real qudricLimitM = 0.01;// * 0.0001f;
+               real qudricLimitD = 0.01;// * 0.001f;
                //LBMReal s9 = minusomega;
                //test
                //s9 = 0.;
@@ -247,7 +248,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaaa = m0;
                m0 += c1o36 * oMdrho;
                mfaab = m1-m0 * vvz;
-               mfaac = m2-two*	m1 * vvz+vz2 * m0;
+               mfaac = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaba+mfabc;
                m1 = mfabc-mfaba;
@@ -255,7 +256,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaba = m0;
                m0 += c1o9 * oMdrho;
                mfabb = m1-m0 * vvz;
-               mfabc = m2-two*	m1 * vvz+vz2 * m0;
+               mfabc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaca+mfacc;
                m1 = mfacc-mfaca;
@@ -263,7 +264,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaca = m0;
                m0 += c1o36 * oMdrho;
                mfacb = m1-m0 * vvz;
-               mfacc = m2-two*	m1 * vvz+vz2 * m0;
+               mfacc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbaa+mfbac;
@@ -272,7 +273,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbaa = m0;
                m0 += c1o9 * oMdrho;
                mfbab = m1-m0 * vvz;
-               mfbac = m2-two*	m1 * vvz+vz2 * m0;
+               mfbac = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbba+mfbbc;
                m1 = mfbbc-mfbba;
@@ -280,7 +281,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbba = m0;
                m0 += c4o9 * oMdrho;
                mfbbb = m1-m0 * vvz;
-               mfbbc = m2-two*	m1 * vvz+vz2 * m0;
+               mfbbc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbca+mfbcc;
                m1 = mfbcc-mfbca;
@@ -288,7 +289,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbca = m0;
                m0 += c1o9 * oMdrho;
                mfbcb = m1-m0 * vvz;
-               mfbcc = m2-two*	m1 * vvz+vz2 * m0;
+               mfbcc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcaa+mfcac;
@@ -297,7 +298,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcaa = m0;
                m0 += c1o36 * oMdrho;
                mfcab = m1-m0 * vvz;
-               mfcac = m2-two*	m1 * vvz+vz2 * m0;
+               mfcac = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcba+mfcbc;
                m1 = mfcbc-mfcba;
@@ -305,7 +306,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcba = m0;
                m0 += c1o9 * oMdrho;
                mfcbb = m1-m0 * vvz;
-               mfcbc = m2-two*	m1 * vvz+vz2 * m0;
+               mfcbc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcca+mfccc;
                m1 = mfccc-mfcca;
@@ -313,7 +314,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcca = m0;
                m0 += c1o36 * oMdrho;
                mfccb = m1-m0 * vvz;
-               mfccc = m2-two*	m1 * vvz+vz2 * m0;
+               mfccc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
@@ -325,14 +326,14 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaaa = m0;
                m0 += c1o6 * oMdrho;
                mfaba = m1-m0 * vvy;
-               mfaca = m2-two*	m1 * vvy+vy2 * m0;
+               mfaca = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaab+mfacb;
                m1 = mfacb-mfaab;
                m0 = m2+mfabb;
                mfaab = m0;
                mfabb = m1-m0 * vvy;
-               mfacb = m2-two*	m1 * vvy+vy2 * m0;
+               mfacb = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaac+mfacc;
                m1 = mfacc-mfaac;
@@ -340,7 +341,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaac = m0;
                m0 += c1o18 * oMdrho;
                mfabc = m1-m0 * vvy;
-               mfacc = m2-two*	m1 * vvy+vy2 * m0;
+               mfacc = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbaa+mfbca;
@@ -349,14 +350,14 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbaa = m0;
                m0 += c2o3 * oMdrho;
                mfbba = m1-m0 * vvy;
-               mfbca = m2-two*	m1 * vvy+vy2 * m0;
+               mfbca = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbab+mfbcb;
                m1 = mfbcb-mfbab;
                m0 = m2+mfbbb;
                mfbab = m0;
                mfbbb = m1-m0 * vvy;
-               mfbcb = m2-two*	m1 * vvy+vy2 * m0;
+               mfbcb = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbac+mfbcc;
                m1 = mfbcc-mfbac;
@@ -364,7 +365,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfbac = m0;
                m0 += c2o9 * oMdrho;
                mfbbc = m1-m0 * vvy;
-               mfbcc = m2-two*	m1 * vvy+vy2 * m0;
+               mfbcc = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcaa+mfcca;
@@ -373,14 +374,14 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcaa = m0;
                m0 += c1o6 * oMdrho;
                mfcba = m1-m0 * vvy;
-               mfcca = m2-two*	m1 * vvy+vy2 * m0;
+               mfcca = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcab+mfccb;
                m1 = mfccb-mfcab;
                m0 = m2+mfcbb;
                mfcab = m0;
                mfcbb = m1-m0 * vvy;
-               mfccb = m2-two*	m1 * vvy+vy2 * m0;
+               mfccb = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcac+mfccc;
                m1 = mfccc-mfcac;
@@ -388,7 +389,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfcac = m0;
                m0 += c1o18 * oMdrho;
                mfcbc = m1-m0 * vvy;
-               mfccc = m2-two*	m1 * vvy+vy2 * m0;
+               mfccc = m2-c2o1*	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
@@ -398,16 +399,16 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                m1 = mfcaa-mfaaa;
                m0 = m2+mfbaa;
                mfaaa = m0;
-               m0 += one* oMdrho;
+               m0 += c1o1* oMdrho;
                mfbaa = m1-m0 * vvx;
-               mfcaa = m2-two*	m1 * vvx+vx2 * m0;
+               mfcaa = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaba+mfcba;
                m1 = mfcba-mfaba;
                m0 = m2+mfbba;
                mfaba = m0;
                mfbba = m1-m0 * vvx;
-               mfcba = m2-two*	m1 * vvx+vx2 * m0;
+               mfcba = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaca+mfcca;
                m1 = mfcca-mfaca;
@@ -415,7 +416,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaca = m0;
                m0 += c1o3 * oMdrho;
                mfbca = m1-m0 * vvx;
-               mfcca = m2-two*	m1 * vvx+vx2 * m0;
+               mfcca = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaab+mfcab;
@@ -423,21 +424,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                m0 = m2+mfbab;
                mfaab = m0;
                mfbab = m1-m0 * vvx;
-               mfcab = m2-two*	m1 * vvx+vx2 * m0;
+               mfcab = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfabb+mfcbb;
                m1 = mfcbb-mfabb;
                m0 = m2+mfbbb;
                mfabb = m0;
                mfbbb = m1-m0 * vvx;
-               mfcbb = m2-two*	m1 * vvx+vx2 * m0;
+               mfcbb = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfacb+mfccb;
                m1 = mfccb-mfacb;
                m0 = m2+mfbcb;
                mfacb = m0;
                mfbcb = m1-m0 * vvx;
-               mfccb = m2-two*	m1 * vvx+vx2 * m0;
+               mfccb = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaac+mfcac;
@@ -446,14 +447,14 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfaac = m0;
                m0 += c1o3 * oMdrho;
                mfbac = m1-m0 * vvx;
-               mfcac = m2-two*	m1 * vvx+vx2 * m0;
+               mfcac = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfabc+mfcbc;
                m1 = mfcbc-mfabc;
                m0 = m2+mfbbc;
                mfabc = m0;
                mfbbc = m1-m0 * vvx;
-               mfcbc = m2-two*	m1 * vvx+vx2 * m0;
+               mfcbc = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfacc+mfccc;
                m1 = mfccc-mfacc;
@@ -461,7 +462,7 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                mfacc = m0;
                m0 += c1o9 * oMdrho;
                mfbcc = m1-m0 * vvx;
-               mfccc = m2-two*	m1 * vvx+vx2 * m0;
+               mfccc = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
 
@@ -505,47 +506,47 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////
                //4.
                //////////////////////////////
-               LBMReal O4 = one;
+               real O4 = c1o1;
                //////////////////////////////
-               //LBMReal O4        = omega;//TRT
+               //real O4        = omega;//TRT
                ////////////////////////////////////////////////////////////
                //5.
                //////////////////////////////
-               LBMReal O5 = one;
+               real O5 = c1o1;
                ////////////////////////////////////////////////////////////
                //6.
                //////////////////////////////
-               LBMReal O6 = one;
+               real O6 = c1o1;
                ////////////////////////////////////////////////////////////
 
 
                //central moments to cumulants
                //4.
-               LBMReal CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
-               LBMReal CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
-               LBMReal CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
+               real CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+c2o1 * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
+               real CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+c2o1 * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
+               real CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+c2o1 * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
 
-               LBMReal CUMcca = mfcca-(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
-               LBMReal CUMcac = mfcac-(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
-               LBMReal CUMacc = mfacc-(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
+               real CUMcca = mfcca-(((mfcaa * mfaca+c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
+               real CUMcac = mfcac-(((mfcaa * mfaac+c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
+               real CUMacc = mfacc-(((mfaac * mfaca+c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
 
                //5.
-               LBMReal CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-               LBMReal CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-               LBMReal CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+               real CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+c4o1 * mfabb * mfbbb+c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+               real CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+c4o1 * mfbab * mfbbb+c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+               real CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+c4o1 * mfbba * mfbbb+c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                //6.
 
-               LBMReal CUMccc = mfccc+((-four *  mfbbb * mfbbb
+               real CUMccc = mfccc+((-c4o1 *  mfbbb * mfbbb
                   -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                  -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                  -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                  +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                     +two * (mfcaa * mfaca * mfaac)
-                     +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                  - c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                  - c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                  +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                     + c2o1 * (mfcaa * mfaca * mfaac)
+                     + c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                   -c1o3 * (mfacc+mfcac+mfcca)/rho
                   -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                  +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                  +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                      +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                   +c1o27*((drho * drho-drho)/(rho*rho)));
                //+ c1o27*(one -three/rho +two/(rho*rho)));
@@ -555,9 +556,9 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
 
    //2.
    // linear combinations
-               LBMReal mxxPyyPzz = mfcaa+mfaca+mfaac;
-               LBMReal mxxMyy = mfcaa-mfaca;
-               LBMReal mxxMzz = mfcaa-mfaac;
+               real mxxPyyPzz = mfcaa+mfaca+mfaac;
+               real mxxMyy = mfcaa-mfaca;
+               real mxxMzz = mfcaa-mfaac;
 
                //////////////////////////////////////////////////////////////////////////
       // 			LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
@@ -597,24 +598,24 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
 
-               LBMReal dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
-               LBMReal dyuy = dxux+omega * c3o2 * mxxMyy;
-               LBMReal dzuz = dxux+omega * c3o2 * mxxMzz;
+               real dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
+               real dyuy = dxux+omega * c3o2 * mxxMyy;
+               real dzuz = dxux+omega * c3o2 * mxxMzz;
 
-               LBMReal Dxy =-three*omega*mfbba;
-               LBMReal Dxz =-three*omega*mfbab;
-               LBMReal Dyz =-three*omega*mfabb;
+               real Dxy =-c3o1 *omega*mfbba;
+               real Dxz =-c3o1 *omega*mfbab;
+               real Dyz =-c3o1 *omega*mfabb;
 
 
 
                //relax
 
-               wadjust = OxxPyyPzz+(one-OxxPyyPzz)*fabs((mfaaa-mxxPyyPzz))/(fabs((mfaaa-mxxPyyPzz))+qudricLimitD);
-               mxxPyyPzz += wadjust*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);
+               wadjust = OxxPyyPzz+(c1o1 -OxxPyyPzz)*fabs((mfaaa-mxxPyyPzz))/(fabs((mfaaa-mxxPyyPzz))+qudricLimitD);
+               mxxPyyPzz += wadjust*(mfaaa-mxxPyyPzz)- c3o1 * (c1o1 -c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);
 
               // mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-               mxxMyy += omega * (-mxxMyy)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
-               mxxMzz += omega * (-mxxMzz)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
+               mxxMyy += omega * (-mxxMyy)-c3o1 * (c1o1 +c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
+               mxxMzz += omega * (-mxxMzz)-c3o1 * (c1o1 +c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
 
                //////////////////////////////////////////////////////////////////////////
                //limiter-Scheise Teil 2
@@ -644,37 +645,37 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
 
                // linear combinations back
                mfcaa = c1o3 * (mxxMyy+mxxMzz+mxxPyyPzz);
-               mfaca = c1o3 * (-two*  mxxMyy+mxxMzz+mxxPyyPzz);
-               mfaac = c1o3 * (mxxMyy-two* mxxMzz+mxxPyyPzz);
+               mfaca = c1o3 * (-c2o1 *  mxxMyy+mxxMzz+mxxPyyPzz);
+               mfaac = c1o3 * (mxxMyy-c2o1 * mxxMzz+mxxPyyPzz);
 
                //3.
                // linear combinations
 
-               LBMReal mxxyPyzz = mfcba+mfabc;
-               LBMReal mxxyMyzz = mfcba-mfabc;
+               real mxxyPyzz = mfcba+mfabc;
+               real mxxyMyzz = mfcba-mfabc;
 
-               LBMReal mxxzPyyz = mfcab+mfacb;
-               LBMReal mxxzMyyz = mfcab-mfacb;
+               real mxxzPyyz = mfcab+mfacb;
+               real mxxzMyyz = mfcab-mfacb;
 
-               LBMReal mxyyPxzz = mfbca+mfbac;
-               LBMReal mxyyMxzz = mfbca-mfbac;
+               real mxyyPxzz = mfbca+mfbac;
+               real mxyyMxzz = mfbca-mfbac;
 
                //relax
                //////////////////////////////////////////////////////////////////////////
                //das ist der limiter
-               wadjust = Oxyz+(one-Oxyz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimitD);
+               wadjust = Oxyz+(c1o1-Oxyz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimitD);
                mfbbb += wadjust * (-mfbbb);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxxyPyzz)/(fabs(mxxyPyzz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxxyPyzz)/(fabs(mxxyPyzz)+qudricLimitP);
                mxxyPyzz += wadjust * (-mxxyPyzz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxxyMyzz)/(fabs(mxxyMyzz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxxyMyzz)/(fabs(mxxyMyzz)+qudricLimitM);
                mxxyMyzz += wadjust * (-mxxyMyzz);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxxzPyyz)/(fabs(mxxzPyyz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxxzPyyz)/(fabs(mxxzPyyz)+qudricLimitP);
                mxxzPyyz += wadjust * (-mxxzPyyz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxxzMyyz)/(fabs(mxxzMyyz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxxzMyyz)/(fabs(mxxzMyyz)+qudricLimitM);
                mxxzMyyz += wadjust * (-mxxzMyyz);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxyyPxzz)/(fabs(mxyyPxzz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxyyPxzz)/(fabs(mxyyPxzz)+qudricLimitP);
                mxyyPxzz += wadjust * (-mxyyPxzz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxyyMxzz)/(fabs(mxyyMxzz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxyyMxzz)/(fabs(mxyyMxzz)+qudricLimitM);
                mxyyMxzz += wadjust * (-mxyyMxzz);
                //////////////////////////////////////////////////////////////////////////
                //ohne limiter
@@ -725,12 +726,12 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                //CUMbbc += O4 * (-CUMbbc);
                //CUMbcb += O4 * (-CUMbcb);
                //CUMcbb += O4 * (-CUMcbb);
-               CUMacc = -O4*(one / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-               CUMcac = -O4*(one / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-               CUMcca = -O4*(one / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-               CUMbbc = -O4*(one / omega - c1o2) * Dxy           * c1o3 * B + (one - O4) * (CUMbbc);
-               CUMbcb = -O4*(one / omega - c1o2) * Dxz           * c1o3 * B + (one - O4) * (CUMbcb);
-               CUMcbb = -O4*(one / omega - c1o2) * Dyz           * c1o3 * B + (one - O4) * (CUMcbb);
+               CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+               CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+               CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+               CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy           * c1o3 * B + (c1o1 - O4) * (CUMbbc);
+               CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz           * c1o3 * B + (c1o1 - O4) * (CUMbcb);
+               CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz           * c1o3 * B + (c1o1 - O4) * (CUMcbb);
                //////////////////////////////////////////////////////////////////////////
 
 
@@ -746,31 +747,31 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
 
                //back cumulants to central moments
                //4.
-               mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;
-               mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho;
-               mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho;
+               mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+c2o1 * mfbba * mfbab)/rho;
+               mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+c2o1 * mfbba * mfabb)/rho;
+               mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+c2o1 * mfbab * mfabb)/rho;
 
-               mfcca = CUMcca+(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
-               mfcac = CUMcac+(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
-               mfacc = CUMacc+(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfcca = CUMcca+(((mfcaa * mfaca+c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfcac = CUMcac+(((mfcaa * mfaac+c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfacc = CUMacc+(((mfaac * mfaca+c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
 
                //5.
-               mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-               mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-               mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+               mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+c4o1 * mfabb * mfbbb+c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+               mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+c4o1 * mfbab * mfbbb+c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+               mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+c4o1 * mfbba * mfbbb+c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                //6.
 
-               mfccc = CUMccc-((-four *  mfbbb * mfbbb
+               mfccc = CUMccc-((-c4o1 *  mfbbb * mfbbb
                   -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                  -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                  -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                  +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                     +two * (mfcaa * mfaca * mfaac)
-                     +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                  - c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                  - c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                  +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                     + c2o1 * (mfcaa * mfaca * mfaac)
+                     + c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                   -c1o3 * (mfacc+mfcac+mfcca)/rho
                   -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                  +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                  +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                      +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                   +c1o27*((drho * drho-drho)/(rho*rho)));
                ////////////////////////////////////////////////////////////////////////////////////
@@ -786,22 +787,22 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
          //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
          ////////////////////////////////////////////////////////////////////////////////////
          // Z - Dir
-               m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+one* oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfaac-two* mfaab *  vvz+mfaaa                * (one-vz2)-one* oMdrho * vz2;
-               m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+one* oMdrho) * (vz2+vvz) * c1o2;
+               m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+ c1o1 * oMdrho) * (vz2-vvz) * c1o2;
+               m1 = -mfaac-c2o1* mfaab *  vvz+mfaaa                * (c1o1-vz2)- c1o1 * oMdrho * vz2;
+               m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+ c1o1 * oMdrho) * (vz2+vvz) * c1o2;
                mfaaa = m0;
                mfaab = m1;
                mfaac = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfabc * c1o2+mfabb * (vvz-c1o2)+mfaba * (vz2-vvz) * c1o2;
-               m1 = -mfabc-two* mfabb *  vvz+mfaba * (one-vz2);
+               m1 = -mfabc-c2o1* mfabb *  vvz+mfaba * (c1o1-vz2);
                m2 = mfabc * c1o2+mfabb * (vvz+c1o2)+mfaba * (vz2+vvz) * c1o2;
                mfaba = m0;
                mfabb = m1;
                mfabc = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacc * c1o2+mfacb * (vvz-c1o2)+(mfaca+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfacc-two* mfacb *  vvz+mfaca                  * (one-vz2)-c1o3 * oMdrho * vz2;
+               m1 = -mfacc-c2o1* mfacb *  vvz+mfaca                  * (c1o1-vz2)-c1o3 * oMdrho * vz2;
                m2 = mfacc * c1o2+mfacb * (vvz+c1o2)+(mfaca+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                mfaca = m0;
                mfacb = m1;
@@ -809,21 +810,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfbac * c1o2+mfbab * (vvz-c1o2)+mfbaa * (vz2-vvz) * c1o2;
-               m1 = -mfbac-two* mfbab *  vvz+mfbaa * (one-vz2);
+               m1 = -mfbac-c2o1* mfbab *  vvz+mfbaa * (c1o1-vz2);
                m2 = mfbac * c1o2+mfbab * (vvz+c1o2)+mfbaa * (vz2+vvz) * c1o2;
                mfbaa = m0;
                mfbab = m1;
                mfbac = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbbc * c1o2+mfbbb * (vvz-c1o2)+mfbba * (vz2-vvz) * c1o2;
-               m1 = -mfbbc-two* mfbbb *  vvz+mfbba * (one-vz2);
+               m1 = -mfbbc-c2o1* mfbbb *  vvz+mfbba * (c1o1-vz2);
                m2 = mfbbc * c1o2+mfbbb * (vvz+c1o2)+mfbba * (vz2+vvz) * c1o2;
                mfbba = m0;
                mfbbb = m1;
                mfbbc = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcc * c1o2+mfbcb * (vvz-c1o2)+mfbca * (vz2-vvz) * c1o2;
-               m1 = -mfbcc-two* mfbcb *  vvz+mfbca * (one-vz2);
+               m1 = -mfbcc-c2o1* mfbcb *  vvz+mfbca * (c1o1-vz2);
                m2 = mfbcc * c1o2+mfbcb * (vvz+c1o2)+mfbca * (vz2+vvz) * c1o2;
                mfbca = m0;
                mfbcb = m1;
@@ -831,21 +832,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcac * c1o2+mfcab * (vvz-c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfcac-two* mfcab *  vvz+mfcaa                  * (one-vz2)-c1o3 * oMdrho * vz2;
+               m1 = -mfcac- c2o1* mfcab *  vvz+mfcaa                  * (c1o1-vz2)-c1o3 * oMdrho * vz2;
                m2 = mfcac * c1o2+mfcab * (vvz+c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                mfcaa = m0;
                mfcab = m1;
                mfcac = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfcbc * c1o2+mfcbb * (vvz-c1o2)+mfcba * (vz2-vvz) * c1o2;
-               m1 = -mfcbc-two* mfcbb *  vvz+mfcba * (one-vz2);
+               m1 = -mfcbc-c2o1* mfcbb *  vvz+mfcba * (c1o1-vz2);
                m2 = mfcbc * c1o2+mfcbb * (vvz+c1o2)+mfcba * (vz2+vvz) * c1o2;
                mfcba = m0;
                mfcbb = m1;
                mfcbc = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfccb * (vvz-c1o2)+(mfcca+c1o9 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfccc-two* mfccb *  vvz+mfcca                  * (one-vz2)-c1o9 * oMdrho * vz2;
+               m1 = -mfccc-c2o1* mfccb *  vvz+mfcca                  * (c1o1-vz2)-c1o9 * oMdrho * vz2;
                m2 = mfccc * c1o2+mfccb * (vvz+c1o2)+(mfcca+c1o9 * oMdrho) * (vz2+vvz) * c1o2;
                mfcca = m0;
                mfccb = m1;
@@ -856,21 +857,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // Y - Dir
                m0 = mfaca * c1o2+mfaba * (vvy-c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfaca-two* mfaba *  vvy+mfaaa                  * (one-vy2)-c1o6 * oMdrho * vy2;
+               m1 = -mfaca-c2o1* mfaba *  vvy+mfaaa                  * (c1o1-vy2)-c1o6 * oMdrho * vy2;
                m2 = mfaca * c1o2+mfaba * (vvy+c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                mfaaa = m0;
                mfaba = m1;
                mfaca = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacb * c1o2+mfabb * (vvy-c1o2)+(mfaab+c2o3 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfacb-two* mfabb *  vvy+mfaab                  * (one-vy2)-c2o3 * oMdrho * vy2;
+               m1 = -mfacb-c2o1* mfabb *  vvy+mfaab                  * (c1o1-vy2)-c2o3 * oMdrho * vy2;
                m2 = mfacb * c1o2+mfabb * (vvy+c1o2)+(mfaab+c2o3 * oMdrho) * (vy2+vvy) * c1o2;
                mfaab = m0;
                mfabb = m1;
                mfacb = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacc * c1o2+mfabc * (vvy-c1o2)+(mfaac+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfacc-two* mfabc *  vvy+mfaac                  * (one-vy2)-c1o6 * oMdrho * vy2;
+               m1 = -mfacc-c2o1* mfabc *  vvy+mfaac                  * (c1o1-vy2)-c1o6 * oMdrho * vy2;
                m2 = mfacc * c1o2+mfabc * (vvy+c1o2)+(mfaac+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                mfaac = m0;
                mfabc = m1;
@@ -878,21 +879,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfbca * c1o2+mfbba * (vvy-c1o2)+mfbaa * (vy2-vvy) * c1o2;
-               m1 = -mfbca-two* mfbba *  vvy+mfbaa * (one-vy2);
+               m1 = -mfbca-c2o1* mfbba *  vvy+mfbaa * (c1o1-vy2);
                m2 = mfbca * c1o2+mfbba * (vvy+c1o2)+mfbaa * (vy2+vvy) * c1o2;
                mfbaa = m0;
                mfbba = m1;
                mfbca = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcb * c1o2+mfbbb * (vvy-c1o2)+mfbab * (vy2-vvy) * c1o2;
-               m1 = -mfbcb-two* mfbbb *  vvy+mfbab * (one-vy2);
+               m1 = -mfbcb-c2o1* mfbbb *  vvy+mfbab * (c1o1-vy2);
                m2 = mfbcb * c1o2+mfbbb * (vvy+c1o2)+mfbab * (vy2+vvy) * c1o2;
                mfbab = m0;
                mfbbb = m1;
                mfbcb = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcc * c1o2+mfbbc * (vvy-c1o2)+mfbac * (vy2-vvy) * c1o2;
-               m1 = -mfbcc-two* mfbbc *  vvy+mfbac * (one-vy2);
+               m1 = -mfbcc-c2o1* mfbbc *  vvy+mfbac * (c1o1-vy2);
                m2 = mfbcc * c1o2+mfbbc * (vvy+c1o2)+mfbac * (vy2+vvy) * c1o2;
                mfbac = m0;
                mfbbc = m1;
@@ -900,21 +901,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcca * c1o2+mfcba * (vvy-c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfcca-two* mfcba *  vvy+mfcaa                   * (one-vy2)-c1o18 * oMdrho * vy2;
+               m1 = -mfcca-c2o1* mfcba *  vvy+mfcaa                   * (c1o1-vy2)-c1o18 * oMdrho * vy2;
                m2 = mfcca * c1o2+mfcba * (vvy+c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                mfcaa = m0;
                mfcba = m1;
                mfcca = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccb * c1o2+mfcbb * (vvy-c1o2)+(mfcab+c2o9 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfccb-two* mfcbb *  vvy+mfcab                  * (one-vy2)-c2o9 * oMdrho * vy2;
+               m1 = -mfccb-c2o1* mfcbb *  vvy+mfcab                  * (c1o1-vy2)-c2o9 * oMdrho * vy2;
                m2 = mfccb * c1o2+mfcbb * (vvy+c1o2)+(mfcab+c2o9 * oMdrho) * (vy2+vvy) * c1o2;
                mfcab = m0;
                mfcbb = m1;
                mfccb = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfcbc * (vvy-c1o2)+(mfcac+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfccc-two* mfcbc *  vvy+mfcac                   * (one-vy2)-c1o18 * oMdrho * vy2;
+               m1 = -mfccc-c2o1* mfcbc *  vvy+mfcac                   * (c1o1-vy2)-c1o18 * oMdrho * vy2;
                m2 = mfccc * c1o2+mfcbc * (vvy+c1o2)+(mfcac+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                mfcac = m0;
                mfcbc = m1;
@@ -925,21 +926,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // X - Dir
                m0 = mfcaa * c1o2+mfbaa * (vvx-c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcaa-two* mfbaa *  vvx+mfaaa                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcaa-c2o1* mfbaa *  vvx+mfaaa                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcaa * c1o2+mfbaa * (vvx+c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaaa = m0;
                mfbaa = m1;
                mfcaa = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcba * c1o2+mfbba * (vvx-c1o2)+(mfaba+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcba-two* mfbba *  vvx+mfaba                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcba-c2o1* mfbba *  vvx+mfaba                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcba * c1o2+mfbba * (vvx+c1o2)+(mfaba+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfaba = m0;
                mfbba = m1;
                mfcba = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcca * c1o2+mfbca * (vvx-c1o2)+(mfaca+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcca-two* mfbca *  vvx+mfaca                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcca-c2o1* mfbca *  vvx+mfaca                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcca * c1o2+mfbca * (vvx+c1o2)+(mfaca+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaca = m0;
                mfbca = m1;
@@ -947,21 +948,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcab * c1o2+mfbab * (vvx-c1o2)+(mfaab+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcab-two* mfbab *  vvx+mfaab                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcab-c2o1* mfbab *  vvx+mfaab                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcab * c1o2+mfbab * (vvx+c1o2)+(mfaab+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfaab = m0;
                mfbab = m1;
                mfcab = m2;
                ///////////b////////////////////////////////////////////////////////////////////////
                m0 = mfcbb * c1o2+mfbbb * (vvx-c1o2)+(mfabb+c4o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcbb-two* mfbbb *  vvx+mfabb                  * (one-vx2)-c4o9 * oMdrho * vx2;
+               m1 = -mfcbb-c2o1* mfbbb *  vvx+mfabb                  * (c1o1-vx2)-c4o9 * oMdrho * vx2;
                m2 = mfcbb * c1o2+mfbbb * (vvx+c1o2)+(mfabb+c4o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfabb = m0;
                mfbbb = m1;
                mfcbb = m2;
                ///////////b////////////////////////////////////////////////////////////////////////
                m0 = mfccb * c1o2+mfbcb * (vvx-c1o2)+(mfacb+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfccb-two* mfbcb *  vvx+mfacb                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfccb-c2o1* mfbcb *  vvx+mfacb                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                m2 = mfccb * c1o2+mfbcb * (vvx+c1o2)+(mfacb+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfacb = m0;
                mfbcb = m1;
@@ -969,21 +970,21 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcac * c1o2+mfbac * (vvx-c1o2)+(mfaac+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcac-two* mfbac *  vvx+mfaac                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcac-c2o1* mfbac *  vvx+mfaac                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcac * c1o2+mfbac * (vvx+c1o2)+(mfaac+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaac = m0;
                mfbac = m1;
                mfcac = m2;
                ///////////c////////////////////////////////////////////////////////////////////////
                m0 = mfcbc * c1o2+mfbbc * (vvx-c1o2)+(mfabc+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcbc-two* mfbbc *  vvx+mfabc                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcbc-c2o1* mfbbc *  vvx+mfabc                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcbc * c1o2+mfbbc * (vvx+c1o2)+(mfabc+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfabc = m0;
                mfbbc = m1;
                mfcbc = m2;
                ///////////c////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfbcc * (vvx-c1o2)+(mfacc+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfccc-two* mfbcc *  vvx+mfacc                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfccc-c2o1* mfbcc *  vvx+mfacc                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                m2 = mfccc * c1o2+mfbcc * (vvx+c1o2)+(mfacc+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfacc = m0;
                mfbcc = m1;
@@ -994,11 +995,11 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
                //proof correctness
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
                //LBMReal dif = fabs(rho - rho_post);
-               LBMReal dif = drho - drho_post;
+               real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
                if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -1052,13 +1053,13 @@ void CompressibleCumulant4thOrderViscosityLBMKernel::calculate(int step)
    //timer.stop();
 }
 //////////////////////////////////////////////////////////////////////////
-double CompressibleCumulant4thOrderViscosityLBMKernel::getCalculationTime()
+real CompressibleCumulant4thOrderViscosityLBMKernel::getCalculationTime() // Returns timer.getTotalTime(). NOTE(review): the result is now converted to 'real'; if 'real' can be single precision this narrows the timer value - confirm.
 {
    //return timer.getDuration();
    return timer.getTotalTime();
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleCumulant4thOrderViscosityLBMKernel::setBulkViscosity(LBMReal value)
+void CompressibleCumulant4thOrderViscosityLBMKernel::setBulkViscosity(real value) // Stores 'value' in the bulkViscosity member; no validation is performed here.
 {
    bulkViscosity = value;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.h
index 2cdd9c5b32b0068b5e586a7033a2456f72167d31..fc3e0dffb6abf836995aaecc95f07fc4fcaf1d64 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulant4thOrderViscosityLBMKernel.h
@@ -21,29 +21,29 @@ public:
    ~CompressibleCumulant4thOrderViscosityLBMKernel() override;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    //! The value should not be equal to a shear viscosity
-   void setBulkViscosity(LBMReal value);
+   void setBulkViscosity(real value);
 protected:
    virtual void initDataSet();
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
 
    UbTimer timer;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
    
    // bulk viscosity
-   LBMReal OxxPyyPzz; //omega2 (bulk viscosity)
-   LBMReal bulkViscosity;
+   real OxxPyyPzz; //omega2 (bulk viscosity)
+   real bulkViscosity;
 
 };
 #endif // CompressibleCumulant4thOrderViscosityLBMKernel_h__
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.cpp
index 5542d2c9851e362e012a2950600a5225441f6644..1c806225b9730e3f6b42c3487e20edf298199bf1 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.cpp
@@ -7,7 +7,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CompressibleCumulantLBMKernel::CompressibleCumulantLBMKernel()
@@ -59,7 +60,7 @@ SPtr<LBMKernel> CompressibleCumulantLBMKernel::clone()
    }
    else
    {
-      dynamicPointerCast<CompressibleCumulantLBMKernel>(kernel)->OxxPyyPzz = one;
+      dynamicPointerCast<CompressibleCumulantLBMKernel>(kernel)->OxxPyyPzz = c1o1;
    }
    return kernel;
 }
@@ -113,7 +114,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
    int maxX2 = bcArrayMaxX2-ghostLayerWidth;
    int maxX3 = bcArrayMaxX3-ghostLayerWidth;
 
-   LBMReal omega = collFactor;
+   real omega = collFactor;
 
 
    //#pragma omp parallel num_threads(8)
@@ -154,50 +155,50 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   // a b c
                   //-1 0 1
 
-                  LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-                  LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-                  LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-                  LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-                  LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-                  LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-                  LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-                  LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-                  LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-                  LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-                  LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-                  LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-                  LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-                  LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-                  LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-                  LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-                  LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-                  LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-                  LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-                  LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-                  LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-                  LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-                  LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                  LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                  LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                  LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                  LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-                  ////////////////////////////////////////////////////////////////////////////////////
-                  LBMReal drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
+                  real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+                  real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+                  real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+                  real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+                  real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+                  real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+                  real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+                  real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+                  real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+                  real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+                  real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+                  real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+                  real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+                  real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+                  real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+                  real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+                  real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+                  real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+                  real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+                  real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+                  real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+                  real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+                  real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                  real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                  real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                  real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                  real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  real drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
                      (((mfbac+mfbca)+(mfbaa+mfbcc))+((mfabc+mfcba)+(mfaba+mfcbc))+((mfacb+mfcab)+(mfaab+mfccb)))+
                      ((mfabb+mfcbb)+(mfbab+mfbcb))+(mfbba+mfbbc))+mfbbb;
 
-                  LBMReal rho = one+drho;
+                  real rho = c1o1+drho;
                   ////////////////////////////////////////////////////////////////////////////////////
-                  LBMReal vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
+                  real vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
                      (((mfcba-mfabc)+(mfcbc-mfaba))+((mfcab-mfacb)+(mfccb-mfaab)))+
                      (mfcbb-mfabb))/rho;
-                  LBMReal vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
+                  real vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
                      (((mfbca-mfbac)+(mfbcc-mfbaa))+((mfacb-mfcab)+(mfccb-mfaab)))+
                      (mfbcb-mfbab))/rho;
-                  LBMReal vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
+                  real vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
                      (((mfbac-mfbca)+(mfbcc-mfbaa))+((mfabc-mfcba)+(mfcbc-mfaba)))+
                      (mfbbc-mfbba))/rho;
                   ////////////////////////////////////////////////////////////////////////////////////
@@ -206,9 +207,9 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ///////////////////////////////////////////////////////////////////////////////////////////
                   if (withForcing)
                   {
-                     muX1 = static_cast<double>(x1-1+ix1*maxX1);
-                     muX2 = static_cast<double>(x2-1+ix2*maxX2);
-                     muX3 = static_cast<double>(x3-1+ix3*maxX3);
+                     muX1 = static_cast<real>(x1-1+ix1*maxX1);
+                     muX2 = static_cast<real>(x2-1+ix2*maxX2);
+                     muX3 = static_cast<real>(x3-1+ix3*maxX3);
 
                      forcingX1 = muForcingX1.Eval();
                      forcingX2 = muForcingX2.Eval();
@@ -220,12 +221,12 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   }
                   ///////////////////////////////////////////////////////////////////////////////////////////               
             ////////////////////////////////////////////////////////////////////////////////////
-                  LBMReal oMdrho = one; // comp special
+                  real oMdrho = c1o1; // comp special
                   ////////////////////////////////////////////////////////////////////////////////////
-                  LBMReal m0, m1, m2;
-                  LBMReal vx2;
-                  LBMReal vy2;
-                  LBMReal vz2;
+                  real m0, m1, m2;
+                  real vx2;
+                  real vy2;
+                  real vz2;
                   vx2 = vvx*vvx;
                   vy2 = vvy*vvy;
                   vz2 = vvz*vvz;
@@ -249,7 +250,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaaa = m0;
                   m0 += c1o36 * oMdrho;
                   mfaab = m1-m0 * vvz;
-                  mfaac = m2-two*	m1 * vvz+vz2 * m0;
+                  mfaac = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaba+mfabc;
                   m1 = mfabc-mfaba;
@@ -257,7 +258,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaba = m0;
                   m0 += c1o9 * oMdrho;
                   mfabb = m1-m0 * vvz;
-                  mfabc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfabc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaca+mfacc;
                   m1 = mfacc-mfaca;
@@ -265,7 +266,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaca = m0;
                   m0 += c1o36 * oMdrho;
                   mfacb = m1-m0 * vvz;
-                  mfacc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfacc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbaa+mfbac;
@@ -274,7 +275,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbaa = m0;
                   m0 += c1o9 * oMdrho;
                   mfbab = m1-m0 * vvz;
-                  mfbac = m2-two*	m1 * vvz+vz2 * m0;
+                  mfbac = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbba+mfbbc;
                   m1 = mfbbc-mfbba;
@@ -282,7 +283,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbba = m0;
                   m0 += c4o9 * oMdrho;
                   mfbbb = m1-m0 * vvz;
-                  mfbbc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfbbc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbca+mfbcc;
                   m1 = mfbcc-mfbca;
@@ -290,7 +291,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbca = m0;
                   m0 += c1o9 * oMdrho;
                   mfbcb = m1-m0 * vvz;
-                  mfbcc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfbcc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcaa+mfcac;
@@ -299,7 +300,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcaa = m0;
                   m0 += c1o36 * oMdrho;
                   mfcab = m1-m0 * vvz;
-                  mfcac = m2-two*	m1 * vvz+vz2 * m0;
+                  mfcac = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcba+mfcbc;
                   m1 = mfcbc-mfcba;
@@ -307,7 +308,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcba = m0;
                   m0 += c1o9 * oMdrho;
                   mfcbb = m1-m0 * vvz;
-                  mfcbc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfcbc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcca+mfccc;
                   m1 = mfccc-mfcca;
@@ -315,7 +316,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcca = m0;
                   m0 += c1o36 * oMdrho;
                   mfccb = m1-m0 * vvz;
-                  mfccc = m2-two*	m1 * vvz+vz2 * m0;
+                  mfccc = m2-c2o1*	m1 * vvz+vz2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
@@ -327,14 +328,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaaa = m0;
                   m0 += c1o6 * oMdrho;
                   mfaba = m1-m0 * vvy;
-                  mfaca = m2-two*	m1 * vvy+vy2 * m0;
+                  mfaca = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaab+mfacb;
                   m1 = mfacb-mfaab;
                   m0 = m2+mfabb;
                   mfaab = m0;
                   mfabb = m1-m0 * vvy;
-                  mfacb = m2-two*	m1 * vvy+vy2 * m0;
+                  mfacb = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaac+mfacc;
                   m1 = mfacc-mfaac;
@@ -342,7 +343,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaac = m0;
                   m0 += c1o18 * oMdrho;
                   mfabc = m1-m0 * vvy;
-                  mfacc = m2-two*	m1 * vvy+vy2 * m0;
+                  mfacc = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbaa+mfbca;
@@ -351,14 +352,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbaa = m0;
                   m0 += c2o3 * oMdrho;
                   mfbba = m1-m0 * vvy;
-                  mfbca = m2-two*	m1 * vvy+vy2 * m0;
+                  mfbca = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbab+mfbcb;
                   m1 = mfbcb-mfbab;
                   m0 = m2+mfbbb;
                   mfbab = m0;
                   mfbbb = m1-m0 * vvy;
-                  mfbcb = m2-two*	m1 * vvy+vy2 * m0;
+                  mfbcb = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfbac+mfbcc;
                   m1 = mfbcc-mfbac;
@@ -366,7 +367,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfbac = m0;
                   m0 += c2o9 * oMdrho;
                   mfbbc = m1-m0 * vvy;
-                  mfbcc = m2-two*	m1 * vvy+vy2 * m0;
+                  mfbcc = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcaa+mfcca;
@@ -375,14 +376,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcaa = m0;
                   m0 += c1o6 * oMdrho;
                   mfcba = m1-m0 * vvy;
-                  mfcca = m2-two*	m1 * vvy+vy2 * m0;
+                  mfcca = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcab+mfccb;
                   m1 = mfccb-mfcab;
                   m0 = m2+mfcbb;
                   mfcab = m0;
                   mfcbb = m1-m0 * vvy;
-                  mfccb = m2-two*	m1 * vvy+vy2 * m0;
+                  mfccb = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfcac+mfccc;
                   m1 = mfccc-mfcac;
@@ -390,7 +391,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfcac = m0;
                   m0 += c1o18 * oMdrho;
                   mfcbc = m1-m0 * vvy;
-                  mfccc = m2-two*	m1 * vvy+vy2 * m0;
+                  mfccc = m2-c2o1*	m1 * vvy+vy2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
@@ -400,16 +401,16 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   m1 = mfcaa-mfaaa;
                   m0 = m2+mfbaa;
                   mfaaa = m0;
-                  m0 += one* oMdrho;
+                  m0 += c1o1* oMdrho;
                   mfbaa = m1-m0 * vvx;
-                  mfcaa = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcaa = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaba+mfcba;
                   m1 = mfcba-mfaba;
                   m0 = m2+mfbba;
                   mfaba = m0;
                   mfbba = m1-m0 * vvx;
-                  mfcba = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcba = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaca+mfcca;
                   m1 = mfcca-mfaca;
@@ -417,7 +418,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaca = m0;
                   m0 += c1o3 * oMdrho;
                   mfbca = m1-m0 * vvx;
-                  mfcca = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcca = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaab+mfcab;
@@ -425,21 +426,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   m0 = m2+mfbab;
                   mfaab = m0;
                   mfbab = m1-m0 * vvx;
-                  mfcab = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcab = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfabb+mfcbb;
                   m1 = mfcbb-mfabb;
                   m0 = m2+mfbbb;
                   mfabb = m0;
                   mfbbb = m1-m0 * vvx;
-                  mfcbb = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcbb = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfacb+mfccb;
                   m1 = mfccb-mfacb;
                   m0 = m2+mfbcb;
                   mfacb = m0;
                   mfbcb = m1-m0 * vvx;
-                  mfccb = m2-two*	m1 * vvx+vx2 * m0;
+                  mfccb = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfaac+mfcac;
@@ -448,14 +449,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfaac = m0;
                   m0 += c1o3 * oMdrho;
                   mfbac = m1-m0 * vvx;
-                  mfcac = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcac = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfabc+mfcbc;
                   m1 = mfcbc-mfabc;
                   m0 = m2+mfbbc;
                   mfabc = m0;
                   mfbbc = m1-m0 * vvx;
-                  mfcbc = m2-two*	m1 * vvx+vx2 * m0;
+                  mfcbc = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m2 = mfacc+mfccc;
                   m1 = mfccc-mfacc;
@@ -463,7 +464,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   mfacc = m0;
                   m0 += c1o9 * oMdrho;
                   mfbcc = m1-m0 * vvx;
-                  mfccc = m2-two*	m1 * vvx+vx2 * m0;
+                  mfccc = m2-c2o1*	m1 * vvx+vx2 * m0;
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
 
@@ -477,7 +478,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////
                   //3.
                   //////////////////////////////
-                  LBMReal OxyyPxzz = one;//three  * (two - omega) / (three  - omega);//
+                  real OxyyPxzz = c1o1;//three  * (two - omega) / (three  - omega);//
                   //LBMReal OxyyMxzz = one;//six    * (two - omega) / (six    - omega);//
 //                  LBMReal Oxyz = one;//twelve * (two - omega) / (twelve + omega);//
                   //////////////////////////////
@@ -501,47 +502,47 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////
                   //4.
                   //////////////////////////////
-                  LBMReal O4 = one;
+                  real O4 = c1o1;
                   //////////////////////////////
-                  //LBMReal O4        = omega;//TRT
+                  //real O4        = omega;//TRT
                   ////////////////////////////////////////////////////////////
                   //5.
                   //////////////////////////////
-                  LBMReal O5 = one;
+                  real O5 = c1o1;
                   ////////////////////////////////////////////////////////////
                   //6.
                   //////////////////////////////
-                  LBMReal O6 = one;
+                  real O6 = c1o1;
                   ////////////////////////////////////////////////////////////
 
 
                   //central moments to cumulants
                   //4.
-                  LBMReal CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
-                  LBMReal CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
-                  LBMReal CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
+                  real CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+c2o1 * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
+                  real CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+c2o1 * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
+                  real CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+c2o1 * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
 
-                  LBMReal CUMcca = mfcca-(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
-                  LBMReal CUMcac = mfcac-(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
-                  LBMReal CUMacc = mfacc-(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
+                  real CUMcca = mfcca-(((mfcaa * mfaca+c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
+                  real CUMcac = mfcac-(((mfcaa * mfaac+c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
+                  real CUMacc = mfacc-(((mfaac * mfaca+c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
 
                   //5.
-                  LBMReal CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-                  LBMReal CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-                  LBMReal CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+                  real CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+c4o1 * mfabb * mfbbb+c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+                  real CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+c4o1 * mfbab * mfbbb+c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+                  real CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+c4o1 * mfbba * mfbbb+c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                   //6.
 
-                  LBMReal CUMccc = mfccc+((-four *  mfbbb * mfbbb
+                  real CUMccc = mfccc+((-c4o1 *  mfbbb * mfbbb
                      -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                     -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                     -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                     +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                        +two * (mfcaa * mfaca * mfaac)
-                        +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                     -c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                     -c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                     +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                        +c2o1 * (mfcaa * mfaca * mfaac)
+                        +c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                      -c1o3 * (mfacc+mfcac+mfcca)/rho
                      -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                     +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                     +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                         +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                      +c1o27*((drho * drho-drho)/(rho*rho)));
                   //+ c1o27*(one -three/rho +two/(rho*rho)));
@@ -551,9 +552,9 @@ void CompressibleCumulantLBMKernel::calculate(int step)
 
       //2.
       // linear combinations
-                  LBMReal mxxPyyPzz = mfcaa+mfaca+mfaac;
-                  LBMReal mxxMyy = mfcaa-mfaca;
-                  LBMReal mxxMzz = mfcaa-mfaac;
+                  real mxxPyyPzz = mfcaa+mfaca+mfaac;
+                  real mxxMyy = mfcaa-mfaca;
+                  real mxxMzz = mfcaa-mfaac;
 
                   //////////////////////////////////////////////////////////////////////////
          // 			LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
@@ -593,14 +594,14 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                   //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
                   {
-                     LBMReal dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
-                     LBMReal dyuy = dxux+omega * c3o2 * mxxMyy;
-                     LBMReal dzuz = dxux+omega * c3o2 * mxxMzz;
+                     real dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
+                     real dyuy = dxux+omega * c3o2 * mxxMyy;
+                     real dzuz = dxux+omega * c3o2 * mxxMzz;
 
                      //relax
-                     mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-                     mxxMyy += omega * (-mxxMyy)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
-                     mxxMzz += omega * (-mxxMzz)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
+                     mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-c3o1 * (c1o1-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+                     mxxMyy += omega * (-mxxMyy)- c3o1 * (c1o1+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
+                     mxxMzz += omega * (-mxxMzz)- c3o1 * (c1o1+c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
 
                      //////////////////////////////////////////////////////////////////////////
                      //limiter-Scheise Teil 2
@@ -630,20 +631,20 @@ void CompressibleCumulantLBMKernel::calculate(int step)
 
                   // linear combinations back
                   mfcaa = c1o3 * (mxxMyy+mxxMzz+mxxPyyPzz);
-                  mfaca = c1o3 * (-two*  mxxMyy+mxxMzz+mxxPyyPzz);
-                  mfaac = c1o3 * (mxxMyy-two* mxxMzz+mxxPyyPzz);
+                  mfaca = c1o3 * (-c2o1*  mxxMyy+mxxMzz+mxxPyyPzz);
+                  mfaac = c1o3 * (mxxMyy-c2o1* mxxMzz+mxxPyyPzz);
 
                   //3.
                   // linear combinations
 
-                  LBMReal mxxyPyzz = mfcba+mfabc;
-                  LBMReal mxxyMyzz = mfcba-mfabc;
+                  real mxxyPyzz = mfcba+mfabc;
+                  real mxxyMyzz = mfcba-mfabc;
 
-                  LBMReal mxxzPyyz = mfcab+mfacb;
-                  LBMReal mxxzMyyz = mfcab-mfacb;
+                  real mxxzPyyz = mfcab+mfacb;
+                  real mxxzMyyz = mfcab-mfacb;
 
-                  LBMReal mxyyPxzz = mfbca+mfbac;
-                  LBMReal mxyyMxzz = mfbca-mfbac;
+                  real mxyyPxzz = mfbca+mfbac;
+                  real mxyyMxzz = mfbca-mfbac;
 
                   //relax
                   //////////////////////////////////////////////////////////////////////////
@@ -721,31 +722,31 @@ void CompressibleCumulantLBMKernel::calculate(int step)
 
                   //back cumulants to central moments
                   //4.
-                  mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;
-                  mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho;
-                  mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho;
+                  mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+c2o1 * mfbba * mfbab)/rho;
+                  mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+c2o1 * mfbba * mfabb)/rho;
+                  mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+c2o1 * mfbab * mfabb)/rho;
 
-                  mfcca = CUMcca+(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
-                  mfcac = CUMcac+(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
-                  mfacc = CUMacc+(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+                  mfcca = CUMcca+(((mfcaa * mfaca+c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+                  mfcac = CUMcac+(((mfcaa * mfaac+c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
+                  mfacc = CUMacc+(((mfaac * mfaca+c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
 
                   //5.
-                  mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-                  mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-                  mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+                  mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+c4o1 * mfabb * mfbbb+c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+                  mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+c4o1 * mfbab * mfbbb+c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+                  mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+c4o1 * mfbba * mfbbb+c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                   //6.
 
-                  mfccc = CUMccc-((-four *  mfbbb * mfbbb
+                  mfccc = CUMccc-((-c4o1 *  mfbbb * mfbbb
                      -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                     -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                     -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                     +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                        +two * (mfcaa * mfaca * mfaac)
-                        +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                     -c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                     -c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                     +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                        +c2o1 * (mfcaa * mfaca * mfaac)
+                        +c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                      -c1o3 * (mfacc+mfcac+mfcca)/rho
                      -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                     +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                     +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                         +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                      +c1o27*((drho * drho-drho)/(rho*rho)));
                   ////////////////////////////////////////////////////////////////////////////////////
@@ -761,22 +762,22 @@ void CompressibleCumulantLBMKernel::calculate(int step)
             //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
             ////////////////////////////////////////////////////////////////////////////////////
             // Z - Dir
-                  m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+one* oMdrho) * (vz2-vvz) * c1o2;
-                  m1 = -mfaac-two* mfaab *  vvz+mfaaa                * (one-vz2)-one* oMdrho * vz2;
-                  m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+one* oMdrho) * (vz2+vvz) * c1o2;
+                  m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+c1o1* oMdrho) * (vz2-vvz) * c1o2;
+                  m1 = -mfaac-c2o1* mfaab *  vvz+mfaaa                * (c1o1-vz2)-c1o1* oMdrho * vz2;
+                  m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+c1o1* oMdrho) * (vz2+vvz) * c1o2;
                   mfaaa = m0;
                   mfaab = m1;
                   mfaac = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfabc * c1o2+mfabb * (vvz-c1o2)+mfaba * (vz2-vvz) * c1o2;
-                  m1 = -mfabc-two* mfabb *  vvz+mfaba * (one-vz2);
+                  m1 = -mfabc-c2o1* mfabb *  vvz+mfaba * (c1o1-vz2);
                   m2 = mfabc * c1o2+mfabb * (vvz+c1o2)+mfaba * (vz2+vvz) * c1o2;
                   mfaba = m0;
                   mfabb = m1;
                   mfabc = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfacc * c1o2+mfacb * (vvz-c1o2)+(mfaca+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-                  m1 = -mfacc-two* mfacb *  vvz+mfaca                  * (one-vz2)-c1o3 * oMdrho * vz2;
+                  m1 = -mfacc-c2o1* mfacb *  vvz+mfaca                  * (c1o1-vz2)-c1o3 * oMdrho * vz2;
                   m2 = mfacc * c1o2+mfacb * (vvz+c1o2)+(mfaca+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                   mfaca = m0;
                   mfacb = m1;
@@ -784,21 +785,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfbac * c1o2+mfbab * (vvz-c1o2)+mfbaa * (vz2-vvz) * c1o2;
-                  m1 = -mfbac-two* mfbab *  vvz+mfbaa * (one-vz2);
+                  m1 = -mfbac-c2o1* mfbab *  vvz+mfbaa * (c1o1-vz2);
                   m2 = mfbac * c1o2+mfbab * (vvz+c1o2)+mfbaa * (vz2+vvz) * c1o2;
                   mfbaa = m0;
                   mfbab = m1;
                   mfbac = m2;
                   /////////b//////////////////////////////////////////////////////////////////////////
                   m0 = mfbbc * c1o2+mfbbb * (vvz-c1o2)+mfbba * (vz2-vvz) * c1o2;
-                  m1 = -mfbbc-two* mfbbb *  vvz+mfbba * (one-vz2);
+                  m1 = -mfbbc-c2o1* mfbbb *  vvz+mfbba * (c1o1-vz2);
                   m2 = mfbbc * c1o2+mfbbb * (vvz+c1o2)+mfbba * (vz2+vvz) * c1o2;
                   mfbba = m0;
                   mfbbb = m1;
                   mfbbc = m2;
                   /////////b//////////////////////////////////////////////////////////////////////////
                   m0 = mfbcc * c1o2+mfbcb * (vvz-c1o2)+mfbca * (vz2-vvz) * c1o2;
-                  m1 = -mfbcc-two* mfbcb *  vvz+mfbca * (one-vz2);
+                  m1 = -mfbcc-c2o1* mfbcb *  vvz+mfbca * (c1o1-vz2);
                   m2 = mfbcc * c1o2+mfbcb * (vvz+c1o2)+mfbca * (vz2+vvz) * c1o2;
                   mfbca = m0;
                   mfbcb = m1;
@@ -806,21 +807,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcac * c1o2+mfcab * (vvz-c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-                  m1 = -mfcac-two* mfcab *  vvz+mfcaa                  * (one-vz2)-c1o3 * oMdrho * vz2;
+                  m1 = -mfcac-c2o1* mfcab *  vvz+mfcaa                  * (c1o1-vz2)-c1o3 * oMdrho * vz2;
                   m2 = mfcac * c1o2+mfcab * (vvz+c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                   mfcaa = m0;
                   mfcab = m1;
                   mfcac = m2;
                   /////////c//////////////////////////////////////////////////////////////////////////
                   m0 = mfcbc * c1o2+mfcbb * (vvz-c1o2)+mfcba * (vz2-vvz) * c1o2;
-                  m1 = -mfcbc-two* mfcbb *  vvz+mfcba * (one-vz2);
+                  m1 = -mfcbc-c2o1* mfcbb *  vvz+mfcba * (c1o1-vz2);
                   m2 = mfcbc * c1o2+mfcbb * (vvz+c1o2)+mfcba * (vz2+vvz) * c1o2;
                   mfcba = m0;
                   mfcbb = m1;
                   mfcbc = m2;
                   /////////c//////////////////////////////////////////////////////////////////////////
                   m0 = mfccc * c1o2+mfccb * (vvz-c1o2)+(mfcca+c1o9 * oMdrho) * (vz2-vvz) * c1o2;
-                  m1 = -mfccc-two* mfccb *  vvz+mfcca                  * (one-vz2)-c1o9 * oMdrho * vz2;
+                  m1 = -mfccc-c2o1* mfccb *  vvz+mfcca                  * (c1o1-vz2)-c1o9 * oMdrho * vz2;
                   m2 = mfccc * c1o2+mfccb * (vvz+c1o2)+(mfcca+c1o9 * oMdrho) * (vz2+vvz) * c1o2;
                   mfcca = m0;
                   mfccb = m1;
@@ -831,21 +832,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   // Y - Dir
                   m0 = mfaca * c1o2+mfaba * (vvy-c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfaca-two* mfaba *  vvy+mfaaa                  * (one-vy2)-c1o6 * oMdrho * vy2;
+                  m1 = -mfaca-c2o1* mfaba *  vvy+mfaaa                  * (c1o1-vy2)-c1o6 * oMdrho * vy2;
                   m2 = mfaca * c1o2+mfaba * (vvy+c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                   mfaaa = m0;
                   mfaba = m1;
                   mfaca = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfacb * c1o2+mfabb * (vvy-c1o2)+(mfaab+c2o3 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfacb-two* mfabb *  vvy+mfaab                  * (one-vy2)-c2o3 * oMdrho * vy2;
+                  m1 = -mfacb-c2o1* mfabb *  vvy+mfaab                  * (c1o1-vy2)-c2o3 * oMdrho * vy2;
                   m2 = mfacb * c1o2+mfabb * (vvy+c1o2)+(mfaab+c2o3 * oMdrho) * (vy2+vvy) * c1o2;
                   mfaab = m0;
                   mfabb = m1;
                   mfacb = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfacc * c1o2+mfabc * (vvy-c1o2)+(mfaac+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfacc-two* mfabc *  vvy+mfaac                  * (one-vy2)-c1o6 * oMdrho * vy2;
+                  m1 = -mfacc-c2o1* mfabc *  vvy+mfaac                  * (c1o1-vy2)-c1o6 * oMdrho * vy2;
                   m2 = mfacc * c1o2+mfabc * (vvy+c1o2)+(mfaac+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                   mfaac = m0;
                   mfabc = m1;
@@ -853,21 +854,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfbca * c1o2+mfbba * (vvy-c1o2)+mfbaa * (vy2-vvy) * c1o2;
-                  m1 = -mfbca-two* mfbba *  vvy+mfbaa * (one-vy2);
+                  m1 = -mfbca-c2o1* mfbba *  vvy+mfbaa * ( c1o1-vy2);
                   m2 = mfbca * c1o2+mfbba * (vvy+c1o2)+mfbaa * (vy2+vvy) * c1o2;
                   mfbaa = m0;
                   mfbba = m1;
                   mfbca = m2;
                   /////////b//////////////////////////////////////////////////////////////////////////
                   m0 = mfbcb * c1o2+mfbbb * (vvy-c1o2)+mfbab * (vy2-vvy) * c1o2;
-                  m1 = -mfbcb-two* mfbbb *  vvy+mfbab * (one-vy2);
+                  m1 = -mfbcb-c2o1* mfbbb *  vvy+mfbab * (c1o1-vy2);
                   m2 = mfbcb * c1o2+mfbbb * (vvy+c1o2)+mfbab * (vy2+vvy) * c1o2;
                   mfbab = m0;
                   mfbbb = m1;
                   mfbcb = m2;
                   /////////b//////////////////////////////////////////////////////////////////////////
                   m0 = mfbcc * c1o2+mfbbc * (vvy-c1o2)+mfbac * (vy2-vvy) * c1o2;
-                  m1 = -mfbcc-two* mfbbc *  vvy+mfbac * (one-vy2);
+                  m1 = -mfbcc-c2o1* mfbbc *  vvy+mfbac * (c1o1-vy2);
                   m2 = mfbcc * c1o2+mfbbc * (vvy+c1o2)+mfbac * (vy2+vvy) * c1o2;
                   mfbac = m0;
                   mfbbc = m1;
@@ -875,21 +876,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcca * c1o2+mfcba * (vvy-c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfcca-two* mfcba *  vvy+mfcaa                   * (one-vy2)-c1o18 * oMdrho * vy2;
+                  m1 = -mfcca-c2o1* mfcba *  vvy+mfcaa                   * (c1o1-vy2)-c1o18 * oMdrho * vy2;
                   m2 = mfcca * c1o2+mfcba * (vvy+c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                   mfcaa = m0;
                   mfcba = m1;
                   mfcca = m2;
                   /////////c//////////////////////////////////////////////////////////////////////////
                   m0 = mfccb * c1o2+mfcbb * (vvy-c1o2)+(mfcab+c2o9 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfccb-two* mfcbb *  vvy+mfcab                  * (one-vy2)-c2o9 * oMdrho * vy2;
+                  m1 = -mfccb-c2o1* mfcbb *  vvy+mfcab                  * (c1o1-vy2)-c2o9 * oMdrho * vy2;
                   m2 = mfccb * c1o2+mfcbb * (vvy+c1o2)+(mfcab+c2o9 * oMdrho) * (vy2+vvy) * c1o2;
                   mfcab = m0;
                   mfcbb = m1;
                   mfccb = m2;
                   /////////c//////////////////////////////////////////////////////////////////////////
                   m0 = mfccc * c1o2+mfcbc * (vvy-c1o2)+(mfcac+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-                  m1 = -mfccc-two* mfcbc *  vvy+mfcac                   * (one-vy2)-c1o18 * oMdrho * vy2;
+                  m1 = -mfccc-c2o1* mfcbc *  vvy+mfcac                   * (c1o1-vy2)-c1o18 * oMdrho * vy2;
                   m2 = mfccc * c1o2+mfcbc * (vvy+c1o2)+(mfcac+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                   mfcac = m0;
                   mfcbc = m1;
@@ -900,21 +901,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   // X - Dir
                   m0 = mfcaa * c1o2+mfbaa * (vvx-c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcaa-two* mfbaa *  vvx+mfaaa                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m1 = -mfcaa-c2o1* mfbaa *  vvx+mfaaa                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                   m2 = mfcaa * c1o2+mfbaa * (vvx+c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaaa = m0;
                   mfbaa = m1;
                   mfcaa = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcba * c1o2+mfbba * (vvx-c1o2)+(mfaba+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcba-two* mfbba *  vvx+mfaba                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m1 = -mfcba-c2o1* mfbba *  vvx+mfaba                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                   m2 = mfcba * c1o2+mfbba * (vvx+c1o2)+(mfaba+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaba = m0;
                   mfbba = m1;
                   mfcba = m2;
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcca * c1o2+mfbca * (vvx-c1o2)+(mfaca+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcca-two* mfbca *  vvx+mfaca                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m1 = -mfcca-c2o1* mfbca *  vvx+mfaca                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                   m2 = mfcca * c1o2+mfbca * (vvx+c1o2)+(mfaca+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaca = m0;
                   mfbca = m1;
@@ -922,21 +923,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcab * c1o2+mfbab * (vvx-c1o2)+(mfaab+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcab-two* mfbab *  vvx+mfaab                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m1 = -mfcab-c2o1* mfbab *  vvx+mfaab                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                   m2 = mfcab * c1o2+mfbab * (vvx+c1o2)+(mfaab+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaab = m0;
                   mfbab = m1;
                   mfcab = m2;
                   ///////////b////////////////////////////////////////////////////////////////////////
                   m0 = mfcbb * c1o2+mfbbb * (vvx-c1o2)+(mfabb+c4o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcbb-two* mfbbb *  vvx+mfabb                  * (one-vx2)-c4o9 * oMdrho * vx2;
+                  m1 = -mfcbb-c2o1* mfbbb *  vvx+mfabb                  * (c1o1-vx2)-c4o9 * oMdrho * vx2;
                   m2 = mfcbb * c1o2+mfbbb * (vvx+c1o2)+(mfabb+c4o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfabb = m0;
                   mfbbb = m1;
                   mfcbb = m2;
                   ///////////b////////////////////////////////////////////////////////////////////////
                   m0 = mfccb * c1o2+mfbcb * (vvx-c1o2)+(mfacb+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfccb-two* mfbcb *  vvx+mfacb                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m1 = -mfccb-c2o1* mfbcb *  vvx+mfacb                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                   m2 = mfccb * c1o2+mfbcb * (vvx+c1o2)+(mfacb+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfacb = m0;
                   mfbcb = m1;
@@ -944,21 +945,21 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   ////////////////////////////////////////////////////////////////////////////////////
                   ////////////////////////////////////////////////////////////////////////////////////
                   m0 = mfcac * c1o2+mfbac * (vvx-c1o2)+(mfaac+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcac-two* mfbac *  vvx+mfaac                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m1 = -mfcac-c2o1* mfbac *  vvx+mfaac                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                   m2 = mfcac * c1o2+mfbac * (vvx+c1o2)+(mfaac+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                   mfaac = m0;
                   mfbac = m1;
                   mfcac = m2;
                   ///////////c////////////////////////////////////////////////////////////////////////
                   m0 = mfcbc * c1o2+mfbbc * (vvx-c1o2)+(mfabc+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfcbc-two* mfbbc *  vvx+mfabc                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m1 = -mfcbc-c2o1* mfbbc *  vvx+mfabc                  * (c1o1-vx2)-c1o9 * oMdrho * vx2;
                   m2 = mfcbc * c1o2+mfbbc * (vvx+c1o2)+(mfabc+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                   mfabc = m0;
                   mfbbc = m1;
                   mfcbc = m2;
                   ///////////c////////////////////////////////////////////////////////////////////////
                   m0 = mfccc * c1o2+mfbcc * (vvx-c1o2)+(mfacc+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-                  m1 = -mfccc-two* mfbcc *  vvx+mfacc                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m1 = -mfccc-c2o1* mfbcc *  vvx+mfacc                   * (c1o1-vx2)-c1o36 * oMdrho * vx2;
                   m2 = mfccc * c1o2+mfbcc * (vvx+c1o2)+(mfacc+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                   mfacc = m0;
                   mfbcc = m1;
@@ -969,11 +970,11 @@ void CompressibleCumulantLBMKernel::calculate(int step)
                   //proof correctness
                   //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-                  LBMReal drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+                  real drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                      +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                      +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
                   //LBMReal dif = fabs(rho - rho_post);
-                  LBMReal dif = drho - drho_post;
+                  real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
                   if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -1030,7 +1031,7 @@ void CompressibleCumulantLBMKernel::calculate(int step)
    //timer.stop();
 }
 //////////////////////////////////////////////////////////////////////////
-double CompressibleCumulantLBMKernel::getCalculationTime()
+real CompressibleCumulantLBMKernel::getCalculationTime()
 {
    //return timer.getDuration();
    return timer.getTotalTime();
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.h
index 802a707708583b9f76700f5f12038f70b29db7b4..7af69201cde7bd8812173f1b6bd9d397e731686b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleCumulantLBMKernel.h
@@ -21,32 +21,32 @@ public:
    ~CompressibleCumulantLBMKernel() override;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    void setBulkOmegaToOmega(bool value);
    void setRelaxationParameter(Parameter p);
 protected:
    virtual void initDataSet();
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
 
    UbTimer timer;
 
-   LBMReal OxyyMxzz;
+   real OxyyMxzz;
    Parameter parameter;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
    
    // bulk viscosity
    bool bulkOmegaToOmega;
-   LBMReal OxxPyyPzz; 
+   real OxxPyyPzz; 
 };
 #endif // CompressibleCumulantLBMKernel_h__
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.cpp
index a359db9cd61ef3042130f5148abdc4cf1488617a..d98d7489a7fbe466a31278cc7ca555168e962f6d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.cpp
@@ -1,10 +1,11 @@
 #include "CompressibleOffsetInterpolationProcessor.h"
 #include "D3Q27System.h"
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
-CompressibleOffsetInterpolationProcessor::CompressibleOffsetInterpolationProcessor(LBMReal omegaC, LBMReal omegaF)
+CompressibleOffsetInterpolationProcessor::CompressibleOffsetInterpolationProcessor(real omegaC, real omegaF)
    : omegaC(omegaC), omegaF(omegaF)
 {
 
@@ -19,13 +20,13 @@ InterpolationProcessorPtr CompressibleOffsetInterpolationProcessor::clone()
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void CompressibleOffsetInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -35,7 +36,7 @@ void CompressibleOffsetInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellC, omegaC, 0.5);
@@ -49,49 +50,51 @@ void CompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICel
    calcInterpolatedNodeCF(icellF.TNE, omegaF,  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellF, omegaF, 2.0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-                                                    LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void CompressibleOffsetInterpolationProcessor::calcMoments(const real* const f, real omega, real& press, real& vx1, real& vx2, real& vx3, 
+                                                    real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
+   using namespace vf::basics::constant;
 
-   LBMReal drho = 0.0;
+   real drho = 0.0;
    D3Q27System::calcCompMacroscopicValues(f,drho,vx1,vx2,vx3);
    
    press = drho; //interpolate rho!
 
-   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(one + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
-   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(one + drho)-(vx2*vx3));
-   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(one + drho)-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(one + drho)-(vx1*vx1-vx2*vx2));
-   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(one + drho)-(vx1*vx1-vx3*vx3));
+   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(c1o1 + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
+   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(c1o1 + drho)-(vx2*vx3));
+   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(c1o1 + drho)-(vx1*vx3));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(c1o1 + drho)-(vx1*vx1-vx2*vx2));
+   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(c1o1 + drho)-(vx1*vx1-vx3*vx3));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -346,7 +349,7 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   const LBMReal o = omega;
+   const real o = omega;
 
    f_E = eps_new*((2*(-2*ax + by + cz-kxxMzzAverage-kxxMyyAverage))/(27.*o));
    f_N = eps_new*((2*(ax - 2*by + cz+2*kxxMyyAverage-kxxMzzAverage))/(27.*o));
@@ -454,21 +457,22 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeCF(LBMReal* f, LBMReal  /*omega*/, LBMReal  /*x*/, LBMReal  /*y*/, LBMReal  /*z*/, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeCF(real* f, real  /*omega*/, real  /*x*/, real  /*y*/, real  /*z*/, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+   real rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
 
    //////////////////////////////////////////////////////////////////////////
    //DRAFT
    //vx1 -= forcingF*0.5;
    //////////////////////////////////////////////////////////////////////////
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcCompFeq(feq,rho,vx1,vx2,vx3);
 
    f[DIR_P00]    = f_E    + xs*x_E    + ys*y_E    + zs*z_E    + xs*ys*xy_E    + xs*zs*xz_E    + ys*zs*yz_E    + feq[DIR_P00];
@@ -501,7 +505,7 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeCF(LBMReal* f
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressBSW()
+real CompressibleOffsetInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -514,7 +518,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressTSW()
+real CompressibleOffsetInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -527,7 +531,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressTSE()
+real CompressibleOffsetInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -540,7 +544,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressBSE()
+real CompressibleOffsetInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -553,7 +557,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressBNW()
+real CompressibleOffsetInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -566,7 +570,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressTNW()
+real CompressibleOffsetInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -579,7 +583,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressTNE()
+real CompressibleOffsetInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -592,7 +596,7 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal CompressibleOffsetInterpolationProcessor::calcPressBNE()
+real CompressibleOffsetInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -605,11 +609,12 @@ LBMReal CompressibleOffsetInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -617,22 +622,22 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
 
-   LBMReal rho = press ;//+ (ax+by+cz)/3.;
+   real rho = press ;//+ (ax+by+cz)/3.;
 
    //////////////////////////////////////////////////////////////////////////
    //DRAFT
    //vx1 -= forcingC*0.5;
    //////////////////////////////////////////////////////////////////////////
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcCompFeq(feq,rho,vx1,vx2,vx3);
 
-   LBMReal eps_new = 2.;
-   LBMReal o  = omega;
+   real eps_new = 2.;
+   real o  = omega;
 //   LBMReal op = 1.;
 
    //f_E    = eps_new *((5.*ax*o + 5.*by*o + 5.*cz*o - 8.*ax*op + 4.*by*op + 4.*cz*op)/(54.*o*op));
@@ -694,14 +699,14 @@ void CompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f
    f[DIR_000] = f_ZERO + feq[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void CompressibleOffsetInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.h
index b81277683d0feaf97ed2f9c45cc108a99b9d8a3c..fefd6ed1566fe0a4c3c414748a522edbfede48a1 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetInterpolationProcessor.h
@@ -15,52 +15,52 @@ class CompressibleOffsetInterpolationProcessor : public InterpolationProcessor
 {
 public:
    CompressibleOffsetInterpolationProcessor() = default;
-   CompressibleOffsetInterpolationProcessor(LBMReal omegaC, LBMReal omegaF);
+   CompressibleOffsetInterpolationProcessor(real omegaC, real omegaF);
    ~CompressibleOffsetInterpolationProcessor() override = default;
 
    InterpolationProcessorPtr clone() override;
-   void setOmegas(LBMReal omegaC, LBMReal omegaF) override;
+   void setOmegas(real omegaC, real omegaF) override;
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF) override;
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC) override; 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff) override; 
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff) override;
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC) override; 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff) override; 
    //LBMReal forcingC, forcingF;
 protected:   
 private:
-   LBMReal omegaC{0.0}, omegaF{0.0};
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC{0.0}, omegaF{0.0};
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
 //   LBMReal a,b,c;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new) override;
-   void calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega) override;
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3) override;
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz) override;
+   void setOffsets(real xoff, real yoff, real zoff) override;
+   void calcMoments(const real* const f, real omega, real& rho, real& vx1, real& vx2, real& vx3, 
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new) override;
+   void calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega) override;
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3) override;
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz) override;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -69,7 +69,7 @@ inline void CompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void CompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void CompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.cpp
index e6883626c94e8de2b01f0c331f7580a7a7b9b9d2..68b8506d896c3d5c93c9021a7a03ab3fe7a8621e 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.cpp
@@ -1,24 +1,25 @@
 #include "CompressibleOffsetMomentsInterpolationProcessor.h"
 #include "D3Q27System.h"
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 CompressibleOffsetMomentsInterpolationProcessor::CompressibleOffsetMomentsInterpolationProcessor()
     
 {
    this->bulkViscosity = 0.0;
    this->shearViscosity = 0.0;
-   this->OxxPyyPzzC = one;
-   this->OxxPyyPzzF = one;
+   this->OxxPyyPzzC = c1o1;
+   this->OxxPyyPzzF = c1o1;
 }
 //////////////////////////////////////////////////////////////////////////
-CompressibleOffsetMomentsInterpolationProcessor::CompressibleOffsetMomentsInterpolationProcessor(LBMReal omegaC, LBMReal omegaF)
+CompressibleOffsetMomentsInterpolationProcessor::CompressibleOffsetMomentsInterpolationProcessor(real omegaC, real omegaF)
    : omegaC(omegaC), omegaF(omegaF)
 {
    this->bulkViscosity = 0.0;
    this->shearViscosity = 0.0;
-   this->OxxPyyPzzC = one;
-   this->OxxPyyPzzF = one;
+   this->OxxPyyPzzC = c1o1;
+   this->OxxPyyPzzF = c1o1;
 }
 //////////////////////////////////////////////////////////////////////////
 CompressibleOffsetMomentsInterpolationProcessor::~CompressibleOffsetMomentsInterpolationProcessor()
@@ -34,13 +35,13 @@ InterpolationProcessorPtr CompressibleOffsetMomentsInterpolationProcessor::clone
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void CompressibleOffsetMomentsInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 
-   LBMReal dtC = (3.0*shearViscosity)/((1/omegaC)-0.5);
-   LBMReal dtF = (3.0*shearViscosity)/((1/omegaF)-0.5);
+   real dtC = (3.0*shearViscosity)/((1/omegaC)-0.5);
+   real dtF = (3.0*shearViscosity)/((1/omegaF)-0.5);
 
    if (bulkViscosity != 0)
    {
@@ -49,12 +50,12 @@ void CompressibleOffsetMomentsInterpolationProcessor::setOmegas( LBMReal omegaC,
    }
    else
    {
-      this->OxxPyyPzzC = one;
-      this->OxxPyyPzzF = one;
+      this->OxxPyyPzzC = c1o1;
+      this->OxxPyyPzzF = c1o1;
    }
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetMomentsInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -64,7 +65,7 @@ void CompressibleOffsetMomentsInterpolationProcessor::setOffsets(LBMReal xoff, L
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetMomentsInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellC, omegaC, 0.5);
@@ -78,49 +79,50 @@ void CompressibleOffsetMomentsInterpolationProcessor::interpolateCoarseToFine(D3
    calcInterpolatedNodeCF(icellF.TNE, omegaF,  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetMomentsInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellF, omegaF, 2.0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-                                                    LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void CompressibleOffsetMomentsInterpolationProcessor::calcMoments(const real* const f, real omega, real& press, real& vx1, real& vx2, real& vx3, 
+                                                    real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal drho = 0.0;
+   real drho = 0.0;
    D3Q27System::calcCompMacroscopicValues(f,drho,vx1,vx2,vx3);
    
    press = drho; //interpolate rho!
 
-   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(one + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
-   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(one + drho)-(vx2*vx3));
-   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(one + drho)-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(one + drho)-(vx1*vx1-vx2*vx2));
-   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(one + drho)-(vx1*vx1-vx3*vx3));
+   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(c1o1 + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
+   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(c1o1 + drho)-(vx2*vx3));
+   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(c1o1 + drho)-(vx1*vx3));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(c1o1 + drho)-(vx1*vx1-vx2*vx2));
+   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(c1o1 + drho)-(vx1*vx1-vx3*vx3));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -375,7 +377,7 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedCoefficiet
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   const LBMReal o = omega;
+   const real o = omega;
 
    f_E = eps_new*((2*(-2*ax + by + cz-kxxMzzAverage-kxxMyyAverage))/(27.*o));
    f_N = eps_new*((2*(ax - 2*by + cz+2*kxxMyyAverage-kxxMzzAverage))/(27.*o));
@@ -483,79 +485,81 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedCoefficiet
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
+   using namespace vf::basics::constant;
 
-   LBMReal eps_new = 0.5;
-   LBMReal o = omega;
+   real eps_new = 0.5;
+   real o = omega;
    //bulk viscosity
-   LBMReal oP = OxxPyyPzzF;
+   real oP = OxxPyyPzzF;
 
 //   LBMReal rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
-
-   LBMReal mfcbb = zeroReal;
-   LBMReal mfabb = zeroReal;
-   LBMReal mfbcb = zeroReal;
-   LBMReal mfbab = zeroReal;
-   LBMReal mfbbc = zeroReal;
-   LBMReal mfbba = zeroReal;
-   LBMReal mfccb = zeroReal;
-   LBMReal mfaab = zeroReal;
-   LBMReal mfcab = zeroReal;
-   LBMReal mfacb = zeroReal;
-   LBMReal mfcbc = zeroReal;
-   LBMReal mfaba = zeroReal;
-   LBMReal mfcba = zeroReal;
-   LBMReal mfabc = zeroReal;
-   LBMReal mfbcc = zeroReal;
-   LBMReal mfbaa = zeroReal;
-   LBMReal mfbca = zeroReal;
-   LBMReal mfbac = zeroReal;
-   LBMReal mfbbb = zeroReal;
-   LBMReal mfccc = zeroReal;
-   LBMReal mfaac = zeroReal;
-   LBMReal mfcac = zeroReal;
-   LBMReal mfacc = zeroReal;
-   LBMReal mfcca = zeroReal;
-   LBMReal mfaaa = zeroReal;
-   LBMReal mfcaa = zeroReal;
-   LBMReal mfaca = zeroReal;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+
+   real mfcbb = c0o1;
+   real mfabb = c0o1;
+   real mfbcb = c0o1;
+   real mfbab = c0o1;
+   real mfbbc = c0o1;
+   real mfbba = c0o1;
+   real mfccb = c0o1;
+   real mfaab = c0o1;
+   real mfcab = c0o1;
+   real mfacb = c0o1;
+   real mfcbc = c0o1;
+   real mfaba = c0o1;
+   real mfcba = c0o1;
+   real mfabc = c0o1;
+   real mfbcc = c0o1;
+   real mfbaa = c0o1;
+   real mfbca = c0o1;
+   real mfbac = c0o1;
+   real mfbbb = c0o1;
+   real mfccc = c0o1;
+   real mfaac = c0o1;
+   real mfcac = c0o1;
+   real mfacc = c0o1;
+   real mfcca = c0o1;
+   real mfaaa = c0o1;
+   real mfcaa = c0o1;
+   real mfaca = c0o1;
 
    mfaaa = press; // if drho is interpolated directly
 
-   LBMReal vx1Sq = vx1*vx1;
-   LBMReal vx2Sq = vx2*vx2;
-   LBMReal vx3Sq = vx3*vx3;
-   LBMReal oMdrho = one;
+   real vx1Sq = vx1*vx1;
+   real vx2Sq = vx2*vx2;
+   real vx3Sq = vx3*vx3;
+   real oMdrho = c1o1;
 
    //2.f
 
    // linear combinations
-   LBMReal mxxPyyPzz = mfaaa - c2o3*(ax + by + two*axx*x + bxy*x + axy*y + two*byy*y + axz*z + byz*z + bxyz*x*z + axyz*y*z + cz - cxz*x + cyz*y + cxyz*x*y + two*czz*z)*eps_new / oP* (one + press);
-   LBMReal mxxMyy    = -c2o3*(ax - by + kxxMyyAverage + two*axx*x - bxy*x + axy*y - two*byy*y + axz*z - byz*z - bxyz*x*z + axyz*y*z)*eps_new/o * (one + press);
-   LBMReal mxxMzz    = -c2o3*(ax - cz + kxxMzzAverage + two*axx*x - cxz*x + axy*y - cyz*y - cxyz*x*y + axz*z - two*czz*z + axyz*y*z)*eps_new/o * (one + press);
+   real mxxPyyPzz = mfaaa - c2o3*(ax + by + c2o1 *axx*x + bxy*x + axy*y + c2o1 *byy*y + axz*z + byz*z + bxyz*x*z + axyz*y*z + cz - cxz*x + cyz*y + cxyz*x*y + c2o1 *czz*z)*eps_new / oP* (c1o1 + press);
+   real mxxMyy    = -c2o3*(ax - by + kxxMyyAverage + c2o1 *axx*x - bxy*x + axy*y - c2o1 *byy*y + axz*z - byz*z - bxyz*x*z + axyz*y*z)*eps_new/o * (c1o1 + press);
+   real mxxMzz    = -c2o3*(ax - cz + kxxMzzAverage + c2o1 *axx*x - cxz*x + axy*y - cyz*y - cxyz*x*y + axz*z - c2o1 *czz*z + axyz*y*z)*eps_new/o * (c1o1 + press);
 
-   mfabb     = -c1o3 * (bz + cy + kyzAverage + bxz*x + cxy*x + byz*y + two*cyy*y + bxyz*x*y + two*bzz*z + cyz*z + cxyz*x*z)*eps_new/o * (one + press);
-   mfbab     = -c1o3 * (az + cx + kxzAverage + axz*x + two*cxx*x + ayz*y + cxy*y + axyz*x*y + two*azz*z + cxz*z + cxyz*y*z)*eps_new/o * (one + press);
-   mfbba     = -c1o3 * (ay + bx + kxyAverage + axy*x + two*bxx*x + two*ayy*y + bxy*y + ayz*z + bxz*z + axyz*x*z + bxyz*y*z)*eps_new/o * (one + press);
+   mfabb     = -c1o3 * (bz + cy + kyzAverage + bxz*x + cxy*x + byz*y + c2o1 *cyy*y + bxyz*x*y + c2o1 *bzz*z + cyz*z + cxyz*x*z)*eps_new/o * (c1o1 + press);
+   mfbab     = -c1o3 * (az + cx + kxzAverage + axz*x + c2o1 *cxx*x + ayz*y + cxy*y + axyz*x*y + c2o1 *azz*z + cxz*z + cxyz*y*z)*eps_new/o * (c1o1 + press);
+   mfbba     = -c1o3 * (ay + bx + kxyAverage + axy*x + c2o1 *bxx*x + c2o1 *ayy*y + bxy*y + ayz*z + bxz*z + axyz*x*z + bxyz*y*z)*eps_new/o * (c1o1 + press);
 
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy +       mxxMzz + mxxPyyPzz) ;
-   mfaca = c1o3 * (-two * mxxMyy +       mxxMzz + mxxPyyPzz) ;
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz) ;
+   mfaca = c1o3 * (-c2o1 * mxxMyy +       mxxMzz + mxxPyyPzz) ;
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz) ;
 
    //three
-   mfbbb = zeroReal;
-   LBMReal mxxyPyzz = zeroReal;
-   LBMReal mxxyMyzz = zeroReal;
-   LBMReal mxxzPyyz = zeroReal;
-   LBMReal mxxzMyyz = zeroReal;
-   LBMReal mxyyPxzz =  zeroReal;
-   LBMReal mxyyMxzz = zeroReal;
+   mfbbb = c0o1;
+   real mxxyPyzz = c0o1;
+   real mxxyMyzz = c0o1;
+   real mxxzPyyz = c0o1;
+   real mxxzMyyz = c0o1;
+   real mxyyPxzz = c0o1;
+   real mxyyMxzz = c0o1;
 
    // linear combinations back
    mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
@@ -581,22 +585,22 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   LBMReal m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + one * oMdrho) * (vx3Sq - vx3) * c1o2;
-   LBMReal m1 = -mfaac        - two * mfaab *  vx3         +  mfaaa                * (one - vx3Sq)              - one * oMdrho * vx3Sq;
-   LBMReal m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + one * oMdrho) * (vx3Sq + vx3) * c1o2;
+   real m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq - vx3) * c1o2;
+   real m1 = -mfaac        - c2o1 * mfaab *  vx3         +  mfaaa                * (c1o1 - vx3Sq)              - c1o1 * oMdrho * vx3Sq;
+   real m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfabc * c1o2 +      mfabb * (vx3 - c1o2) + mfaba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfabc        - two * mfabb *  vx3         + mfaba * (one - vx3Sq);
+   m1 = -mfabc        - c2o1 * mfabb *  vx3         + mfaba * (c1o1 - vx3Sq);
    m2 =  mfabc * c1o2 +      mfabb * (vx3 + c1o2) + mfaba * (vx3Sq + vx3) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfacb * (vx3 - c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfacc        - two * mfacb *  vx3         +  mfaca                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfacc        - c2o1 * mfacb *  vx3         +  mfaca                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfacc * c1o2 +      mfacb * (vx3 + c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -604,21 +608,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbac * c1o2 +      mfbab * (vx3 - c1o2) + mfbaa * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbac        - two * mfbab *  vx3         + mfbaa * (one - vx3Sq);
+   m1 = -mfbac        - c2o1 * mfbab *  vx3         + mfbaa * (c1o1 - vx3Sq);
    m2 =  mfbac * c1o2 +      mfbab * (vx3 + c1o2) + mfbaa * (vx3Sq + vx3) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbbc * c1o2 +      mfbbb * (vx3 - c1o2) + mfbba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbbc        - two * mfbbb *  vx3         + mfbba * (one - vx3Sq);
+   m1 = -mfbbc        - c2o1 * mfbbb *  vx3         + mfbba * (c1o1 - vx3Sq);
    m2 =  mfbbc * c1o2 +      mfbbb * (vx3 + c1o2) + mfbba * (vx3Sq + vx3) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbcb * (vx3 - c1o2) + mfbca * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbcc        - two * mfbcb *  vx3         + mfbca * (one - vx3Sq);
+   m1 = -mfbcc        - c2o1 * mfbcb *  vx3         + mfbca * (c1o1 - vx3Sq);
    m2 =  mfbcc * c1o2 +      mfbcb * (vx3 + c1o2) + mfbca * (vx3Sq + vx3) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -626,21 +630,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfcab * (vx3 - c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcac        - two * mfcab *  vx3         +  mfcaa                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfcac        - c2o1 * mfcab *  vx3         +  mfcaa                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfcac * c1o2 +      mfcab * (vx3 + c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfcbb * (vx3 - c1o2) + mfcba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcbc        - two * mfcbb *  vx3         + mfcba * (one - vx3Sq);
+   m1 = -mfcbc        - c2o1 * mfcbb *  vx3         + mfcba * (c1o1 - vx3Sq);
    m2 =  mfcbc * c1o2 +      mfcbb * (vx3 + c1o2) + mfcba * (vx3Sq + vx3) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfccb * (vx3 - c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfccc        - two * mfccb *  vx3         +  mfcca                  * (one - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
+   m1 = -mfccc        - c2o1 * mfccb *  vx3         +  mfcca                  * (c1o1 - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
    m2 =  mfccc * c1o2 +      mfccb * (vx3 + c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -651,21 +655,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 =  mfaca * c1o2 +      mfaba * (vx2 - c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfaca        - two * mfaba *  vx2         +  mfaaa                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfaca        - c2o1 * mfaba *  vx2         +  mfaaa                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfaca * c1o2 +      mfaba * (vx2 + c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacb * c1o2 +      mfabb * (vx2 - c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacb        - two * mfabb *  vx2         +  mfaab                  * (one - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
+   m1 = -mfacb        - c2o1 * mfabb *  vx2         +  mfaab                  * (c1o1 - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
    m2 =  mfacb * c1o2 +      mfabb * (vx2 + c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfabc * (vx2 - c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacc        - two * mfabc *  vx2         +  mfaac                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfacc        - c2o1 * mfabc *  vx2         +  mfaac                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfacc * c1o2 +      mfabc * (vx2 + c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -673,21 +677,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbca * c1o2 +      mfbba * (vx2 - c1o2) + mfbaa * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbca        - two * mfbba *  vx2         + mfbaa * (one - vx2Sq);
+   m1 = -mfbca        - c2o1 * mfbba *  vx2         + mfbaa * (c1o1 - vx2Sq);
    m2 =  mfbca * c1o2 +      mfbba * (vx2 + c1o2) + mfbaa * (vx2Sq + vx2) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcb * c1o2 +      mfbbb * (vx2 - c1o2) + mfbab * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcb        - two * mfbbb *  vx2         + mfbab * (one - vx2Sq);
+   m1 = -mfbcb        - c2o1 * mfbbb *  vx2         + mfbab * (c1o1 - vx2Sq);
    m2 =  mfbcb * c1o2 +      mfbbb * (vx2 + c1o2) + mfbab * (vx2Sq + vx2) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbbc * (vx2 - c1o2) + mfbac * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcc        - two * mfbbc *  vx2         + mfbac * (one - vx2Sq);
+   m1 = -mfbcc        - c2o1 * mfbbc *  vx2         + mfbac * (c1o1 - vx2Sq);
    m2 =  mfbcc * c1o2 +      mfbbc * (vx2 + c1o2) + mfbac * (vx2Sq + vx2) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -695,21 +699,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfcba * (vx2 - c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfcca        - two * mfcba *  vx2         +  mfcaa                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfcca        - c2o1 * mfcba *  vx2         +  mfcaa                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfcca * c1o2 +      mfcba * (vx2 + c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfcbb * (vx2 - c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccb        - two * mfcbb *  vx2         +  mfcab                  * (one - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
+   m1 = -mfccb        - c2o1 * mfcbb *  vx2         +  mfcab                  * (c1o1 - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
    m2 =  mfccb * c1o2 +      mfcbb * (vx2 + c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfcbc * (vx2 - c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccc        - two * mfcbc *  vx2         +  mfcac                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfccc        - c2o1 * mfcbc *  vx2         +  mfcac                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfccc * c1o2 +      mfcbc * (vx2 + c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -720,21 +724,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 =  mfcaa * c1o2 +      mfbaa * (vx1 - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcaa        - two * mfbaa *  vx1         +  mfaaa                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcaa        - c2o1 * mfbaa *  vx1         +  mfaaa                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcaa * c1o2 +      mfbaa * (vx1 + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcba * c1o2 +      mfbba * (vx1 - c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcba        - two * mfbba *  vx1         +  mfaba                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcba        - c2o1 * mfbba *  vx1         +  mfaba                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcba * c1o2 +      mfbba * (vx1 + c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfbca * (vx1 - c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcca        - two * mfbca *  vx1         +  mfaca                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcca        - c2o1 * mfbca *  vx1         +  mfaca                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcca * c1o2 +      mfbca * (vx1 + c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -742,21 +746,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcab * c1o2 +      mfbab * (vx1 - c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcab        - two * mfbab *  vx1         +  mfaab                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcab        - c2o1 * mfbab *  vx1         +  mfaab                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcab * c1o2 +      mfbab * (vx1 + c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfcbb * c1o2 +      mfbbb * (vx1 - c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbb        - two * mfbbb *  vx1         +  mfabb                  * (one - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
+   m1 = -mfcbb        - c2o1 * mfbbb *  vx1         +  mfabb                  * (c1o1 - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
    m2 =  mfcbb * c1o2 +      mfbbb * (vx1 + c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfbcb * (vx1 - c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccb        - two * mfbcb *  vx1         +  mfacb                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfccb        - c2o1 * mfbcb *  vx1         +  mfacb                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfccb * c1o2 +      mfbcb * (vx1 + c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -764,21 +768,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfbac * (vx1 - c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcac        - two * mfbac *  vx1         +  mfaac                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcac        - c2o1 * mfbac *  vx1         +  mfaac                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcac * c1o2 +      mfbac * (vx1 + c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfbbc * (vx1 - c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbc        - two * mfbbc *  vx1         +  mfabc                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcbc        - c2o1 * mfbbc *  vx1         +  mfabc                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcbc * c1o2 +      mfbbc * (vx1 + c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfbcc * (vx1 - c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccc        - two * mfbcc *  vx1         +  mfacc                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfccc        - c2o1 * mfbcc *  vx1         +  mfacc                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfccc * c1o2 +      mfbcc * (vx1 + c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -815,7 +819,7 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeCF(LBM
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBSW()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -828,7 +832,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTSW()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -841,7 +845,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTSE()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -854,7 +858,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBSE()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -867,7 +871,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBNW()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -880,7 +884,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTNW()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -893,7 +897,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTNE()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -906,7 +910,7 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBNE()
+real CompressibleOffsetMomentsInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -919,11 +923,12 @@ LBMReal CompressibleOffsetMomentsInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -931,81 +936,81 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
 
 //   LBMReal rho = press ;//+ (ax+by+cz)/3.;
 
-   LBMReal eps_new = 2.;
-   LBMReal o  = omega;
+   real eps_new = 2.;
+   real o  = omega;
    //bulk viscosity
-   LBMReal oP = OxxPyyPzzC;
-
-   LBMReal mfcbb = zeroReal;
-   LBMReal mfabb = zeroReal;
-   LBMReal mfbcb = zeroReal;
-   LBMReal mfbab = zeroReal;
-   LBMReal mfbbc = zeroReal;
-   LBMReal mfbba = zeroReal;
-   LBMReal mfccb = zeroReal;
-   LBMReal mfaab = zeroReal;
-   LBMReal mfcab = zeroReal;
-   LBMReal mfacb = zeroReal;
-   LBMReal mfcbc = zeroReal;
-   LBMReal mfaba = zeroReal;
-   LBMReal mfcba = zeroReal;
-   LBMReal mfabc = zeroReal;
-   LBMReal mfbcc = zeroReal;
-   LBMReal mfbaa = zeroReal;
-   LBMReal mfbca = zeroReal;
-   LBMReal mfbac = zeroReal;
-   LBMReal mfbbb = zeroReal;
-   LBMReal mfccc = zeroReal;
-   LBMReal mfaac = zeroReal;
-   LBMReal mfcac = zeroReal;
-   LBMReal mfacc = zeroReal;
-   LBMReal mfcca = zeroReal;
-   LBMReal mfaaa = zeroReal;
-   LBMReal mfcaa = zeroReal;
-   LBMReal mfaca = zeroReal;
+   real oP = OxxPyyPzzC;
+
+   real mfcbb = c0o1;
+   real mfabb = c0o1;
+   real mfbcb = c0o1;
+   real mfbab = c0o1;
+   real mfbbc = c0o1;
+   real mfbba = c0o1;
+   real mfccb = c0o1;
+   real mfaab = c0o1;
+   real mfcab = c0o1;
+   real mfacb = c0o1;
+   real mfcbc = c0o1;
+   real mfaba = c0o1;
+   real mfcba = c0o1;
+   real mfabc = c0o1;
+   real mfbcc = c0o1;
+   real mfbaa = c0o1;
+   real mfbca = c0o1;
+   real mfbac = c0o1;
+   real mfbbb = c0o1;
+   real mfccc = c0o1;
+   real mfaac = c0o1;
+   real mfcac = c0o1;
+   real mfacc = c0o1;
+   real mfcca = c0o1;
+   real mfaaa = c0o1;
+   real mfcaa = c0o1;
+   real mfaca = c0o1;
 
    mfaaa = press; // if drho is interpolated directly
 
-   LBMReal vx1Sq = vx1*vx1;
-   LBMReal vx2Sq = vx2*vx2;
-   LBMReal vx3Sq = vx3*vx3;
-   LBMReal oMdrho = one;
+   real vx1Sq = vx1*vx1;
+   real vx2Sq = vx2*vx2;
+   real vx3Sq = vx3*vx3;
+   real oMdrho = c1o1;
    //oMdrho = one - mfaaa;
 
    //2.f
    // linear combinations
 
 /////////////////////////
-   LBMReal mxxPyyPzz = mfaaa    -c2o3*(ax+by+cz)*eps_new/oP*(one+press);
+   real mxxPyyPzz = mfaaa    -c2o3*(ax+by+cz)*eps_new/oP*(c1o1 +press);
 
-   LBMReal mxxMyy    = -c2o3*((ax - by)+kxxMyyAverage)*eps_new/o * (one + press);
-   LBMReal mxxMzz    = -c2o3*((ax - cz)+kxxMzzAverage)*eps_new/o * (one + press);
+   real mxxMyy    = -c2o3*((ax - by)+kxxMyyAverage)*eps_new/o * (c1o1 + press);
+   real mxxMzz    = -c2o3*((ax - cz)+kxxMzzAverage)*eps_new/o * (c1o1 + press);
 
-   mfabb     = -c1o3 * ((bz + cy)+kyzAverage)*eps_new/o * (one + press);
-   mfbab     = -c1o3 * ((az + cx)+kxzAverage)*eps_new/o * (one + press);
-   mfbba     = -c1o3 * ((ay + bx)+kxyAverage)*eps_new/o * (one + press);
+   mfabb     = -c1o3 * ((bz + cy)+kyzAverage)*eps_new/o * (c1o1 + press);
+   mfbab     = -c1o3 * ((az + cx)+kxzAverage)*eps_new/o * (c1o1 + press);
+   mfbba     = -c1o3 * ((ay + bx)+kxyAverage)*eps_new/o * (c1o1 + press);
 
    ////////////////////////
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy +       mxxMzz + mxxPyyPzz);
-   mfaca = c1o3 * (-two * mxxMyy +       mxxMzz + mxxPyyPzz);
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz);
+   mfaca = c1o3 * (-c2o1 * mxxMyy +       mxxMzz + mxxPyyPzz);
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
    //three
-   mfbbb = zeroReal;
+   mfbbb = c0o1;
 
-   LBMReal mxxyPyzz = zeroReal;
-   LBMReal mxxyMyzz = zeroReal;
-   LBMReal mxxzPyyz = zeroReal;
-   LBMReal mxxzMyyz = zeroReal;
-   LBMReal mxyyPxzz = zeroReal;
-   LBMReal mxyyMxzz = zeroReal;
+   real mxxyPyzz = c0o1;
+   real mxxyMyzz = c0o1;
+   real mxxzPyyz = c0o1;
+   real mxxzMyyz = c0o1;
+   real mxyyPxzz = c0o1;
+   real mxyyMxzz = c0o1;
 
    // linear combinations back
    mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
@@ -1029,22 +1034,22 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   LBMReal m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + one * oMdrho) * (vx3Sq - vx3) * c1o2;
-   LBMReal m1 = -mfaac        - two * mfaab *  vx3         +  mfaaa                * (one - vx3Sq)              - one * oMdrho * vx3Sq;
-   LBMReal m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + one * oMdrho) * (vx3Sq + vx3) * c1o2;
+   real m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq - vx3) * c1o2;
+   real m1 = -mfaac        - c2o1 * mfaab *  vx3         +  mfaaa                * (c1o1 - vx3Sq)              - c1o1 * oMdrho * vx3Sq;
+   real m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfabc * c1o2 +      mfabb * (vx3 - c1o2) + mfaba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfabc        - two * mfabb *  vx3         + mfaba * (one - vx3Sq);
+   m1 = -mfabc        - c2o1 * mfabb *  vx3         + mfaba * (c1o1 - vx3Sq);
    m2 =  mfabc * c1o2 +      mfabb * (vx3 + c1o2) + mfaba * (vx3Sq + vx3) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfacb * (vx3 - c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfacc        - two * mfacb *  vx3         +  mfaca                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfacc        - c2o1 * mfacb *  vx3         +  mfaca                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfacc * c1o2 +      mfacb * (vx3 + c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -1052,21 +1057,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbac * c1o2 +      mfbab * (vx3 - c1o2) + mfbaa * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbac        - two * mfbab *  vx3         + mfbaa * (one - vx3Sq);
+   m1 = -mfbac        - c2o1 * mfbab *  vx3         + mfbaa * (c1o1 - vx3Sq);
    m2 =  mfbac * c1o2 +      mfbab * (vx3 + c1o2) + mfbaa * (vx3Sq + vx3) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbbc * c1o2 +      mfbbb * (vx3 - c1o2) + mfbba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbbc        - two * mfbbb *  vx3         + mfbba * (one - vx3Sq);
+   m1 = -mfbbc        - c2o1 * mfbbb *  vx3         + mfbba * (c1o1 - vx3Sq);
    m2 =  mfbbc * c1o2 +      mfbbb * (vx3 + c1o2) + mfbba * (vx3Sq + vx3) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbcb * (vx3 - c1o2) + mfbca * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbcc        - two * mfbcb *  vx3         + mfbca * (one - vx3Sq);
+   m1 = -mfbcc        - c2o1 * mfbcb *  vx3         + mfbca * (c1o1 - vx3Sq);
    m2 =  mfbcc * c1o2 +      mfbcb * (vx3 + c1o2) + mfbca * (vx3Sq + vx3) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -1074,21 +1079,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfcab * (vx3 - c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcac        - two * mfcab *  vx3         +  mfcaa                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfcac        - c2o1 * mfcab *  vx3         +  mfcaa                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfcac * c1o2 +      mfcab * (vx3 + c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfcbb * (vx3 - c1o2) + mfcba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcbc        - two * mfcbb *  vx3         + mfcba * (one - vx3Sq);
+   m1 = -mfcbc        - c2o1 * mfcbb *  vx3         + mfcba * (c1o1 - vx3Sq);
    m2 =  mfcbc * c1o2 +      mfcbb * (vx3 + c1o2) + mfcba * (vx3Sq + vx3) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfccb * (vx3 - c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfccc        - two * mfccb *  vx3         +  mfcca                  * (one - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
+   m1 = -mfccc        - c2o1 * mfccb *  vx3         +  mfcca                  * (c1o1 - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
    m2 =  mfccc * c1o2 +      mfccb * (vx3 + c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -1099,21 +1104,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 =  mfaca * c1o2 +      mfaba * (vx2 - c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfaca        - two * mfaba *  vx2         +  mfaaa                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfaca        - c2o1 * mfaba *  vx2         +  mfaaa                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfaca * c1o2 +      mfaba * (vx2 + c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacb * c1o2 +      mfabb * (vx2 - c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacb        - two * mfabb *  vx2         +  mfaab                  * (one - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
+   m1 = -mfacb        - c2o1 * mfabb *  vx2         +  mfaab                  * (c1o1 - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
    m2 =  mfacb * c1o2 +      mfabb * (vx2 + c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfabc * (vx2 - c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacc        - two * mfabc *  vx2         +  mfaac                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfacc        - c2o1 * mfabc *  vx2         +  mfaac                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfacc * c1o2 +      mfabc * (vx2 + c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -1121,21 +1126,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbca * c1o2 +      mfbba * (vx2 - c1o2) + mfbaa * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbca        - two * mfbba *  vx2         + mfbaa * (one - vx2Sq);
+   m1 = -mfbca        - c2o1 * mfbba *  vx2         + mfbaa * (c1o1 - vx2Sq);
    m2 =  mfbca * c1o2 +      mfbba * (vx2 + c1o2) + mfbaa * (vx2Sq + vx2) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcb * c1o2 +      mfbbb * (vx2 - c1o2) + mfbab * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcb        - two * mfbbb *  vx2         + mfbab * (one - vx2Sq);
+   m1 = -mfbcb        - c2o1 * mfbbb *  vx2         + mfbab * (c1o1 - vx2Sq);
    m2 =  mfbcb * c1o2 +      mfbbb * (vx2 + c1o2) + mfbab * (vx2Sq + vx2) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbbc * (vx2 - c1o2) + mfbac * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcc        - two * mfbbc *  vx2         + mfbac * (one - vx2Sq);
+   m1 = -mfbcc        - c2o1 * mfbbc *  vx2         + mfbac * (c1o1 - vx2Sq);
    m2 =  mfbcc * c1o2 +      mfbbc * (vx2 + c1o2) + mfbac * (vx2Sq + vx2) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -1143,21 +1148,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfcba * (vx2 - c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfcca        - two * mfcba *  vx2         +  mfcaa                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfcca        - c2o1 * mfcba *  vx2         +  mfcaa                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfcca * c1o2 +      mfcba * (vx2 + c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfcbb * (vx2 - c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccb        - two * mfcbb *  vx2         +  mfcab                  * (one - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
+   m1 = -mfccb        - c2o1 * mfcbb *  vx2         +  mfcab                  * (c1o1 - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
    m2 =  mfccb * c1o2 +      mfcbb * (vx2 + c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfcbc * (vx2 - c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccc        - two * mfcbc *  vx2         +  mfcac                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfccc        - c2o1 * mfcbc *  vx2         +  mfcac                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfccc * c1o2 +      mfcbc * (vx2 + c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -1168,21 +1173,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 =  mfcaa * c1o2 +      mfbaa * (vx1 - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcaa        - two * mfbaa *  vx1         +  mfaaa                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcaa        - c2o1 * mfbaa *  vx1         +  mfaaa                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcaa * c1o2 +      mfbaa * (vx1 + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcba * c1o2 +      mfbba * (vx1 - c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcba        - two * mfbba *  vx1         +  mfaba                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcba        - c2o1 * mfbba *  vx1         +  mfaba                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcba * c1o2 +      mfbba * (vx1 + c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfbca * (vx1 - c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcca        - two * mfbca *  vx1         +  mfaca                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcca        - c2o1 * mfbca *  vx1         +  mfaca                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcca * c1o2 +      mfbca * (vx1 + c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -1190,21 +1195,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcab * c1o2 +      mfbab * (vx1 - c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcab        - two * mfbab *  vx1         +  mfaab                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcab        - c2o1 * mfbab *  vx1         +  mfaab                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcab * c1o2 +      mfbab * (vx1 + c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfcbb * c1o2 +      mfbbb * (vx1 - c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbb        - two * mfbbb *  vx1         +  mfabb                  * (one - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
+   m1 = -mfcbb        - c2o1 * mfbbb *  vx1         +  mfabb                  * (c1o1 - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
    m2 =  mfcbb * c1o2 +      mfbbb * (vx1 + c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfbcb * (vx1 - c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccb        - two * mfbcb *  vx1         +  mfacb                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfccb        - c2o1 * mfbcb *  vx1         +  mfacb                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfccb * c1o2 +      mfbcb * (vx1 + c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -1212,21 +1217,21 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfbac * (vx1 - c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcac        - two * mfbac *  vx1         +  mfaac                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcac        - c2o1 * mfbac *  vx1         +  mfaac                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcac * c1o2 +      mfbac * (vx1 + c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfbbc * (vx1 - c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbc        - two * mfbbc *  vx1         +  mfabc                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcbc        - c2o1 * mfbbc *  vx1         +  mfabc                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcbc * c1o2 +      mfbbc * (vx1 + c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfbcc * (vx1 - c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccc        - two * mfbcc *  vx1         +  mfacc                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfccc        - c2o1 * mfbcc *  vx1         +  mfacc                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfccc * c1o2 +      mfbcc * (vx1 + c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -1262,14 +1267,14 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedNodeFC(LBM
    f[DIR_MMM] = mfaaa;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
@@ -1279,7 +1284,7 @@ void CompressibleOffsetMomentsInterpolationProcessor::calcInterpolatedShearStres
 	tauyz=0.5*((bz+2.0*bzz*z+bxz*x+byz*y+bxyz*x*y)+(cy+2.0*cyy*y+cxy*x+cyz*z+cxyz*x*z));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetMomentsInterpolationProcessor::setBulkViscosity(LBMReal shearViscosity, LBMReal bulkViscosity)
+void CompressibleOffsetMomentsInterpolationProcessor::setBulkViscosity(real shearViscosity, real bulkViscosity)
 {
    this->shearViscosity = shearViscosity;
    this->bulkViscosity  = bulkViscosity;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.h
index bee108e64a9294a3286e3b79519d496bd5ac91cf..32ab8cedf89e2e644f2f939f49ed4b0101eb0e32 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetMomentsInterpolationProcessor.h
@@ -15,57 +15,57 @@ class CompressibleOffsetMomentsInterpolationProcessor : public InterpolationProc
 {
 public:
    CompressibleOffsetMomentsInterpolationProcessor();
-   CompressibleOffsetMomentsInterpolationProcessor(LBMReal omegaC, LBMReal omegaF);
+   CompressibleOffsetMomentsInterpolationProcessor(real omegaC, real omegaF);
    ~CompressibleOffsetMomentsInterpolationProcessor() override;
    InterpolationProcessorPtr clone() override;
-   void setOmegas(LBMReal omegaC, LBMReal omegaF) override;
+   void setOmegas(real omegaC, real omegaF) override;
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF) override;
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC) override; 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff) override; 
-   void setBulkViscosity(LBMReal shearViscosity, LBMReal bulkViscosity);
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff) override;
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC) override; 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff) override; 
+   void setBulkViscosity(real shearViscosity, real bulkViscosity);
 protected:   
 private:
-   LBMReal omegaC{0.0}, omegaF{0.0};
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC{0.0}, omegaF{0.0};
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
-//   LBMReal a,b,c;
+//   real a,b,c;
 
    // bulk viscosity
-   LBMReal shearViscosity;
-   LBMReal bulkViscosity;
-   LBMReal OxxPyyPzzC;
-   LBMReal OxxPyyPzzF;
+   real shearViscosity;
+   real bulkViscosity;
+   real OxxPyyPzzC;
+   real OxxPyyPzzF;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new) override;
-   void calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega) override;
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3) override;
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz) override;
+   void setOffsets(real xoff, real yoff, real zoff) override;
+   void calcMoments(const real* const f, real omega, real& rho, real& vx1, real& vx2, real& vx3, 
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new) override;
+   void calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega) override;
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3) override;
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz) override;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -74,7 +74,7 @@ inline void CompressibleOffsetMomentsInterpolationProcessor::interpolateCoarseTo
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void CompressibleOffsetMomentsInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void CompressibleOffsetMomentsInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.cpp
index c9cc8138dcf1d4ce11ee4e2aa7b733f2174f367d..80eb84112dc43ab2a502c9213636b7f8dde18a35 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.cpp
@@ -1,22 +1,23 @@
 #include "CompressibleOffsetSquarePressureInterpolationProcessor.h"
 #include "D3Q27System.h"
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 CompressibleOffsetSquarePressureInterpolationProcessor::CompressibleOffsetSquarePressureInterpolationProcessor()
     
 {
    this->bulkOmegaToOmega = false;
-   this->OxxPyyPzzC = one;
-   this->OxxPyyPzzF = one;
+   this->OxxPyyPzzC = c1o1;
+   this->OxxPyyPzzF = c1o1;
 }
 //////////////////////////////////////////////////////////////////////////
-CompressibleOffsetSquarePressureInterpolationProcessor::CompressibleOffsetSquarePressureInterpolationProcessor(LBMReal omegaC, LBMReal omegaF)
+CompressibleOffsetSquarePressureInterpolationProcessor::CompressibleOffsetSquarePressureInterpolationProcessor(real omegaC, real omegaF)
    : omegaC(omegaC), omegaF(omegaF)
 {
    this->bulkOmegaToOmega = false;
-   this->OxxPyyPzzC = one;
-   this->OxxPyyPzzF = one;
+   this->OxxPyyPzzC = c1o1;
+   this->OxxPyyPzzF = c1o1;
 }
 //////////////////////////////////////////////////////////////////////////
 CompressibleOffsetSquarePressureInterpolationProcessor::~CompressibleOffsetSquarePressureInterpolationProcessor()
@@ -32,19 +33,19 @@ InterpolationProcessorPtr CompressibleOffsetSquarePressureInterpolationProcessor
    }
    else
    {
-      dynamicPointerCast<CompressibleOffsetSquarePressureInterpolationProcessor>(iproc)->OxxPyyPzzC = one;
-      dynamicPointerCast<CompressibleOffsetSquarePressureInterpolationProcessor>(iproc)->OxxPyyPzzF = one;
+      dynamicPointerCast<CompressibleOffsetSquarePressureInterpolationProcessor>(iproc)->OxxPyyPzzC = c1o1;
+      dynamicPointerCast<CompressibleOffsetSquarePressureInterpolationProcessor>(iproc)->OxxPyyPzzF = c1o1;
    }
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void CompressibleOffsetSquarePressureInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetSquarePressureInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -54,7 +55,7 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::setOffsets(LBMReal
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellC, omegaC, 0.5);
@@ -68,49 +69,50 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateCoarseTo
    calcInterpolatedNodeCF(icellF.TNE, omegaF,  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellF, omegaF, 2.0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-                                                    LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcMoments(const real* const f, real omega, real& press, real& vx1, real& vx2, real& vx3, 
+                                                    real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal drho = 0.0;
+   real drho = 0.0;
    D3Q27System::calcCompMacroscopicValues(f,drho,vx1,vx2,vx3);
    
    press = drho; //interpolate rho!
 
-   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(one + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
-   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(one + drho)-(vx2*vx3));
-   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(one + drho)-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(one + drho)-(vx1*vx1-vx2*vx2));
-   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(one + drho)-(vx1*vx1-vx3*vx3));
+   kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))/(c1o1 + drho)-(vx1*vx2));// might not be optimal MG 25.2.13
+   kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))/(c1o1 + drho)-(vx2*vx3));
+   kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))/(c1o1 + drho)-(vx1*vx3));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))/(c1o1 + drho)-(vx1*vx1-vx2*vx2));
+   kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))/(c1o1 + drho)-(vx1*vx1-vx3*vx3));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -365,7 +367,7 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedCoe
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   const LBMReal o = omega;
+   const real o = omega;
 
    f_E = eps_new*((2*(-2*ax + by + cz-kxxMzzAverage-kxxMyyAverage))/(27.*o));
    f_N = eps_new*((2*(ax - 2*by + cz+2*kxxMyyAverage-kxxMzzAverage))/(27.*o));
@@ -473,84 +475,85 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedCoe
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal eps_new = 0.5;
-   LBMReal o = omega;
+   real eps_new = 0.5;
+   real o = omega;
    //bulk viscosity
-   LBMReal oP = OxxPyyPzzF;
+   real oP = OxxPyyPzzF;
 
-   LBMReal rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
+   real rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
 
-   LBMReal laplaceRho = (xoff!=0.0 || yoff!=0.0 || zoff!= 0.0) ? 0.0 :(-3.0*(by*by+ax*ax+cz*cz)-6.0*(ay*bx+bz*cy+az*cx))*(1.0+rho);
+   real laplaceRho = (xoff!=0.0 || yoff!=0.0 || zoff!= 0.0) ? 0.0 :(-3.0*(by*by+ax*ax+cz*cz)-6.0*(ay*bx+bz*cy+az*cx))*(1.0+rho);
 
    rho=rho+laplaceRho*(3.0/16.0);
 
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
-
-   LBMReal mfcbb = zeroReal;
-   LBMReal mfabb = zeroReal;
-   LBMReal mfbcb = zeroReal;
-   LBMReal mfbab = zeroReal;
-   LBMReal mfbbc = zeroReal;
-   LBMReal mfbba = zeroReal;
-   LBMReal mfccb = zeroReal;
-   LBMReal mfaab = zeroReal;
-   LBMReal mfcab = zeroReal;
-   LBMReal mfacb = zeroReal;
-   LBMReal mfcbc = zeroReal;
-   LBMReal mfaba = zeroReal;
-   LBMReal mfcba = zeroReal;
-   LBMReal mfabc = zeroReal;
-   LBMReal mfbcc = zeroReal;
-   LBMReal mfbaa = zeroReal;
-   LBMReal mfbca = zeroReal;
-   LBMReal mfbac = zeroReal;
-   LBMReal mfbbb = zeroReal;
-   LBMReal mfccc = zeroReal;
-   LBMReal mfaac = zeroReal;
-   LBMReal mfcac = zeroReal;
-   LBMReal mfacc = zeroReal;
-   LBMReal mfcca = zeroReal;
-   LBMReal mfaaa = zeroReal;
-   LBMReal mfcaa = zeroReal;
-   LBMReal mfaca = zeroReal;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+
+   real mfcbb = c0o1;
+   real mfabb = c0o1;
+   real mfbcb = c0o1;
+   real mfbab = c0o1;
+   real mfbbc = c0o1;
+   real mfbba = c0o1;
+   real mfccb = c0o1;
+   real mfaab = c0o1;
+   real mfcab = c0o1;
+   real mfacb = c0o1;
+   real mfcbc = c0o1;
+   real mfaba = c0o1;
+   real mfcba = c0o1;
+   real mfabc = c0o1;
+   real mfbcc = c0o1;
+   real mfbaa = c0o1;
+   real mfbca = c0o1;
+   real mfbac = c0o1;
+   real mfbbb = c0o1;
+   real mfccc = c0o1;
+   real mfaac = c0o1;
+   real mfcac = c0o1;
+   real mfacc = c0o1;
+   real mfcca = c0o1;
+   real mfaaa = c0o1;
+   real mfcaa = c0o1;
+   real mfaca = c0o1;
 
    mfaaa = rho; // if drho is interpolated directly
 
-   LBMReal vx1Sq = vx1*vx1;
-   LBMReal vx2Sq = vx2*vx2;
-   LBMReal vx3Sq = vx3*vx3;
-   LBMReal oMdrho = one;
+   real vx1Sq = vx1*vx1;
+   real vx2Sq = vx2*vx2;
+   real vx3Sq = vx3*vx3;
+   real oMdrho = c1o1;
 
    //2.f
 
    // linear combinations
-   LBMReal mxxPyyPzz = mfaaa - c2o3*(ax + by + two*axx*x + bxy*x + axy*y + two*byy*y + axz*z + byz*z + bxyz*x*z + axyz*y*z + cz - cxz*x + cyz*y + cxyz*x*y + two*czz*z)*eps_new / oP* (one + press);
-   LBMReal mxxMyy    = -c2o3*(ax - by + kxxMyyAverage + two*axx*x - bxy*x + axy*y - two*byy*y + axz*z - byz*z - bxyz*x*z + axyz*y*z)*eps_new/o * (one + press);
-   LBMReal mxxMzz    = -c2o3*(ax - cz + kxxMzzAverage + two*axx*x - cxz*x + axy*y - cyz*y - cxyz*x*y + axz*z - two*czz*z + axyz*y*z)*eps_new/o * (one + press);
+   real mxxPyyPzz = mfaaa - c2o3*(ax + by + c2o1*axx*x + bxy*x + axy*y + c2o1*byy*y + axz*z + byz*z + bxyz*x*z + axyz*y*z + cz - cxz*x + cyz*y + cxyz*x*y + c2o1*czz*z)*eps_new / oP* (c1o1 + press);
+   real mxxMyy    = -c2o3*(ax - by + kxxMyyAverage + c2o1*axx*x - bxy*x + axy*y - c2o1*byy*y + axz*z - byz*z - bxyz*x*z + axyz*y*z)*eps_new/o * (c1o1 + press);
+   real mxxMzz    = -c2o3*(ax - cz + kxxMzzAverage + c2o1*axx*x - cxz*x + axy*y - cyz*y - cxyz*x*y + axz*z - c2o1*czz*z + axyz*y*z)*eps_new/o * (c1o1 + press);
 
-   mfabb     = -c1o3 * (bz + cy + kyzAverage + bxz*x + cxy*x + byz*y + two*cyy*y + bxyz*x*y + two*bzz*z + cyz*z + cxyz*x*z)*eps_new/o * (one + press);
-   mfbab     = -c1o3 * (az + cx + kxzAverage + axz*x + two*cxx*x + ayz*y + cxy*y + axyz*x*y + two*azz*z + cxz*z + cxyz*y*z)*eps_new/o * (one + press);
-   mfbba     = -c1o3 * (ay + bx + kxyAverage + axy*x + two*bxx*x + two*ayy*y + bxy*y + ayz*z + bxz*z + axyz*x*z + bxyz*y*z)*eps_new/o * (one + press);
+   mfabb     = -c1o3 * (bz + cy + kyzAverage + bxz*x + cxy*x + byz*y + c2o1*cyy*y + bxyz*x*y + c2o1*bzz*z + cyz*z + cxyz*x*z)*eps_new/o * (c1o1 + press);
+   mfbab     = -c1o3 * (az + cx + kxzAverage + axz*x + c2o1*cxx*x + ayz*y + cxy*y + axyz*x*y + c2o1*azz*z + cxz*z + cxyz*y*z)*eps_new/o * (c1o1 + press);
+   mfbba     = -c1o3 * (ay + bx + kxyAverage + axy*x + c2o1*bxx*x + c2o1*ayy*y + bxy*y + ayz*z + bxz*z + axyz*x*z + bxyz*y*z)*eps_new/o * (c1o1 + press);
 
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy +       mxxMzz + mxxPyyPzz) ;
-   mfaca = c1o3 * (-two * mxxMyy +       mxxMzz + mxxPyyPzz) ;
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz) ;
+   mfaca = c1o3 * (-c2o1 * mxxMyy +       mxxMzz + mxxPyyPzz) ;
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz) ;
 
    //three
-   mfbbb = zeroReal;
-   LBMReal mxxyPyzz = zeroReal;
-   LBMReal mxxyMyzz = zeroReal;
-   LBMReal mxxzPyyz = zeroReal;
-   LBMReal mxxzMyyz = zeroReal;
-   LBMReal mxyyPxzz =  zeroReal;
-   LBMReal mxyyMxzz = zeroReal;
+   mfbbb = c0o1;
+   real mxxyPyzz = c0o1;
+   real mxxyMyzz = c0o1;
+   real mxxzPyyz = c0o1;
+   real mxxzMyyz = c0o1;
+   real mxyyPxzz =  c0o1;
+   real mxyyMxzz = c0o1;
 
    // linear combinations back
    mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
@@ -576,22 +579,22 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   LBMReal m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + one * oMdrho) * (vx3Sq - vx3) * c1o2;
-   LBMReal m1 = -mfaac        - two * mfaab *  vx3         +  mfaaa                * (one - vx3Sq)              - one * oMdrho * vx3Sq;
-   LBMReal m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + one * oMdrho) * (vx3Sq + vx3) * c1o2;
+   real m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq - vx3) * c1o2;
+   real m1 = -mfaac        - c2o1 * mfaab *  vx3         +  mfaaa                * (c1o1 - vx3Sq)              - c1o1 * oMdrho * vx3Sq;
+   real m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfabc * c1o2 +      mfabb * (vx3 - c1o2) + mfaba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfabc        - two * mfabb *  vx3         + mfaba * (one - vx3Sq);
+   m1 = -mfabc        - c2o1 * mfabb *  vx3         + mfaba * (c1o1 - vx3Sq);
    m2 =  mfabc * c1o2 +      mfabb * (vx3 + c1o2) + mfaba * (vx3Sq + vx3) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfacb * (vx3 - c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfacc        - two * mfacb *  vx3         +  mfaca                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfacc        - c2o1 * mfacb *  vx3         +  mfaca                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfacc * c1o2 +      mfacb * (vx3 + c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -599,21 +602,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbac * c1o2 +      mfbab * (vx3 - c1o2) + mfbaa * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbac        - two * mfbab *  vx3         + mfbaa * (one - vx3Sq);
+   m1 = -mfbac        - c2o1 * mfbab *  vx3         + mfbaa * (c1o1 - vx3Sq);
    m2 =  mfbac * c1o2 +      mfbab * (vx3 + c1o2) + mfbaa * (vx3Sq + vx3) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbbc * c1o2 +      mfbbb * (vx3 - c1o2) + mfbba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbbc        - two * mfbbb *  vx3         + mfbba * (one - vx3Sq);
+   m1 = -mfbbc        - c2o1 * mfbbb *  vx3         + mfbba * (c1o1 - vx3Sq);
    m2 =  mfbbc * c1o2 +      mfbbb * (vx3 + c1o2) + mfbba * (vx3Sq + vx3) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbcb * (vx3 - c1o2) + mfbca * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbcc        - two * mfbcb *  vx3         + mfbca * (one - vx3Sq);
+   m1 = -mfbcc        - c2o1 * mfbcb *  vx3         + mfbca * (c1o1 - vx3Sq);
    m2 =  mfbcc * c1o2 +      mfbcb * (vx3 + c1o2) + mfbca * (vx3Sq + vx3) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -621,21 +624,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfcab * (vx3 - c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcac        - two * mfcab *  vx3         +  mfcaa                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfcac        - c2o1 * mfcab *  vx3         +  mfcaa                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfcac * c1o2 +      mfcab * (vx3 + c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfcbb * (vx3 - c1o2) + mfcba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcbc        - two * mfcbb *  vx3         + mfcba * (one - vx3Sq);
+   m1 = -mfcbc        - c2o1 * mfcbb *  vx3         + mfcba * (c1o1 - vx3Sq);
    m2 =  mfcbc * c1o2 +      mfcbb * (vx3 + c1o2) + mfcba * (vx3Sq + vx3) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfccb * (vx3 - c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfccc        - two * mfccb *  vx3         +  mfcca                  * (one - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
+   m1 = -mfccc        - c2o1 * mfccb *  vx3         +  mfcca                  * (c1o1 - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
    m2 =  mfccc * c1o2 +      mfccb * (vx3 + c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -646,21 +649,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 =  mfaca * c1o2 +      mfaba * (vx2 - c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfaca        - two * mfaba *  vx2         +  mfaaa                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfaca        - c2o1 * mfaba *  vx2         +  mfaaa                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfaca * c1o2 +      mfaba * (vx2 + c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacb * c1o2 +      mfabb * (vx2 - c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacb        - two * mfabb *  vx2         +  mfaab                  * (one - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
+   m1 = -mfacb        - c2o1 * mfabb *  vx2         +  mfaab                  * (c1o1 - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
    m2 =  mfacb * c1o2 +      mfabb * (vx2 + c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfabc * (vx2 - c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacc        - two * mfabc *  vx2         +  mfaac                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfacc        - c2o1 * mfabc *  vx2         +  mfaac                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfacc * c1o2 +      mfabc * (vx2 + c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -668,21 +671,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbca * c1o2 +      mfbba * (vx2 - c1o2) + mfbaa * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbca        - two * mfbba *  vx2         + mfbaa * (one - vx2Sq);
+   m1 = -mfbca        - c2o1 * mfbba *  vx2         + mfbaa * (c1o1 - vx2Sq);
    m2 =  mfbca * c1o2 +      mfbba * (vx2 + c1o2) + mfbaa * (vx2Sq + vx2) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcb * c1o2 +      mfbbb * (vx2 - c1o2) + mfbab * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcb        - two * mfbbb *  vx2         + mfbab * (one - vx2Sq);
+   m1 = -mfbcb        - c2o1 * mfbbb *  vx2         + mfbab * (c1o1 - vx2Sq);
    m2 =  mfbcb * c1o2 +      mfbbb * (vx2 + c1o2) + mfbab * (vx2Sq + vx2) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbbc * (vx2 - c1o2) + mfbac * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcc        - two * mfbbc *  vx2         + mfbac * (one - vx2Sq);
+   m1 = -mfbcc        - c2o1 * mfbbc *  vx2         + mfbac * (c1o1 - vx2Sq);
    m2 =  mfbcc * c1o2 +      mfbbc * (vx2 + c1o2) + mfbac * (vx2Sq + vx2) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -690,21 +693,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfcba * (vx2 - c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfcca        - two * mfcba *  vx2         +  mfcaa                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfcca        - c2o1 * mfcba *  vx2         +  mfcaa                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfcca * c1o2 +      mfcba * (vx2 + c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfcbb * (vx2 - c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccb        - two * mfcbb *  vx2         +  mfcab                  * (one - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
+   m1 = -mfccb        - c2o1 * mfcbb *  vx2         +  mfcab                  * (c1o1 - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
    m2 =  mfccb * c1o2 +      mfcbb * (vx2 + c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfcbc * (vx2 - c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccc        - two * mfcbc *  vx2         +  mfcac                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfccc        - c2o1 * mfcbc *  vx2         +  mfcac                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfccc * c1o2 +      mfcbc * (vx2 + c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -715,21 +718,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 =  mfcaa * c1o2 +      mfbaa * (vx1 - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcaa        - two * mfbaa *  vx1         +  mfaaa                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcaa        - c2o1 * mfbaa *  vx1         +  mfaaa                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcaa * c1o2 +      mfbaa * (vx1 + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcba * c1o2 +      mfbba * (vx1 - c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcba        - two * mfbba *  vx1         +  mfaba                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcba        - c2o1 * mfbba *  vx1         +  mfaba                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcba * c1o2 +      mfbba * (vx1 + c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfbca * (vx1 - c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcca        - two * mfbca *  vx1         +  mfaca                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcca        - c2o1 * mfbca *  vx1         +  mfaca                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcca * c1o2 +      mfbca * (vx1 + c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -737,21 +740,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcab * c1o2 +      mfbab * (vx1 - c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcab        - two * mfbab *  vx1         +  mfaab                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcab        - c2o1 * mfbab *  vx1         +  mfaab                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcab * c1o2 +      mfbab * (vx1 + c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfcbb * c1o2 +      mfbbb * (vx1 - c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbb        - two * mfbbb *  vx1         +  mfabb                  * (one - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
+   m1 = -mfcbb        - c2o1 * mfbbb *  vx1         +  mfabb                  * (c1o1 - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
    m2 =  mfcbb * c1o2 +      mfbbb * (vx1 + c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfbcb * (vx1 - c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccb        - two * mfbcb *  vx1         +  mfacb                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfccb        - c2o1 * mfbcb *  vx1         +  mfacb                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfccb * c1o2 +      mfbcb * (vx1 + c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -759,21 +762,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfbac * (vx1 - c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcac        - two * mfbac *  vx1         +  mfaac                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcac        - c2o1 * mfbac *  vx1         +  mfaac                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcac * c1o2 +      mfbac * (vx1 + c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfbbc * (vx1 - c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbc        - two * mfbbc *  vx1         +  mfabc                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcbc        - c2o1 * mfbbc *  vx1         +  mfabc                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcbc * c1o2 +      mfbbc * (vx1 + c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfbcc * (vx1 - c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccc        - two * mfbcc *  vx1         +  mfacc                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfccc        - c2o1 * mfbcc *  vx1         +  mfacc                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfccc * c1o2 +      mfbcc * (vx1 + c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -810,7 +813,7 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSW()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -823,7 +826,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSW()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -836,7 +839,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSE()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -849,7 +852,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSE()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -862,7 +865,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNW()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -875,7 +878,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNW()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -888,7 +891,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNE()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -901,7 +904,7 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNE()
+real CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -914,11 +917,12 @@ LBMReal CompressibleOffsetSquarePressureInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -926,86 +930,86 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
   
    
-   LBMReal rho = press ;//+ (ax+by+cz)/3.;
+   real rho = press ;//+ (ax+by+cz)/3.;
 
-   LBMReal laplaceRho = (xoff!=0.0 || yoff!=0.0 || zoff!= 0.0) ? 0.0 :(-3.0*(by*by+ax*ax+cz*cz)-6.0*(ay*bx+bz*cy+az*cx))*(1.0+rho);
+   real laplaceRho = (xoff!=0.0 || yoff!=0.0 || zoff!= 0.0) ? 0.0 :(-3.0*(by*by+ax*ax+cz*cz)-6.0*(ay*bx+bz*cy+az*cx))*(1.0+rho);
 
    rho=rho-laplaceRho*0.25;
 
-   LBMReal eps_new = 2.0;
-   LBMReal o  = omega;
+   real eps_new = 2.0;
+   real o  = omega;
    //bulk viscosity
-   LBMReal oP = OxxPyyPzzC;
-
-   LBMReal mfcbb = zeroReal;
-   LBMReal mfabb = zeroReal;
-   LBMReal mfbcb = zeroReal;
-   LBMReal mfbab = zeroReal;
-   LBMReal mfbbc = zeroReal;
-   LBMReal mfbba = zeroReal;
-   LBMReal mfccb = zeroReal;
-   LBMReal mfaab = zeroReal;
-   LBMReal mfcab = zeroReal;
-   LBMReal mfacb = zeroReal;
-   LBMReal mfcbc = zeroReal;
-   LBMReal mfaba = zeroReal;
-   LBMReal mfcba = zeroReal;
-   LBMReal mfabc = zeroReal;
-   LBMReal mfbcc = zeroReal;
-   LBMReal mfbaa = zeroReal;
-   LBMReal mfbca = zeroReal;
-   LBMReal mfbac = zeroReal;
-   LBMReal mfbbb = zeroReal;
-   LBMReal mfccc = zeroReal;
-   LBMReal mfaac = zeroReal;
-   LBMReal mfcac = zeroReal;
-   LBMReal mfacc = zeroReal;
-   LBMReal mfcca = zeroReal;
-   LBMReal mfaaa = zeroReal;
-   LBMReal mfcaa = zeroReal;
-   LBMReal mfaca = zeroReal;
+   real oP = OxxPyyPzzC;
+
+   real mfcbb = c0o1;
+   real mfabb = c0o1;
+   real mfbcb = c0o1;
+   real mfbab = c0o1;
+   real mfbbc = c0o1;
+   real mfbba = c0o1;
+   real mfccb = c0o1;
+   real mfaab = c0o1;
+   real mfcab = c0o1;
+   real mfacb = c0o1;
+   real mfcbc = c0o1;
+   real mfaba = c0o1;
+   real mfcba = c0o1;
+   real mfabc = c0o1;
+   real mfbcc = c0o1;
+   real mfbaa = c0o1;
+   real mfbca = c0o1;
+   real mfbac = c0o1;
+   real mfbbb = c0o1;
+   real mfccc = c0o1;
+   real mfaac = c0o1;
+   real mfcac = c0o1;
+   real mfacc = c0o1;
+   real mfcca = c0o1;
+   real mfaaa = c0o1;
+   real mfcaa = c0o1;
+   real mfaca = c0o1;
 
    mfaaa = rho; // if drho is interpolated directly
 
-   LBMReal vx1Sq = vx1*vx1;
-   LBMReal vx2Sq = vx2*vx2;
-   LBMReal vx3Sq = vx3*vx3;
-   LBMReal oMdrho = one;
-   //oMdrho = one - mfaaa;
+   real vx1Sq = vx1*vx1;
+   real vx2Sq = vx2*vx2;
+   real vx3Sq = vx3*vx3;
+   real oMdrho = c1o1;
+   //oMdrho = c1o1 - mfaaa;
 
    //2.f
    // linear combinations
 
 /////////////////////////
-   LBMReal mxxPyyPzz = mfaaa    -c2o3*(ax+by+cz)*eps_new/oP*(one+press);
+   real mxxPyyPzz = mfaaa    -c2o3*(ax+by+cz)*eps_new/oP*(c1o1+press);
 
-   LBMReal mxxMyy    = -c2o3*((ax - by)+kxxMyyAverage)*eps_new/o * (one + press);
-   LBMReal mxxMzz    = -c2o3*((ax - cz)+kxxMzzAverage)*eps_new/o * (one + press);
+   real mxxMyy    = -c2o3*((ax - by)+kxxMyyAverage)*eps_new/o * (c1o1 + press);
+   real mxxMzz    = -c2o3*((ax - cz)+kxxMzzAverage)*eps_new/o * (c1o1 + press);
 
-   mfabb     = -c1o3 * ((bz + cy)+kyzAverage)*eps_new/o * (one + press);
-   mfbab     = -c1o3 * ((az + cx)+kxzAverage)*eps_new/o * (one + press);
-   mfbba     = -c1o3 * ((ay + bx)+kxyAverage)*eps_new/o * (one + press);
+   mfabb     = -c1o3 * ((bz + cy)+kyzAverage)*eps_new/o * (c1o1 + press);
+   mfbab     = -c1o3 * ((az + cx)+kxzAverage)*eps_new/o * (c1o1 + press);
+   mfbba     = -c1o3 * ((ay + bx)+kxyAverage)*eps_new/o * (c1o1 + press);
 
    ////////////////////////
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy +       mxxMzz + mxxPyyPzz);
-   mfaca = c1o3 * (-two * mxxMyy +       mxxMzz + mxxPyyPzz);
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz);
+   mfaca = c1o3 * (-c2o1 * mxxMyy +       mxxMzz + mxxPyyPzz);
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
    //three
-   mfbbb = zeroReal;
+   mfbbb = c0o1;
 
-   LBMReal mxxyPyzz = zeroReal;
-   LBMReal mxxyMyzz = zeroReal;
-   LBMReal mxxzPyyz = zeroReal;
-   LBMReal mxxzMyyz = zeroReal;
-   LBMReal mxyyPxzz =  zeroReal;
-   LBMReal mxyyMxzz = zeroReal;
+   real mxxyPyzz = c0o1;
+   real mxxyMyzz = c0o1;
+   real mxxzPyyz = c0o1;
+   real mxxzMyyz = c0o1;
+   real mxyyPxzz =  c0o1;
+   real mxyyMxzz = c0o1;
 
    // linear combinations back
    mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
@@ -1029,22 +1033,22 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   LBMReal m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + one * oMdrho) * (vx3Sq - vx3) * c1o2;
-   LBMReal m1 = -mfaac        - two * mfaab *  vx3         +  mfaaa                * (one - vx3Sq)              - one * oMdrho * vx3Sq;
-   LBMReal m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + one * oMdrho) * (vx3Sq + vx3) * c1o2;
+   real m0 =  mfaac * c1o2 +      mfaab * (vx3 - c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq - vx3) * c1o2;
+   real m1 = -mfaac        - c2o1 * mfaab *  vx3         +  mfaaa                * (c1o1 - vx3Sq)              - c1o1 * oMdrho * vx3Sq;
+   real m2 =  mfaac * c1o2 +      mfaab * (vx3 + c1o2) + (mfaaa + c1o1 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfabc * c1o2 +      mfabb * (vx3 - c1o2) + mfaba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfabc        - two * mfabb *  vx3         + mfaba * (one - vx3Sq);
+   m1 = -mfabc        - c2o1 * mfabb *  vx3         + mfaba * (c1o1 - vx3Sq);
    m2 =  mfabc * c1o2 +      mfabb * (vx3 + c1o2) + mfaba * (vx3Sq + vx3) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfacb * (vx3 - c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfacc        - two * mfacb *  vx3         +  mfaca                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfacc        - c2o1 * mfacb *  vx3         +  mfaca                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfacc * c1o2 +      mfacb * (vx3 + c1o2) + (mfaca + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -1052,21 +1056,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbac * c1o2 +      mfbab * (vx3 - c1o2) + mfbaa * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbac        - two * mfbab *  vx3         + mfbaa * (one - vx3Sq);
+   m1 = -mfbac        - c2o1 * mfbab *  vx3         + mfbaa * (c1o1 - vx3Sq);
    m2 =  mfbac * c1o2 +      mfbab * (vx3 + c1o2) + mfbaa * (vx3Sq + vx3) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbbc * c1o2 +      mfbbb * (vx3 - c1o2) + mfbba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbbc        - two * mfbbb *  vx3         + mfbba * (one - vx3Sq);
+   m1 = -mfbbc        - c2o1 * mfbbb *  vx3         + mfbba * (c1o1 - vx3Sq);
    m2 =  mfbbc * c1o2 +      mfbbb * (vx3 + c1o2) + mfbba * (vx3Sq + vx3) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbcb * (vx3 - c1o2) + mfbca * (vx3Sq - vx3) * c1o2;
-   m1 = -mfbcc        - two * mfbcb *  vx3         + mfbca * (one - vx3Sq);
+   m1 = -mfbcc        - c2o1 * mfbcb *  vx3         + mfbca * (c1o1 - vx3Sq);
    m2 =  mfbcc * c1o2 +      mfbcb * (vx3 + c1o2) + mfbca * (vx3Sq + vx3) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -1074,21 +1078,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfcab * (vx3 - c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcac        - two * mfcab *  vx3         +  mfcaa                  * (one - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
+   m1 = -mfcac        - c2o1 * mfcab *  vx3         +  mfcaa                  * (c1o1 - vx3Sq)              - c1o3 * oMdrho * vx3Sq;
    m2 =  mfcac * c1o2 +      mfcab * (vx3 + c1o2) + (mfcaa + c1o3 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfcbb * (vx3 - c1o2) + mfcba * (vx3Sq - vx3) * c1o2;
-   m1 = -mfcbc        - two * mfcbb *  vx3         + mfcba * (one - vx3Sq);
+   m1 = -mfcbc        - c2o1 * mfcbb *  vx3         + mfcba * (c1o1 - vx3Sq);
    m2 =  mfcbc * c1o2 +      mfcbb * (vx3 + c1o2) + mfcba * (vx3Sq + vx3) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfccb * (vx3 - c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq - vx3) * c1o2;
-   m1 = -mfccc        - two * mfccb *  vx3         +  mfcca                  * (one - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
+   m1 = -mfccc        - c2o1 * mfccb *  vx3         +  mfcca                  * (c1o1 - vx3Sq)              - c1o9 * oMdrho * vx3Sq;
    m2 =  mfccc * c1o2 +      mfccb * (vx3 + c1o2) + (mfcca + c1o9 * oMdrho) * (vx3Sq + vx3) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -1099,21 +1103,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 =  mfaca * c1o2 +      mfaba * (vx2 - c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfaca        - two * mfaba *  vx2         +  mfaaa                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfaca        - c2o1 * mfaba *  vx2         +  mfaaa                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfaca * c1o2 +      mfaba * (vx2 + c1o2) + (mfaaa + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacb * c1o2 +      mfabb * (vx2 - c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacb        - two * mfabb *  vx2         +  mfaab                  * (one - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
+   m1 = -mfacb        - c2o1 * mfabb *  vx2         +  mfaab                  * (c1o1 - vx2Sq)              - c2o3 * oMdrho * vx2Sq;
    m2 =  mfacb * c1o2 +      mfabb * (vx2 + c1o2) + (mfaab + c2o3 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfacc * c1o2 +      mfabc * (vx2 - c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfacc        - two * mfabc *  vx2         +  mfaac                  * (one - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
+   m1 = -mfacc        - c2o1 * mfabc *  vx2         +  mfaac                  * (c1o1 - vx2Sq)              - c1o6 * oMdrho * vx2Sq;
    m2 =  mfacc * c1o2 +      mfabc * (vx2 + c1o2) + (mfaac + c1o6 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -1121,21 +1125,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfbca * c1o2 +      mfbba * (vx2 - c1o2) + mfbaa * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbca        - two * mfbba *  vx2         + mfbaa * (one - vx2Sq);
+   m1 = -mfbca        - c2o1 * mfbba *  vx2         + mfbaa * (c1o1 - vx2Sq);
    m2 =  mfbca * c1o2 +      mfbba * (vx2 + c1o2) + mfbaa * (vx2Sq + vx2) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcb * c1o2 +      mfbbb * (vx2 - c1o2) + mfbab * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcb        - two * mfbbb *  vx2         + mfbab * (one - vx2Sq);
+   m1 = -mfbcb        - c2o1 * mfbbb *  vx2         + mfbab * (c1o1 - vx2Sq);
    m2 =  mfbcb * c1o2 +      mfbbb * (vx2 + c1o2) + mfbab * (vx2Sq + vx2) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 =  mfbcc * c1o2 +      mfbbc * (vx2 - c1o2) + mfbac * (vx2Sq - vx2) * c1o2;
-   m1 = -mfbcc        - two * mfbbc *  vx2         + mfbac * (one - vx2Sq);
+   m1 = -mfbcc        - c2o1 * mfbbc *  vx2         + mfbac * (c1o1 - vx2Sq);
    m2 =  mfbcc * c1o2 +      mfbbc * (vx2 + c1o2) + mfbac * (vx2Sq + vx2) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -1143,21 +1147,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfcba * (vx2 - c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfcca        - two * mfcba *  vx2         +  mfcaa                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfcca        - c2o1 * mfcba *  vx2         +  mfcaa                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfcca * c1o2 +      mfcba * (vx2 + c1o2) + (mfcaa + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfcbb * (vx2 - c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccb        - two * mfcbb *  vx2         +  mfcab                  * (one - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
+   m1 = -mfccb        - c2o1 * mfcbb *  vx2         +  mfcab                  * (c1o1 - vx2Sq)              - c2o9 * oMdrho * vx2Sq;
    m2 =  mfccb * c1o2 +      mfcbb * (vx2 + c1o2) + (mfcab + c2o9 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfcbc * (vx2 - c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq - vx2) * c1o2;
-   m1 = -mfccc        - two * mfcbc *  vx2         +  mfcac                   * (one - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
+   m1 = -mfccc        - c2o1 * mfcbc *  vx2         +  mfcac                   * (c1o1 - vx2Sq)              - c1o18 * oMdrho * vx2Sq;
    m2 =  mfccc * c1o2 +      mfcbc * (vx2 + c1o2) + (mfcac + c1o18 * oMdrho) * (vx2Sq + vx2) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -1168,21 +1172,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 =  mfcaa * c1o2 +      mfbaa * (vx1 - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcaa        - two * mfbaa *  vx1         +  mfaaa                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcaa        - c2o1 * mfbaa *  vx1         +  mfaaa                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcaa * c1o2 +      mfbaa * (vx1 + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcba * c1o2 +      mfbba * (vx1 - c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcba        - two * mfbba *  vx1         +  mfaba                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcba        - c2o1 * mfbba *  vx1         +  mfaba                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcba * c1o2 +      mfbba * (vx1 + c1o2) + (mfaba + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcca * c1o2 +      mfbca * (vx1 - c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcca        - two * mfbca *  vx1         +  mfaca                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcca        - c2o1 * mfbca *  vx1         +  mfaca                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcca * c1o2 +      mfbca * (vx1 + c1o2) + (mfaca + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -1190,21 +1194,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcab * c1o2 +      mfbab * (vx1 - c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcab        - two * mfbab *  vx1         +  mfaab                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcab        - c2o1 * mfbab *  vx1         +  mfaab                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcab * c1o2 +      mfbab * (vx1 + c1o2) + (mfaab + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfcbb * c1o2 +      mfbbb * (vx1 - c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbb        - two * mfbbb *  vx1         +  mfabb                  * (one - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
+   m1 = -mfcbb        - c2o1 * mfbbb *  vx1         +  mfabb                  * (c1o1 - vx1Sq)              - c4o9 * oMdrho * vx1Sq;
    m2 =  mfcbb * c1o2 +      mfbbb * (vx1 + c1o2) + (mfabb + c4o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 =  mfccb * c1o2 +      mfbcb * (vx1 - c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccb        - two * mfbcb *  vx1         +  mfacb                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfccb        - c2o1 * mfbcb *  vx1         +  mfacb                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfccb * c1o2 +      mfbcb * (vx1 + c1o2) + (mfacb + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -1212,21 +1216,21 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 =  mfcac * c1o2 +      mfbac * (vx1 - c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcac        - two * mfbac *  vx1         +  mfaac                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfcac        - c2o1 * mfbac *  vx1         +  mfaac                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfcac * c1o2 +      mfbac * (vx1 + c1o2) + (mfaac + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfcbc * c1o2 +      mfbbc * (vx1 - c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfcbc        - two * mfbbc *  vx1         +  mfabc                  * (one - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
+   m1 = -mfcbc        - c2o1 * mfbbc *  vx1         +  mfabc                  * (c1o1 - vx1Sq)              - c1o9 * oMdrho * vx1Sq;
    m2 =  mfcbc * c1o2 +      mfbbc * (vx1 + c1o2) + (mfabc + c1o9 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 =  mfccc * c1o2 +      mfbcc * (vx1 - c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq - vx1) * c1o2;
-   m1 = -mfccc        - two * mfbcc *  vx1         +  mfacc                   * (one - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
+   m1 = -mfccc        - c2o1 * mfbcc *  vx1         +  mfacc                   * (c1o1 - vx1Sq)              - c1o36 * oMdrho * vx1Sq;
    m2 =  mfccc * c1o2 +      mfbcc * (vx1 + c1o2) + (mfacc + c1o36 * oMdrho) * (vx1Sq + vx1) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -1262,14 +1266,14 @@ void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedNod
    f[DIR_MMM]  = mfaaa;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void CompressibleOffsetSquarePressureInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.h
index e456668afc3d8b3a5c993774d60df5c9edff28a7..d9285289f63e7dfca694342df092b6e5f4ecb4d5 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CompressibleOffsetSquarePressureInterpolationProcessor.h
@@ -15,56 +15,56 @@ class CompressibleOffsetSquarePressureInterpolationProcessor : public Interpolat
 {
 public:
    CompressibleOffsetSquarePressureInterpolationProcessor();
-   CompressibleOffsetSquarePressureInterpolationProcessor(LBMReal omegaC, LBMReal omegaF);
+   CompressibleOffsetSquarePressureInterpolationProcessor(real omegaC, real omegaF);
    ~CompressibleOffsetSquarePressureInterpolationProcessor() override;
    InterpolationProcessorPtr clone() override;
-   void setOmegas(LBMReal omegaC, LBMReal omegaF) override;
+   void setOmegas(real omegaC, real omegaF) override;
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF) override;
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC) override; 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff) override; 
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff) override;
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC) override; 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff) override; 
    void setBulkOmegaToOmega(bool value);
 protected:   
 private:
-   LBMReal omegaC{0.0}, omegaF{0.0};
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC{0.0}, omegaF{0.0};
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
-//   LBMReal a,b,c;
+//   real a,b,c;
 
    // bulk viscosity
    bool bulkOmegaToOmega;
-   LBMReal OxxPyyPzzC;
-   LBMReal OxxPyyPzzF;
+   real OxxPyyPzzC;
+   real OxxPyyPzzF;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new) override;
-   void calcInterpolatedNodeCF(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega) override;
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3) override;
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz) override;
+   void setOffsets(real xoff, real yoff, real zoff) override;
+   void calcMoments(const real* const f, real omega, real& rho, real& vx1, real& vx2, real& vx3, 
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new) override;
+   void calcInterpolatedNodeCF(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega) override;
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3) override;
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz) override;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -73,7 +73,7 @@ inline void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateC
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void CompressibleOffsetSquarePressureInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
index 2a895950a79cf011c25b0d352689216f53e96d41..736dbb791547633f22e0e9a0efd572271f28552c 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
@@ -41,7 +41,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CumulantK17LBMKernel::CumulantK17LBMKernel()
@@ -127,7 +128,7 @@ void CumulantK17LBMKernel::calculate(int step)
     int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
     int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
 
-    LBMReal omega = collFactor;
+    real omega = collFactor;
 
     for (int x3 = minX3; x3 < maxX3; x3++)
     {
@@ -164,54 +165,54 @@ void CumulantK17LBMKernel::calculate(int step)
                     // a b c
                     //-1 0 1
 
-                    LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-                    LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-                    LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-                    LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-                    LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-                    LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-                    LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-                    LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-                    LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-                    LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-                    LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-                    LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-                    LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-                    LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-                    LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-                    LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-                    LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-                    LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-                    LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-                    LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-                    LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-                    LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-                    LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                    LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                    LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                    LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                    LBMReal mfbbb = (*this->restDistributions)(x1, x2, x3);
+                    real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+                    real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+                    real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+                    real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+                    real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+                    real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+                    real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+                    real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+                    real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+                    real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+                    real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+                    real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+                    real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+                    real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+                    real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+                    real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+                    real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+                    real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+                    real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+                    real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+                    real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+                    real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+                    real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                    real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                    real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                    real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                    real mfbbb = (*this->restDistributions)(x1, x2, x3);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3)
                     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
                     //!
-                    LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                    real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                                     (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
                                     ((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb;
 
-                    LBMReal rho = c1 + drho;
-                    LBMReal OOrho = c1 / rho;
+                    real rho = c1o1 + drho;
+                    real OOrho = c1o1 / rho;
                     ////////////////////////////////////////////////////////////////////////////////////
-                    LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                    real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
                                    (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
                                    (mfcbb - mfabb)) / rho;
-                    LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                    real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
                                    (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
                                    (mfbcb - mfbab)) / rho;
-                    LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                    real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
                                    (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
                                    (mfbbc - mfbba)) / rho;
                     ////////////////////////////////////////////////////////////////////////////////////
@@ -219,9 +220,9 @@ void CumulantK17LBMKernel::calculate(int step)
                     ///////////////////////////////////////////////////////////////////////////////////////////
                     if (withForcing)
                     {
-                        muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-                        muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-                        muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+                        muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+                        muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+                        muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
                         forcingX1 = muForcingX1.Eval();
                         forcingX2 = muForcingX2.Eval();
@@ -237,17 +238,17 @@ void CumulantK17LBMKernel::calculate(int step)
                     }
                     ////////////////////////////////////////////////////////////////////////////////////
                     // calculate the square of velocities for this lattice node
-                    LBMReal vx2 = vvx * vvx;
-                    LBMReal vy2 = vvy * vvy;
-                    LBMReal vz2 = vvz * vvz;
+                    real vx2 = vvx * vvx;
+                    real vy2 = vvy * vvy;
+                    real vz2 = vvz * vvz;
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    LBMReal wadjust;
-                    LBMReal qudricLimitP = c1o100;
-                    LBMReal qudricLimitM = c1o100;
-                    LBMReal qudricLimitD = c1o100;
+                    real wadjust;
+                    real qudricLimitP = c1o100;
+                    real qudricLimitM = c1o100;
+                    real qudricLimitD = c1o100;
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in
                     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -256,39 +257,39 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Z - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9);
+                    forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
                     forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-                    forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36);
-                    forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9);
-                    forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36);
+                    forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+                    forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
+                    forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Y - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6);
+                    forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
                     forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-                    forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18);
+                    forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
                     forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
                     forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
                     forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-                    forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6);
+                    forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
                     forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-                    forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18);
+                    forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // X - Dir
-                    forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1);
+                    forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
                     forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-                    forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3);
+                    forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
                     forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
                     forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
                     forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-                    forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3);
+                    forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
                     forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-                    forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9);
+                    forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to
@@ -304,29 +305,29 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     //2.
-                    LBMReal OxxPyyPzz = c1;
+                    real OxxPyyPzz = c1o1;
                     ////////////////////////////////////////////////////////////
                     //3.
-                    LBMReal OxyyPxzz = c8  * (-c2 + omega) * ( c1 + c2*omega) / (-c8 - c14*omega + c7*omega*omega);
-                    LBMReal OxyyMxzz = c8  * (-c2 + omega) * (-c7 + c4*omega) / (c56 - c50*omega + c9*omega*omega);
-                    LBMReal Oxyz     = c24 * (-c2 + omega) * (-c2 - c7*omega + c3*omega*omega) / (c48 + c152*omega - c130*omega*omega + c29*omega*omega*omega);
+                    real OxyyPxzz = c8o1 * (-c2o1 + omega) * ( c1o1 + c2o1 *omega) / (-c8o1 - c14o1 *omega + c7o1 *omega*omega);
+                    real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 *omega) / (c56o1 - c50o1 *omega + c9o1 *omega*omega);
+                    real Oxyz     = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 *omega + c3o1 *omega*omega) / (c48o1 + c152o1 *omega - c130o1 *omega*omega + c29o1 *omega*omega*omega);
                     ////////////////////////////////////////////////////////////
                     //4.
-                    LBMReal O4 = c1;
+                    real O4 = c1o1;
                     ////////////////////////////////////////////////////////////
                     //5.
-                    LBMReal O5 = c1;
+                    real O5 = c1o1;
                     ////////////////////////////////////////////////////////////
                     //6.
-                    LBMReal O6 = c1;
+                    real O6 = c1o1;
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //! with simplifications assuming \f$\omega_2 = 1.0\f$ (modify for different bulk viscosity).
                     //!
-                    LBMReal A = (c4 + c2*omega - c3*omega*omega) / (c2 - c7*omega + c5*omega*omega);
-                    LBMReal B = (c4 + c28*omega - c14*omega*omega) / (c6 - c21*omega + c15*omega*omega);
+                    real A = (c4o1 + c2o1 *omega - c3o1 *omega*omega) / (c2o1 - c7o1 *omega + c5o1 *omega*omega);
+                    real B = (c4o1 + c28o1 *omega - c14o1 *omega*omega) / (c6o1 - c21o1 *omega + c15o1 *omega*omega);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //! - Compute cumulants from central moments according to Eq. (20)-(23) in
@@ -334,30 +335,30 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     //4.
-                    LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2 * mfbba * mfbab) * OOrho;
-                    LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2 * mfbba * mfabb) * OOrho;
-                    LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2 * mfbab * mfabb) * OOrho;
+                    real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
+                    real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
+                    real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
 
-                    LBMReal CUMcca = mfcca - (((mfcaa * mfaca + c2 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho));
-                    LBMReal CUMcac = mfcac - (((mfcaa * mfaac + c2 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho));
-                    LBMReal CUMacc = mfacc - (((mfaac * mfaca + c2 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho));
+                    real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho));
+                    real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho));
+                    real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho));
                     ////////////////////////////////////////////////////////////
                     //5.
-                    LBMReal CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho;
-                    LBMReal CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho;
-                    LBMReal CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho;
+                    real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho;
+                    real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho;
+                    real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho;
                     ////////////////////////////////////////////////////////////
                     //6.
-                    LBMReal CUMccc = mfccc + ((-c4 * mfbbb * mfbbb
+                    real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb
                                                - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-                                               - c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-                                               - c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
-                                              + (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-                                                 + c2 * (mfcaa * mfaca * mfaac)
-                                                 + c16 * mfbba * mfbab * mfabb) * OOrho * OOrho
+                                               - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                                               - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
+                                              + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                                                 + c2o1 * (mfcaa * mfaca * mfaac)
+                                                 + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho
                                               - c1o3 * (mfacc + mfcac + mfcca) * OOrho
                                               - c1o9 * (mfcaa + mfaca + mfaac) * OOrho
-                                              + (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                                              + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
                                                  + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3
                                               + c1o27 * ((drho * drho - drho) * OOrho * OOrho));
 
@@ -366,19 +367,19 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////
                     //2.
-                    LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-                    LBMReal mxxMyy = mfcaa - mfaca;
-                    LBMReal mxxMzz = mfcaa - mfaac;
+                    real mxxPyyPzz = mfcaa + mfaca + mfaac;
+                    real mxxMyy = mfcaa - mfaca;
+                    real mxxMzz = mfcaa - mfaac;
                     ////////////////////////////////////////////////////////////
                     //3.
-                    LBMReal mxxyPyzz = mfcba + mfabc;
-                    LBMReal mxxyMyzz = mfcba - mfabc;
+                    real mxxyPyzz = mfcba + mfabc;
+                    real mxxyMyzz = mfcba - mfabc;
 
-                    LBMReal mxxzPyyz = mfcab + mfacb;
-                    LBMReal mxxzMyyz = mfcab - mfacb;
+                    real mxxzPyyz = mfcab + mfacb;
+                    real mxxzMyyz = mfcab - mfacb;
 
-                    LBMReal mxyyPxzz = mfbca + mfbac;
-                    LBMReal mxyyMxzz = mfbca - mfbac;
+                    real mxyyPxzz = mfbca + mfbac;
+                    real mxyyMxzz = mfbca - mfbac;
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     //incl. correction
@@ -389,19 +390,19 @@ void CumulantK17LBMKernel::calculate(int step)
                     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
                     //! Note that the division by rho is omitted here as we need rho times the gradients later.
                     //!
-                    LBMReal Dxy = -c3 * omega * mfbba;
-                    LBMReal Dxz = -c3 * omega * mfbab;
-                    LBMReal Dyz = -c3 * omega * mfabb;
-                    LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-                    LBMReal dyuy = dxux + omega * c3o2 * mxxMyy;
-                    LBMReal dzuz = dxux + omega * c3o2 * mxxMzz;
+                    real Dxy = -c3o1 * omega * mfbba;
+                    real Dxz = -c3o1 * omega * mfbab;
+                    real Dyz = -c3o1 * omega * mfabb;
+                    real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+                    real dyuy = dxux + omega * c3o2 * mxxMyy;
+                    real dzuz = dxux + omega * c3o2 * mxxMzz;
                     ////////////////////////////////////////////////////////////
                     //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3 * (c1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
-                    mxxMyy += omega * (-mxxMyy) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-                    mxxMzz += omega * (-mxxMzz) - c3 * (c1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+                    mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+                    mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+                    mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
                     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                     ////no correction
@@ -420,19 +421,19 @@ void CumulantK17LBMKernel::calculate(int step)
                     //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123)
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    wadjust = Oxyz + (c1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
+                    wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
                     mfbbb += wadjust * (-mfbbb);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
                     mxxyPyzz += wadjust * (-mxxyPyzz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
                     mxxyMyzz += wadjust * (-mxxyMyzz);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
                     mxxzPyyz += wadjust * (-mxxzPyyz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
                     mxxzMyyz += wadjust * (-mxxzMyyz);
-                    wadjust = OxyyPxzz + (c1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
+                    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
                     mxyyPxzz += wadjust * (-mxyyPxzz);
-                    wadjust = OxyyMxzz + (c1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
+                    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
                     mxyyMxzz += wadjust * (-mxyyMxzz);
                     //////////////////////////////////////////////////////////////////////////
                     // no limiter
@@ -448,8 +449,8 @@ void CumulantK17LBMKernel::calculate(int step)
                     //! - Compute inverse linear combinations of second and third order cumulants
                     //!
                     mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-                    mfaca = c1o3 * (-c2 * mxxMyy + mxxMzz + mxxPyyPzz);
-                    mfaac = c1o3 * (mxxMyy - c2 * mxxMzz + mxxPyyPzz);
+                    mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
+                    mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
                     mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
                     mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
@@ -465,12 +466,12 @@ void CumulantK17LBMKernel::calculate(int step)
                     //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48)
                     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
                     //!
-                    CUMacc = -O4 * (c1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1 - O4) * (CUMacc);
-                    CUMcac = -O4 * (c1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1 - O4) * (CUMcac);
-                    CUMcca = -O4 * (c1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1 - O4) * (CUMcca);
-                    CUMbbc = -O4 * (c1 / omega - c1o2) * Dxy * c1o3 * B + (c1 - O4) * (CUMbbc);
-                    CUMbcb = -O4 * (c1 / omega - c1o2) * Dxz * c1o3 * B + (c1 - O4) * (CUMbcb);
-                    CUMcbb = -O4 * (c1 / omega - c1o2) * Dyz * c1o3 * B + (c1 - O4) * (CUMcbb);
+                    CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+                    CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+                    CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+                    CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc);
+                    CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb);
+                    CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb);
 
                     //////////////////////////////////////////////////////////////////////////
                     //5.
@@ -489,32 +490,32 @@ void CumulantK17LBMKernel::calculate(int step)
 
                     //////////////////////////////////////////////////////////////////////////
                     //4.
-                    mfcbb = CUMcbb + c1o3 * ((c3 * mfcaa + c1) * mfabb + c6 * mfbba * mfbab) * OOrho;
-                    mfbcb = CUMbcb + c1o3 * ((c3 * mfaca + c1) * mfbab + c6 * mfbba * mfabb) * OOrho;
-                    mfbbc = CUMbbc + c1o3 * ((c3 * mfaac + c1) * mfbba + c6 * mfbab * mfabb) * OOrho;
+                    mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho;
+                    mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
+                    mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
 
-                    mfcca = CUMcca + (((mfcaa * mfaca + c2 * mfbba * mfbba) * c9 + c3 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
-                    mfcac = CUMcac + (((mfcaa * mfaac + c2 * mfbab * mfbab) * c9 + c3 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9;
-                    mfacc = CUMacc + (((mfaac * mfaca + c2 * mfabb * mfabb) * c9 + c3 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
+                    mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
+                    mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9;
+                    mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
 
                     //////////////////////////////////////////////////////////////////////////
                     //5.
-                    mfbcc = CUMbcc + c1o3 * (c3 * (mfaac * mfbca + mfaca * mfbac + c4 * mfabb * mfbbb + c2 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho;
-                    mfcbc = CUMcbc + c1o3 * (c3 * (mfaac * mfcba + mfcaa * mfabc + c4 * mfbab * mfbbb + c2 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho;
-                    mfccb = CUMccb + c1o3 * (c3 * (mfcaa * mfacb + mfaca * mfcab + c4 * mfbba * mfbbb + c2 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho;
+                    mfbcc = CUMbcc + c1o3 * (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho;
+                    mfcbc = CUMcbc + c1o3 * (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho;
+                    mfccb = CUMccb + c1o3 * (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + (mfacb + mfcab)) * OOrho;
 
                     //////////////////////////////////////////////////////////////////////////
                     //6.
-                    mfccc = CUMccc - ((-c4 * mfbbb * mfbbb
+                    mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb
                                        - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-                                       - c4 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-                                       - c2 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
-                                      + (c4 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-                                         + c2 * (mfcaa * mfaca * mfaac)
-                                         + c16 * mfbba * mfbab * mfabb) * OOrho * OOrho
+                                       - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                                       - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
+                                      + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                                         + c2o1 * (mfcaa * mfaca * mfaac)
+                                         + c16o1 * mfbba * mfbab * mfabb) * OOrho * OOrho
                                       - c1o3 * (mfacc + mfcac + mfcca) * OOrho
                                       - c1o9 * (mfcaa + mfaca + mfaac) * OOrho
-                                      + (c2 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                                      + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
                                          + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3
                                       + c1o27 * ((drho * drho - drho) * OOrho * OOrho));
 
@@ -536,49 +537,49 @@ void CumulantK17LBMKernel::calculate(int step)
                     //!
                     ////////////////////////////////////////////////////////////////////////////////////
                     // X - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1, c1);
+                    backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
                     backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-                    backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3, c1o3);
+                    backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
                     backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
                     backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
                     backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-                    backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3, c1o3);
+                    backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
                     backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-                    backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9, c1o9);
+                    backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Y - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6, c1o6);
+                    backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
                     backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-                    backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18, c1o18);
+                    backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
                     backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
                     backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
                     backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-                    backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6, c1o6);
+                    backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
                     backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-                    backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18, c1o18);
+                    backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
 
                     ////////////////////////////////////////////////////////////////////////////////////
                     // Z - Dir
-                    backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9, c1o9);
+                    backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
                     backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-                    backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36, c1o36);
-                    backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9, c1o9);
-                    backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36, c1o36);
+                    backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+                    backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
+                    backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
                     ////////////////////////////////////////////////////////////////////////////////////
 
                     //////////////////////////////////////////////////////////////////////////
                     //proof correctness
                     //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-                    LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+                    real drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
                                         + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
                                         + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
-                    LBMReal dif = drho - drho_post;
+                    real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
                     if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
index aab4d669655efe5bd489feb3829da28e67aa9ecb..84b14553dc2100aa244a5f7e3fc0d5025d93e195 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
@@ -55,27 +55,27 @@ public:
     ~CumulantK17LBMKernel() = default;
     void calculate(int step) override;
     SPtr<LBMKernel> clone() override;
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; }
 
 protected:
-    inline void forwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K);
-    inline void backwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K);
-    inline void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-    inline void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+    inline void forwardInverseChimeraWithK(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K);
+    inline void backwardInverseChimeraWithK(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K);
+    inline void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+    inline void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
     virtual void initDataSet();
-    LBMReal f[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr restDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr restDistributions;
 
     mu::value_type muX1, muX2, muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -84,17 +84,18 @@ protected:
 //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
 //! Modified for lower round-off errors.
 ////////////////////////////////////////////////////////////////////////////////
-inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K)
+inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K)
 {
-    using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-    LBMReal m1 = mfc - mfa;
-    LBMReal m0 = m2 + mfb;
+//    using namespace UbMath;
+    using namespace vf::basics::constant;
+    real m2 = mfa + mfc;
+    real m1 = mfc - mfa;
+    real m0 = m2 + mfb;
     mfa = m0;
     m0 *= Kinverse;
-    m0 += c1;
+    m0 += c1o1;
     mfb = (m1 * Kinverse - m0 * vv) * K;
-    mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 ////////////////////////////////////////////////////////////////////////////////
 //! \brief backward chimera transformation \ref backwardInverseChimeraWithK
@@ -102,12 +103,14 @@ inline void CumulantK17LBMKernel::forwardInverseChimeraWithK(LBMReal& mfa, LBMRe
 //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
 //! ] Modified for lower round-off errors.
 ////////////////////////////////////////////////////////////////////////////////
-inline void CumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K)
+inline void CumulantK17LBMKernel::backwardInverseChimeraWithK(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K)
 {
-    using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 - vv) * c1o2) * K;
-    LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (-v2)) * K;
-    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1) * (v2 + vv) * c1o2) * K;
+//    using namespace UbMath;
+    using namespace vf::basics::constant;
+
+    real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K;
+    real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K;
+    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K;
     mfa = m0;
     mfb = m1;
 }
@@ -118,12 +121,14 @@ inline void CumulantK17LBMKernel::backwardInverseChimeraWithK(LBMReal& mfa, LBMR
 //! for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations.
 //! Modified for lower round-off errors.
 ////////////////////////////////////////////////////////////////////////////////
-inline void CumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2)
+inline void CumulantK17LBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2)
 {
-    using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-    LBMReal m2 = mfc - mfa;
-    mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+//    using namespace UbMath;
+    using namespace vf::basics::constant;
+
+    real m1 = (mfa + mfc) + mfb;
+    real m2 = mfc - mfa;
+    mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
     mfb = m2 - vv * m1;
     mfa = m1;
 }
@@ -134,11 +139,13 @@ inline void CumulantK17LBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBM
 //! for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations.
 //! Modified for lower round-off errors.
 ////////////////////////////////////////////////////////////////////////////////
-inline void CumulantK17LBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2)
+inline void CumulantK17LBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2)
 {
-    using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-    LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+//    using namespace UbMath;
+    using namespace vf::basics::constant;
+
+    real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+    real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
     mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
     mfb = mb;
     mfa = ma;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
index e001cbf970d59f6f847589810eeaacb86a3e22a2..e95271b949ff91dc7ec2c62a4a45227f8e25f519 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
@@ -46,7 +46,8 @@
 
 //#define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CumulantK17LBMKernelUnified::CumulantK17LBMKernelUnified()
@@ -132,7 +133,7 @@ void CumulantK17LBMKernelUnified::calculate(int step)
     int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
     int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
 
-    LBMReal omega = collFactor;
+    real omega = collFactor;
 
     for (int x3 = minX3; x3 < maxX3; x3++)
     {
@@ -169,43 +170,43 @@ void CumulantK17LBMKernelUnified::calculate(int step)
                     // a b c
                     //-1 0 1
 
-                    LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-                    LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-                    LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-                    LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-                    LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-                    LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-                    LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-                    LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-                    LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-                    LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-                    LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-                    LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-                    LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-                    LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-                    LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-                    LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-                    LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-                    LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-                    LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-                    LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-                    LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-                    LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-                    LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                    LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                    LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                    LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                    LBMReal mfbbb = (*this->restDistributions)(x1, x2, x3);
+                    real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+                    real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+                    real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+                    real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+                    real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+                    real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+                    real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+                    real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+                    real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+                    real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+                    real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+                    real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+                    real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+                    real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+                    real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+                    real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+                    real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+                    real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+                    real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+                    real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+                    real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+                    real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+                    real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                    real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                    real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                    real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                    real mfbbb = (*this->restDistributions)(x1, x2, x3);
 
                     
-                    LBMReal forces[3] = {0., 0., 0.};
+                    real forces[3] = {0., 0., 0.};
                     if (withForcing)
                     {
-                        muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-                        muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-                        muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+                        muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+                        muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+                        muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
                         forcingX1 = muForcingX1.Eval();
                         forcingX2 = muForcingX2.Eval();
@@ -281,10 +282,10 @@ void CumulantK17LBMKernelUnified::calculate(int step)
                     //proof correctness
                     //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-                    LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+                    real drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
                                         + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
                                         + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
-                    LBMReal dif = distribution.getDensity_() - drho_post;
+                    real dif = distribution.getDensity_() - drho_post;
 #ifdef SINGLEPRECISION
                     if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h
index 175fdd4cba2a0c8ce47248f5de6672f34dda0cc3..d13e82efce5f5bc9211476ea86d989fe8663ab8c 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h
@@ -55,22 +55,22 @@ public:
     ~CumulantK17LBMKernelUnified() = default;
     void calculate(int step) override;
     SPtr<LBMKernel> clone() override;
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; }
 
 protected:
     virtual void initDataSet();
-    LBMReal f[D3Q27System::ENDF + 1];
+    real f[D3Q27System::ENDF + 1];
 
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr restDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr restDistributions;
 
     mu::value_type muX1, muX2, muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.cpp
index 1ecfc5a4ce6e4106750fad71b9d63ac7e5dd0fc9..d2f25ad275a5c01d685e915f2b37f101b40d556d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 CumulantLBMKernel::CumulantLBMKernel()
@@ -57,7 +58,7 @@ SPtr<LBMKernel> CumulantLBMKernel::clone()
    }
    else
    {
-      dynamicPointerCast<CumulantLBMKernel>(kernel)->OxxPyyPzz = UbMath::one;
+      dynamicPointerCast<CumulantLBMKernel>(kernel)->OxxPyyPzz = c1o1;
    }
    return kernel;
 }
@@ -1028,7 +1029,7 @@ SPtr<LBMKernel> CumulantLBMKernel::clone()
 //   //timer.stop();
 //}
 //////////////////////////////////////////////////////////////////////////
-double CumulantLBMKernel::getCalculationTime()
+real CumulantLBMKernel::getCalculationTime()
 {
    //return timer.getDuration();
    return timer.getTotalTime();
@@ -1097,50 +1098,50 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    // a b c
    //-1 0 1
 
-   LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-   LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-   LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-   LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-   LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-   LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-   LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-   LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-   LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-   LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-   LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-   LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-   LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-   LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-   LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-   LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-   LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-   LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-   LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-   LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-   LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-   LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-   LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-   LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-   LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-   LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-   LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-   ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+   real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+   real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+   real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+   real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+   real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+   real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+   real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+   real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+   real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+   real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+   real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+   real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+   real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+   real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+   real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+   real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+   real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+   real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+   real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+   real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+   real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+   real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+   real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+   real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+   real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+   real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+   real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+   ////////////////////////////////////////////////////////////////////////////////////
+   real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
       (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
       ((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb;
 
-   LBMReal rho = UbMath::one + drho;
+   real rho = c1o1 + drho;
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
       (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
       (mfcbb - mfabb)) / rho;
-   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
       (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
       (mfbcb - mfbab)) / rho;
-   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
       (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
       (mfbbc - mfbba)) / rho;
    ////////////////////////////////////////////////////////////////////////////////////
@@ -1149,9 +1150,9 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ///////////////////////////////////////////////////////////////////////////////////////////
    if (withForcing)
    {
-      muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-      muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-      muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+      muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+      muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+      muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
       forcingX1 = muForcingX1.Eval();
       forcingX2 = muForcingX2.Eval();
@@ -1163,12 +1164,12 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    }
    ///////////////////////////////////////////////////////////////////////////////////////////               
 ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal oMdrho = one; // comp special
+   real oMdrho = c1o1; // comp special
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal m0, m1, m2;
-   LBMReal vx2;
-   LBMReal vy2;
-   LBMReal vz2;
+   real m0, m1, m2;
+   real vx2;
+   real vy2;
+   real vz2;
    vx2 = vvx * vvx;
    vy2 = vvy * vvy;
    vz2 = vvz * vvz;
@@ -1192,7 +1193,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaaa = m0;
    m0 += c1o36 * oMdrho;
    mfaab = m1 - m0 * vvz;
-   mfaac = m2 - two * m1 * vvz + vz2 * m0;
+   mfaac = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaba + mfabc;
    m1 = mfabc - mfaba;
@@ -1200,7 +1201,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaba = m0;
    m0 += c1o9 * oMdrho;
    mfabb = m1 - m0 * vvz;
-   mfabc = m2 - two * m1 * vvz + vz2 * m0;
+   mfabc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaca + mfacc;
    m1 = mfacc - mfaca;
@@ -1208,7 +1209,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaca = m0;
    m0 += c1o36 * oMdrho;
    mfacb = m1 - m0 * vvz;
-   mfacc = m2 - two * m1 * vvz + vz2 * m0;
+   mfacc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbaa + mfbac;
@@ -1217,7 +1218,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbaa = m0;
    m0 += c1o9 * oMdrho;
    mfbab = m1 - m0 * vvz;
-   mfbac = m2 - two * m1 * vvz + vz2 * m0;
+   mfbac = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbba + mfbbc;
    m1 = mfbbc - mfbba;
@@ -1225,7 +1226,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbba = m0;
    m0 += c4o9 * oMdrho;
    mfbbb = m1 - m0 * vvz;
-   mfbbc = m2 - two * m1 * vvz + vz2 * m0;
+   mfbbc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbca + mfbcc;
    m1 = mfbcc - mfbca;
@@ -1233,7 +1234,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbca = m0;
    m0 += c1o9 * oMdrho;
    mfbcb = m1 - m0 * vvz;
-   mfbcc = m2 - two * m1 * vvz + vz2 * m0;
+   mfbcc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcaa + mfcac;
@@ -1242,7 +1243,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcaa = m0;
    m0 += c1o36 * oMdrho;
    mfcab = m1 - m0 * vvz;
-   mfcac = m2 - two * m1 * vvz + vz2 * m0;
+   mfcac = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcba + mfcbc;
    m1 = mfcbc - mfcba;
@@ -1250,7 +1251,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcba = m0;
    m0 += c1o9 * oMdrho;
    mfcbb = m1 - m0 * vvz;
-   mfcbc = m2 - two * m1 * vvz + vz2 * m0;
+   mfcbc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcca + mfccc;
    m1 = mfccc - mfcca;
@@ -1258,7 +1259,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcca = m0;
    m0 += c1o36 * oMdrho;
    mfccb = m1 - m0 * vvz;
-   mfccc = m2 - two * m1 * vvz + vz2 * m0;
+   mfccc = m2 - c2o1 * m1 * vvz + vz2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
@@ -1270,14 +1271,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaaa = m0;
    m0 += c1o6 * oMdrho;
    mfaba = m1 - m0 * vvy;
-   mfaca = m2 - two * m1 * vvy + vy2 * m0;
+   mfaca = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaab + mfacb;
    m1 = mfacb - mfaab;
    m0 = m2 + mfabb;
    mfaab = m0;
    mfabb = m1 - m0 * vvy;
-   mfacb = m2 - two * m1 * vvy + vy2 * m0;
+   mfacb = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaac + mfacc;
    m1 = mfacc - mfaac;
@@ -1285,7 +1286,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaac = m0;
    m0 += c1o18 * oMdrho;
    mfabc = m1 - m0 * vvy;
-   mfacc = m2 - two * m1 * vvy + vy2 * m0;
+   mfacc = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbaa + mfbca;
@@ -1294,14 +1295,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbaa = m0;
    m0 += c2o3 * oMdrho;
    mfbba = m1 - m0 * vvy;
-   mfbca = m2 - two * m1 * vvy + vy2 * m0;
+   mfbca = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbab + mfbcb;
    m1 = mfbcb - mfbab;
    m0 = m2 + mfbbb;
    mfbab = m0;
    mfbbb = m1 - m0 * vvy;
-   mfbcb = m2 - two * m1 * vvy + vy2 * m0;
+   mfbcb = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfbac + mfbcc;
    m1 = mfbcc - mfbac;
@@ -1309,7 +1310,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfbac = m0;
    m0 += c2o9 * oMdrho;
    mfbbc = m1 - m0 * vvy;
-   mfbcc = m2 - two * m1 * vvy + vy2 * m0;
+   mfbcc = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcaa + mfcca;
@@ -1318,14 +1319,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcaa = m0;
    m0 += c1o6 * oMdrho;
    mfcba = m1 - m0 * vvy;
-   mfcca = m2 - two * m1 * vvy + vy2 * m0;
+   mfcca = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcab + mfccb;
    m1 = mfccb - mfcab;
    m0 = m2 + mfcbb;
    mfcab = m0;
    mfcbb = m1 - m0 * vvy;
-   mfccb = m2 - two * m1 * vvy + vy2 * m0;
+   mfccb = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfcac + mfccc;
    m1 = mfccc - mfcac;
@@ -1333,7 +1334,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfcac = m0;
    m0 += c1o18 * oMdrho;
    mfcbc = m1 - m0 * vvy;
-   mfccc = m2 - two * m1 * vvy + vy2 * m0;
+   mfccc = m2 - c2o1 * m1 * vvy + vy2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
@@ -1343,16 +1344,16 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    m1 = mfcaa - mfaaa;
    m0 = m2 + mfbaa;
    mfaaa = m0;
-   m0 += one * oMdrho;
+   m0 += c1o1 * oMdrho;
    mfbaa = m1 - m0 * vvx;
-   mfcaa = m2 - two * m1 * vvx + vx2 * m0;
+   mfcaa = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaba + mfcba;
    m1 = mfcba - mfaba;
    m0 = m2 + mfbba;
    mfaba = m0;
    mfbba = m1 - m0 * vvx;
-   mfcba = m2 - two * m1 * vvx + vx2 * m0;
+   mfcba = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaca + mfcca;
    m1 = mfcca - mfaca;
@@ -1360,7 +1361,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaca = m0;
    m0 += c1o3 * oMdrho;
    mfbca = m1 - m0 * vvx;
-   mfcca = m2 - two * m1 * vvx + vx2 * m0;
+   mfcca = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaab + mfcab;
@@ -1368,21 +1369,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    m0 = m2 + mfbab;
    mfaab = m0;
    mfbab = m1 - m0 * vvx;
-   mfcab = m2 - two * m1 * vvx + vx2 * m0;
+   mfcab = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfabb + mfcbb;
    m1 = mfcbb - mfabb;
    m0 = m2 + mfbbb;
    mfabb = m0;
    mfbbb = m1 - m0 * vvx;
-   mfcbb = m2 - two * m1 * vvx + vx2 * m0;
+   mfcbb = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfacb + mfccb;
    m1 = mfccb - mfacb;
    m0 = m2 + mfbcb;
    mfacb = m0;
    mfbcb = m1 - m0 * vvx;
-   mfccb = m2 - two * m1 * vvx + vx2 * m0;
+   mfccb = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfaac + mfcac;
@@ -1391,14 +1392,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfaac = m0;
    m0 += c1o3 * oMdrho;
    mfbac = m1 - m0 * vvx;
-   mfcac = m2 - two * m1 * vvx + vx2 * m0;
+   mfcac = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfabc + mfcbc;
    m1 = mfcbc - mfabc;
    m0 = m2 + mfbbc;
    mfabc = m0;
    mfbbc = m1 - m0 * vvx;
-   mfcbc = m2 - two * m1 * vvx + vx2 * m0;
+   mfcbc = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    m2 = mfacc + mfccc;
    m1 = mfccc - mfacc;
@@ -1406,7 +1407,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    mfacc = m0;
    m0 += c1o9 * oMdrho;
    mfbcc = m1 - m0 * vvx;
-   mfccc = m2 - two * m1 * vvx + vx2 * m0;
+   mfccc = m2 - c2o1 * m1 * vvx + vx2 * m0;
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
 
@@ -1420,7 +1421,7 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////
    //3.
    //////////////////////////////
-   LBMReal OxyyPxzz = one;//three  * (two - omega) / (three  - omega);//
+   real OxyyPxzz = c1o1;//three  * (two - omega) / (three  - omega);//
    //LBMReal OxyyMxzz = one;//six    * (two - omega) / (six    - omega);//
    //LBMReal Oxyz = one;//twelve * (two - omega) / (twelve + omega);//
    //////////////////////////////
@@ -1444,47 +1445,47 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////
    //4.
    //////////////////////////////
-   LBMReal O4 = one;
+   real O4 = c1o1;
    //////////////////////////////
-   //LBMReal O4        = omega;//TRT
+   //real O4        = omega;//TRT
    ////////////////////////////////////////////////////////////
    //5.
    //////////////////////////////
-   LBMReal O5 = one;
+   real O5 = c1o1;
    ////////////////////////////////////////////////////////////
    //6.
    //////////////////////////////
-   LBMReal O6 = one;
+   real O6 = c1o1;
    ////////////////////////////////////////////////////////////
 
 
    //central moments to cumulants
    //4.
-   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + two * mfbba * mfbab) / rho;	//ab 15.05.2015 verwendet
-   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + two * mfbba * mfabb) / rho; //ab 15.05.2015 verwendet
-   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + two * mfbab * mfabb) / rho; //ab 15.05.2015 verwendet
+   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;	//in use as of 15.05.2015
+   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho; //in use as of 15.05.2015
+   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho; //in use as of 15.05.2015
 
-   LBMReal CUMcca = mfcca - (((mfcaa * mfaca + two * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9 * (drho / rho));
-   LBMReal CUMcac = mfcac - (((mfcaa * mfaac + two * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9 * (drho / rho));
-   LBMReal CUMacc = mfacc - (((mfaac * mfaca + two * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9 * (drho / rho));
+   real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9 * (drho / rho));
+   real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9 * (drho / rho));
+   real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9 * (drho / rho));
 
    //5.
-   LBMReal CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + four * mfabb * mfbbb + two * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-   LBMReal CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + four * mfbab * mfbbb + two * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-   LBMReal CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + four * mfbba * mfbbb + two * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
+   real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
+   real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
+   real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
 
    //6.
 
-   LBMReal CUMccc = mfccc + ((-four * mfbbb * mfbbb
+   real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb
       - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-      - four * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-      - two * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-      + (four * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-         + two * (mfcaa * mfaca * mfaac)
-         + sixteen * mfbba * mfbab * mfabb) / (rho * rho)
+      - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+      - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+      + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+         + c2o1 * (mfcaa * mfaca * mfaac)
+         + c16o1 * mfbba * mfbab * mfabb) / (rho * rho)
       - c1o3 * (mfacc + mfcac + mfcca) / rho
       - c1o9 * (mfcaa + mfaca + mfaac) / rho
-      + (two * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+      + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
          + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
       + c1o27 * ((drho * drho - drho) / (rho * rho)));
    //+ c1o27*(one -three/rho +two/(rho*rho)));
@@ -1494,9 +1495,9 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
 
 //2.
 // linear combinations
-   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-   LBMReal mxxMyy = mfcaa - mfaca;
-   LBMReal mxxMzz = mfcaa - mfaac;
+   real mxxPyyPzz = mfcaa + mfaca + mfaac;
+   real mxxMyy = mfcaa - mfaca;
+   real mxxMzz = mfcaa - mfaac;
 
    //////////////////////////////////////////////////////////////////////////
 // 			LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
@@ -1536,14 +1537,14 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
          ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
          //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
    {
-      LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-      LBMReal dyuy = dxux + omega * c3o2 * mxxMyy;
-      LBMReal dzuz = dxux + omega * c3o2 * mxxMzz;
+      real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+      real dyuy = dxux + omega * c3o2 * mxxMyy;
+      real dzuz = dxux + omega * c3o2 * mxxMzz;
 
       //relax
-      mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - three * (one - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-      mxxMyy += omega * (-mxxMyy) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-      mxxMzz += omega * (-mxxMzz) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+      mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+      mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+      mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
       //////////////////////////////////////////////////////////////////////////
       //limiter-Scheise Teil 2
@@ -1573,20 +1574,20 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
 
    // linear combinations back
    mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-   mfaca = c1o3 * (-two * mxxMyy + mxxMzz + mxxPyyPzz);
-   mfaac = c1o3 * (mxxMyy - two * mxxMzz + mxxPyyPzz);
+   mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
+   mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
 
    //3.
    // linear combinations
 
-   LBMReal mxxyPyzz = mfcba + mfabc;
-   LBMReal mxxyMyzz = mfcba - mfabc;
+   real mxxyPyzz = mfcba + mfabc;
+   real mxxyMyzz = mfcba - mfabc;
 
-   LBMReal mxxzPyyz = mfcab + mfacb;
-   LBMReal mxxzMyyz = mfcab - mfacb;
+   real mxxzPyyz = mfcab + mfacb;
+   real mxxzMyyz = mfcab - mfacb;
 
-   LBMReal mxyyPxzz = mfbca + mfbac;
-   LBMReal mxyyMxzz = mfbca - mfbac;
+   real mxyyPxzz = mfbca + mfbac;
+   real mxyyMxzz = mfbca - mfbac;
 
    //relax
    //////////////////////////////////////////////////////////////////////////
@@ -1664,31 +1665,31 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
 
    //back cumulants to central moments
    //4.
-   mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + two * mfbba * mfbab) / rho;
-   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + two * mfbba * mfabb) / rho;
-   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + two * mfbab * mfabb) / rho;
+   mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
+   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
+   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
 
-   mfcca = CUMcca + (((mfcaa * mfaca + two * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9 * (drho / rho));//(one/rho-one));
-   mfcac = CUMcac + (((mfcaa * mfaac + two * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9 * (drho / rho));//(one/rho-one));
-   mfacc = CUMacc + (((mfaac * mfaca + two * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9 * (drho / rho));//(one/rho-one));
+   mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9 * (drho / rho));//(one/rho-one));
+   mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9 * (drho / rho));//(one/rho-one));
+   mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9 * (drho / rho));//(one/rho-one));
 
    //5.
-   mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + four * mfabb * mfbbb + two * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-   mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + four * mfbab * mfbbb + two * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-   mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + four * mfbba * mfbbb + two * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
+   mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
+   mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
+   mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
 
    //6.
 
-   mfccc = CUMccc - ((-four * mfbbb * mfbbb
+   mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb
       - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-      - four * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-      - two * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-      + (four * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-         + two * (mfcaa * mfaca * mfaac)
-         + sixteen * mfbba * mfbab * mfabb) / (rho * rho)
+      - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+      - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+      + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+         + c2o1 * (mfcaa * mfaca * mfaac)
+         + c16o1 * mfbba * mfbab * mfabb) / (rho * rho)
       - c1o3 * (mfacc + mfcac + mfcca) / rho
       - c1o9 * (mfcaa + mfaca + mfaac) / rho
-      + (two * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+      + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
          + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
       + c1o27 * ((drho * drho - drho) / (rho * rho)));
    ////////////////////////////////////////////////////////////////////////////////////
@@ -1704,22 +1705,22 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
 //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
 ////////////////////////////////////////////////////////////////////////////////////
 // Z - Dir
-   m0 = mfaac * c1o2 + mfaab * (vvz - c1o2) + (mfaaa + one * oMdrho) * (vz2 - vvz) * c1o2;
-   m1 = -mfaac - two * mfaab * vvz + mfaaa * (one - vz2) - one * oMdrho * vz2;
-   m2 = mfaac * c1o2 + mfaab * (vvz + c1o2) + (mfaaa + one * oMdrho) * (vz2 + vvz) * c1o2;
+   m0 = mfaac * c1o2 + mfaab * (vvz - c1o2) + (mfaaa + c1o1 * oMdrho) * (vz2 - vvz) * c1o2;
+   m1 = -mfaac - c2o1 * mfaab * vvz + mfaaa * (c1o1 - vz2) - c1o1 * oMdrho * vz2;
+   m2 = mfaac * c1o2 + mfaab * (vvz + c1o2) + (mfaaa + c1o1 * oMdrho) * (vz2 + vvz) * c1o2;
    mfaaa = m0;
    mfaab = m1;
    mfaac = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfabc * c1o2 + mfabb * (vvz - c1o2) + mfaba * (vz2 - vvz) * c1o2;
-   m1 = -mfabc - two * mfabb * vvz + mfaba * (one - vz2);
+   m1 = -mfabc - c2o1 * mfabb * vvz + mfaba * (c1o1 - vz2);
    m2 = mfabc * c1o2 + mfabb * (vvz + c1o2) + mfaba * (vz2 + vvz) * c1o2;
    mfaba = m0;
    mfabb = m1;
    mfabc = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfacc * c1o2 + mfacb * (vvz - c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-   m1 = -mfacc - two * mfacb * vvz + mfaca * (one - vz2) - c1o3 * oMdrho * vz2;
+   m1 = -mfacc - c2o1 * mfacb * vvz + mfaca * (c1o1 - vz2) - c1o3 * oMdrho * vz2;
    m2 = mfacc * c1o2 + mfacb * (vvz + c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
    mfaca = m0;
    mfacb = m1;
@@ -1727,21 +1728,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfbac * c1o2 + mfbab * (vvz - c1o2) + mfbaa * (vz2 - vvz) * c1o2;
-   m1 = -mfbac - two * mfbab * vvz + mfbaa * (one - vz2);
+   m1 = -mfbac - c2o1 * mfbab * vvz + mfbaa * (c1o1 - vz2);
    m2 = mfbac * c1o2 + mfbab * (vvz + c1o2) + mfbaa * (vz2 + vvz) * c1o2;
    mfbaa = m0;
    mfbab = m1;
    mfbac = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 = mfbbc * c1o2 + mfbbb * (vvz - c1o2) + mfbba * (vz2 - vvz) * c1o2;
-   m1 = -mfbbc - two * mfbbb * vvz + mfbba * (one - vz2);
+   m1 = -mfbbc - c2o1 * mfbbb * vvz + mfbba * (c1o1 - vz2);
    m2 = mfbbc * c1o2 + mfbbb * (vvz + c1o2) + mfbba * (vz2 + vvz) * c1o2;
    mfbba = m0;
    mfbbb = m1;
    mfbbc = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 = mfbcc * c1o2 + mfbcb * (vvz - c1o2) + mfbca * (vz2 - vvz) * c1o2;
-   m1 = -mfbcc - two * mfbcb * vvz + mfbca * (one - vz2);
+   m1 = -mfbcc - c2o1 * mfbcb * vvz + mfbca * (c1o1 - vz2);
    m2 = mfbcc * c1o2 + mfbcb * (vvz + c1o2) + mfbca * (vz2 + vvz) * c1o2;
    mfbca = m0;
    mfbcb = m1;
@@ -1749,21 +1750,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcac * c1o2 + mfcab * (vvz - c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-   m1 = -mfcac - two * mfcab * vvz + mfcaa * (one - vz2) - c1o3 * oMdrho * vz2;
+   m1 = -mfcac - c2o1 * mfcab * vvz + mfcaa * (c1o1 - vz2) - c1o3 * oMdrho * vz2;
    m2 = mfcac * c1o2 + mfcab * (vvz + c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
    mfcaa = m0;
    mfcab = m1;
    mfcac = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 = mfcbc * c1o2 + mfcbb * (vvz - c1o2) + mfcba * (vz2 - vvz) * c1o2;
-   m1 = -mfcbc - two * mfcbb * vvz + mfcba * (one - vz2);
+   m1 = -mfcbc - c2o1 * mfcbb * vvz + mfcba * (c1o1 - vz2);
    m2 = mfcbc * c1o2 + mfcbb * (vvz + c1o2) + mfcba * (vz2 + vvz) * c1o2;
    mfcba = m0;
    mfcbb = m1;
    mfcbc = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 = mfccc * c1o2 + mfccb * (vvz - c1o2) + (mfcca + c1o9 * oMdrho) * (vz2 - vvz) * c1o2;
-   m1 = -mfccc - two * mfccb * vvz + mfcca * (one - vz2) - c1o9 * oMdrho * vz2;
+   m1 = -mfccc - c2o1 * mfccb * vvz + mfcca * (c1o1 - vz2) - c1o9 * oMdrho * vz2;
    m2 = mfccc * c1o2 + mfccb * (vvz + c1o2) + (mfcca + c1o9 * oMdrho) * (vz2 + vvz) * c1o2;
    mfcca = m0;
    mfccb = m1;
@@ -1774,21 +1775,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
    m0 = mfaca * c1o2 + mfaba * (vvy - c1o2) + (mfaaa + c1o6 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfaca - two * mfaba * vvy + mfaaa * (one - vy2) - c1o6 * oMdrho * vy2;
+   m1 = -mfaca - c2o1 * mfaba * vvy + mfaaa * (c1o1 - vy2) - c1o6 * oMdrho * vy2;
    m2 = mfaca * c1o2 + mfaba * (vvy + c1o2) + (mfaaa + c1o6 * oMdrho) * (vy2 + vvy) * c1o2;
    mfaaa = m0;
    mfaba = m1;
    mfaca = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfacb * c1o2 + mfabb * (vvy - c1o2) + (mfaab + c2o3 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfacb - two * mfabb * vvy + mfaab * (one - vy2) - c2o3 * oMdrho * vy2;
+   m1 = -mfacb - c2o1 * mfabb * vvy + mfaab * (c1o1 - vy2) - c2o3 * oMdrho * vy2;
    m2 = mfacb * c1o2 + mfabb * (vvy + c1o2) + (mfaab + c2o3 * oMdrho) * (vy2 + vvy) * c1o2;
    mfaab = m0;
    mfabb = m1;
    mfacb = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfacc * c1o2 + mfabc * (vvy - c1o2) + (mfaac + c1o6 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfacc - two * mfabc * vvy + mfaac * (one - vy2) - c1o6 * oMdrho * vy2;
+   m1 = -mfacc - c2o1 * mfabc * vvy + mfaac * (c1o1 - vy2) - c1o6 * oMdrho * vy2;
    m2 = mfacc * c1o2 + mfabc * (vvy + c1o2) + (mfaac + c1o6 * oMdrho) * (vy2 + vvy) * c1o2;
    mfaac = m0;
    mfabc = m1;
@@ -1796,21 +1797,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfbca * c1o2 + mfbba * (vvy - c1o2) + mfbaa * (vy2 - vvy) * c1o2;
-   m1 = -mfbca - two * mfbba * vvy + mfbaa * (one - vy2);
+   m1 = -mfbca - c2o1 * mfbba * vvy + mfbaa * (c1o1 - vy2);
    m2 = mfbca * c1o2 + mfbba * (vvy + c1o2) + mfbaa * (vy2 + vvy) * c1o2;
    mfbaa = m0;
    mfbba = m1;
    mfbca = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 = mfbcb * c1o2 + mfbbb * (vvy - c1o2) + mfbab * (vy2 - vvy) * c1o2;
-   m1 = -mfbcb - two * mfbbb * vvy + mfbab * (one - vy2);
+   m1 = -mfbcb - c2o1 * mfbbb * vvy + mfbab * (c1o1 - vy2);
    m2 = mfbcb * c1o2 + mfbbb * (vvy + c1o2) + mfbab * (vy2 + vvy) * c1o2;
    mfbab = m0;
    mfbbb = m1;
    mfbcb = m2;
    /////////b//////////////////////////////////////////////////////////////////////////
    m0 = mfbcc * c1o2 + mfbbc * (vvy - c1o2) + mfbac * (vy2 - vvy) * c1o2;
-   m1 = -mfbcc - two * mfbbc * vvy + mfbac * (one - vy2);
+   m1 = -mfbcc - c2o1 * mfbbc * vvy + mfbac * (c1o1 - vy2);
    m2 = mfbcc * c1o2 + mfbbc * (vvy + c1o2) + mfbac * (vy2 + vvy) * c1o2;
    mfbac = m0;
    mfbbc = m1;
@@ -1818,21 +1819,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcca * c1o2 + mfcba * (vvy - c1o2) + (mfcaa + c1o18 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfcca - two * mfcba * vvy + mfcaa * (one - vy2) - c1o18 * oMdrho * vy2;
+   m1 = -mfcca - c2o1 * mfcba * vvy + mfcaa * (c1o1 - vy2) - c1o18 * oMdrho * vy2;
    m2 = mfcca * c1o2 + mfcba * (vvy + c1o2) + (mfcaa + c1o18 * oMdrho) * (vy2 + vvy) * c1o2;
    mfcaa = m0;
    mfcba = m1;
    mfcca = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 = mfccb * c1o2 + mfcbb * (vvy - c1o2) + (mfcab + c2o9 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfccb - two * mfcbb * vvy + mfcab * (one - vy2) - c2o9 * oMdrho * vy2;
+   m1 = -mfccb - c2o1 * mfcbb * vvy + mfcab * (c1o1 - vy2) - c2o9 * oMdrho * vy2;
    m2 = mfccb * c1o2 + mfcbb * (vvy + c1o2) + (mfcab + c2o9 * oMdrho) * (vy2 + vvy) * c1o2;
    mfcab = m0;
    mfcbb = m1;
    mfccb = m2;
    /////////c//////////////////////////////////////////////////////////////////////////
    m0 = mfccc * c1o2 + mfcbc * (vvy - c1o2) + (mfcac + c1o18 * oMdrho) * (vy2 - vvy) * c1o2;
-   m1 = -mfccc - two * mfcbc * vvy + mfcac * (one - vy2) - c1o18 * oMdrho * vy2;
+   m1 = -mfccc - c2o1 * mfcbc * vvy + mfcac * (c1o1 - vy2) - c1o18 * oMdrho * vy2;
    m2 = mfccc * c1o2 + mfcbc * (vvy + c1o2) + (mfcac + c1o18 * oMdrho) * (vy2 + vvy) * c1o2;
    mfcac = m0;
    mfcbc = m1;
@@ -1843,21 +1844,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
    m0 = mfcaa * c1o2 + mfbaa * (vvx - c1o2) + (mfaaa + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcaa - two * mfbaa * vvx + mfaaa * (one - vx2) - c1o36 * oMdrho * vx2;
+   m1 = -mfcaa - c2o1 * mfbaa * vvx + mfaaa * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
    m2 = mfcaa * c1o2 + mfbaa * (vvx + c1o2) + (mfaaa + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaaa = m0;
    mfbaa = m1;
    mfcaa = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcba * c1o2 + mfbba * (vvx - c1o2) + (mfaba + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcba - two * mfbba * vvx + mfaba * (one - vx2) - c1o9 * oMdrho * vx2;
+   m1 = -mfcba - c2o1 * mfbba * vvx + mfaba * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
    m2 = mfcba * c1o2 + mfbba * (vvx + c1o2) + (mfaba + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaba = m0;
    mfbba = m1;
    mfcba = m2;
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcca * c1o2 + mfbca * (vvx - c1o2) + (mfaca + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcca - two * mfbca * vvx + mfaca * (one - vx2) - c1o36 * oMdrho * vx2;
+   m1 = -mfcca - c2o1 * mfbca * vvx + mfaca * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
    m2 = mfcca * c1o2 + mfbca * (vvx + c1o2) + (mfaca + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaca = m0;
    mfbca = m1;
@@ -1865,21 +1866,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcab * c1o2 + mfbab * (vvx - c1o2) + (mfaab + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcab - two * mfbab * vvx + mfaab * (one - vx2) - c1o9 * oMdrho * vx2;
+   m1 = -mfcab - c2o1 * mfbab * vvx + mfaab * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
    m2 = mfcab * c1o2 + mfbab * (vvx + c1o2) + (mfaab + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaab = m0;
    mfbab = m1;
    mfcab = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 = mfcbb * c1o2 + mfbbb * (vvx - c1o2) + (mfabb + c4o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcbb - two * mfbbb * vvx + mfabb * (one - vx2) - c4o9 * oMdrho * vx2;
+   m1 = -mfcbb - c2o1 * mfbbb * vvx + mfabb * (c1o1 - vx2) - c4o9 * oMdrho * vx2;
    m2 = mfcbb * c1o2 + mfbbb * (vvx + c1o2) + (mfabb + c4o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfabb = m0;
    mfbbb = m1;
    mfcbb = m2;
    ///////////b////////////////////////////////////////////////////////////////////////
    m0 = mfccb * c1o2 + mfbcb * (vvx - c1o2) + (mfacb + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfccb - two * mfbcb * vvx + mfacb * (one - vx2) - c1o9 * oMdrho * vx2;
+   m1 = -mfccb - c2o1 * mfbcb * vvx + mfacb * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
    m2 = mfccb * c1o2 + mfbcb * (vvx + c1o2) + (mfacb + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfacb = m0;
    mfbcb = m1;
@@ -1887,21 +1888,21 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    ////////////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////////////
    m0 = mfcac * c1o2 + mfbac * (vvx - c1o2) + (mfaac + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcac - two * mfbac * vvx + mfaac * (one - vx2) - c1o36 * oMdrho * vx2;
+   m1 = -mfcac - c2o1 * mfbac * vvx + mfaac * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
    m2 = mfcac * c1o2 + mfbac * (vvx + c1o2) + (mfaac + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
    mfaac = m0;
    mfbac = m1;
    mfcac = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 = mfcbc * c1o2 + mfbbc * (vvx - c1o2) + (mfabc + c1o9 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfcbc - two * mfbbc * vvx + mfabc * (one - vx2) - c1o9 * oMdrho * vx2;
+   m1 = -mfcbc - c2o1 * mfbbc * vvx + mfabc * (c1o1 - vx2) - c1o9 * oMdrho * vx2;
    m2 = mfcbc * c1o2 + mfbbc * (vvx + c1o2) + (mfabc + c1o9 * oMdrho) * (vx2 + vvx) * c1o2;
    mfabc = m0;
    mfbbc = m1;
    mfcbc = m2;
    ///////////c////////////////////////////////////////////////////////////////////////
    m0 = mfccc * c1o2 + mfbcc * (vvx - c1o2) + (mfacc + c1o36 * oMdrho) * (vx2 - vvx) * c1o2;
-   m1 = -mfccc - two * mfbcc * vvx + mfacc * (one - vx2) - c1o36 * oMdrho * vx2;
+   m1 = -mfccc - c2o1 * mfbcc * vvx + mfacc * (c1o1 - vx2) - c1o36 * oMdrho * vx2;
    m2 = mfccc * c1o2 + mfbcc * (vvx + c1o2) + (mfacc + c1o36 * oMdrho) * (vx2 + vvx) * c1o2;
    mfacc = m0;
    mfbcc = m1;
@@ -1912,11 +1913,11 @@ void CumulantLBMKernel::nodeCollision(int step, int x1, int x2, int x3)
    //proof correctness
    //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-   LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+   real drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
       + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
       + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
    //LBMReal dif = fabs(rho - rho_post);
-   LBMReal dif = drho - drho_post;
+   real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
    if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.h
index 995ce63d877d833e7907d6335e609cfa7478aebd..9fb6d834fc8904af84e20798b03ea23b68ad63a4 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantLBMKernel.h
@@ -21,35 +21,35 @@ public:
    virtual ~CumulantLBMKernel() = default;
    //virtual void calculate(int step);
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    void setBulkOmegaToOmega(bool value);
    void setRelaxationParameter(Parameter p);
 protected:
    void initData() override;
    void nodeCollision(int step, int x1, int x2, int x3) override;
    void initDataSet();
-   LBMReal f[D3Q27System::ENDF + 1];
+   real f[D3Q27System::ENDF + 1];
 
    UbTimer timer;
 
-   LBMReal OxyyMxzz;
+   real OxyyMxzz;
    Parameter parameter;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1, muX2, muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 
    // bulk viscosity
    bool bulkOmegaToOmega;
-   LBMReal OxxPyyPzz;
+   real OxxPyyPzz;
 
-   LBMReal omega;
+   real omega;
 };
 #endif // CumulantLBMKernel_h__
\ No newline at end of file
diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
index 7e2fd17c0577ea4606f4b38530985411764b5bdb..5c9be26b7b1ace78e81f1a1c1f28a1e089c9a013 100644
--- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
@@ -4,7 +4,8 @@
 
 namespace D3Q27System
 {
-using namespace UbMath;
+//using namespace UbMath;
+    using namespace vf::basics::constant;
 
 // index            0   1   2   3   4   5  6   7   8   9   10  11  12  13  14  15  16  17   18  19  20  21  22  23  24  25
 // f:               E,  W,  N,  S,  T,  B, NE, SW, SE, NW, TE, BW, BE, TW, TN, BS, BN, TS, TNE TNW TSE TSW BNE BNW BSE BSW
@@ -18,42 +19,42 @@ const int DX1[] = { 0,  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,
 const int DX2[] = { 0,  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   1,  1, -1, -1,  1,  1, -1, -1 };
 const int DX3[] = { 0,  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   1,  1,  1,  1, -1, -1, -1, -1 };
 
-const double WEIGTH[] = { c8o27,  
+const real WEIGTH[] = { c8o27,  
                           c2o27,  c2o27,  c2o27,  c2o27,  c2o27,  c2o27,  
                           c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,  c1o54,
                           c1o216, c1o216, c1o216, c1o216, c1o216, c1o216, c1o216, c1o216 };
 
-const int INVDIR[] = { DIR_000, INV_P00,   INV_M00,   INV_0P0,   INV_0M0,   INV_00P,   INV_00M,   INV_PP0,  INV_MM0, INV_PM0,
-                       INV_MP0,  INV_P0P,  INV_M0M,  INV_P0M,  INV_M0P,  INV_0PP,  INV_0MM,  INV_0PM, INV_0MP,
-                       INV_PPP, INV_MPP, INV_PMP, INV_MMP, INV_PPM, INV_MPM, INV_PMM, INV_MMM };
+const int INVDIR[] = { vf::lbm::dir::DIR_000, vf::lbm::dir::INV_P00,   vf::lbm::dir::INV_M00,  vf::lbm::dir::INV_0P0,  vf::lbm::dir::INV_0M0,   vf::lbm::dir::INV_00P,   vf::lbm::dir::INV_00M,   vf::lbm::dir::INV_PP0,  vf::lbm::dir::INV_MM0, vf::lbm::dir::INV_PM0,
+                       vf::lbm::dir::INV_MP0, vf::lbm::dir::INV_P0P,  vf::lbm::dir::INV_M0M,  vf::lbm::dir::INV_P0M,  vf::lbm::dir::INV_M0P,  vf::lbm::dir::INV_0PP,  vf::lbm::dir::INV_0MM,  vf::lbm::dir::INV_0PM, vf::lbm::dir::INV_0MP,
+                       vf::lbm::dir::INV_PPP, vf::lbm::dir::INV_MPP, vf::lbm::dir::INV_PMP, vf::lbm::dir::INV_MMP, vf::lbm::dir::INV_PPM, vf::lbm::dir::INV_MPM, vf::lbm::dir::INV_PMM, vf::lbm::dir::INV_MMM };
 
 // index             0   1   2   3   4   5  6   7   8    9  10  11  12  13  14  15  16  17  18
 // direction:        E,  W,  N,  S,  T,  B, NE, SW, SE, NW, TE, BW, BE, TW, TN, BS, BN, TS, TNE TNW TSE TSW BNE BNW BSE
 // BSW
-const int EX1[] = { 0, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 };
-const int EX2[] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 1, 1, -1, -1, 1, 1, -1, -1 };
-const int EX3[] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1 };
+//const int EX1[] = { 0, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1, 0, 0, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 };
+//const int EX2[] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 1, 1, -1, -1, 1, 1, -1, -1 };
+//const int EX3[] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, -1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1 };
 
 //////////////////////////////////////////////////////////////////////////
 
 
 
-LBMReal getDensity(const LBMReal *const &f /*[27]*/)
+real getDensity(const real *const &f /*[27]*/)
 {
     return vf::lbm::getDensity(f);
 }
 
-LBMReal getIncompVelocityX1(const LBMReal *const &f /*[27]*/)
+real getIncompVelocityX1(const real *const &f /*[27]*/)
 {
     return vf::lbm::getIncompressibleVelocityX1(f);
 }
 
-LBMReal getIncompVelocityX2(const LBMReal *const &f /*[27]*/)
+real getIncompVelocityX2(const real *const &f /*[27]*/)
 {
     return vf::lbm::getIncompressibleVelocityX2(f);
 }
 
-LBMReal getIncompVelocityX3(const LBMReal *const &f /*[27]*/)
+real getIncompVelocityX3(const real *const &f /*[27]*/)
 {
     return vf::lbm::getIncompressibleVelocityX3(f);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
index d8822219484599e8f31fa86564ecdaf844679d52..2843a77a21310a9569c3abb3552eea42bbd4f250 100644
--- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
+++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
@@ -38,9 +38,13 @@
 #include <string>
 #include <iostream>
 
+#include "lbm/constants/D3Q27.h"
 #include "LBMSystem.h"
 #include "UbException.h"
 #include "UbMath.h"
+#include "basics/constants/NumericConstants.h"
+
+//using namespace vf::lbm::dir;
 
 //! \brief namespace for global system-functions
 namespace D3Q27System
@@ -53,15 +57,15 @@ static const int FENDDIR   = 26; // D3Q27
 static const int STARTF = 0;
 static const int ENDF   = 26; // D3Q27
 
-static const int STARTDIR = 1; //0
+//static const int STARTDIR = 1; //0
 static const int ENDDIR   = 26;//26 // all geometric directions
 
 extern const int DX1[ENDDIR + 1];
 extern const int DX2[ENDDIR + 1];
 extern const int DX3[ENDDIR + 1];
-extern const double WEIGTH[ENDDIR + 1];
+extern const real WEIGTH[ENDDIR + 1];
 
-extern const double cNorm[3][ENDDIR];
+extern const real cNorm[3][ENDDIR];
 
 static const int MINLEVEL = 0;
 static const int MAXLEVEL = 25;
@@ -126,60 +130,60 @@ extern const int EX3[ENDDIR + 1];
 //static constexpr int BSE = 25;
 //static constexpr int BSW = 26;
 
-static constexpr int DIR_000 = 0;
-static constexpr int DIR_P00 = 1;
-static constexpr int DIR_M00 = 2;
-static constexpr int DIR_0P0 = 3;
-static constexpr int DIR_0M0 = 4;
-static constexpr int DIR_00P = 5;
-static constexpr int DIR_00M = 6;
-static constexpr int DIR_PP0 = 7;
-static constexpr int DIR_MM0 = 8;
-static constexpr int DIR_PM0 = 9;
-static constexpr int DIR_MP0 = 10;
-static constexpr int DIR_P0P = 11;
-static constexpr int DIR_M0M = 12;
-static constexpr int DIR_P0M = 13;
-static constexpr int DIR_M0P = 14;
-static constexpr int DIR_0PP = 15;
-static constexpr int DIR_0MM = 16;
-static constexpr int DIR_0PM = 17;
-static constexpr int DIR_0MP = 18;
-static constexpr int DIR_PPP = 19;
-static constexpr int DIR_MPP = 20;
-static constexpr int DIR_PMP = 21;
-static constexpr int DIR_MMP = 22;
-static constexpr int DIR_PPM = 23;
-static constexpr int DIR_MPM = 24;
-static constexpr int DIR_PMM = 25;
-static constexpr int DIR_MMM = 26;
-
-static constexpr int INV_P00 = DIR_M00;
-static constexpr int INV_M00 = DIR_P00;
-static constexpr int INV_0P0 = DIR_0M0;
-static constexpr int INV_0M0 = DIR_0P0;
-static constexpr int INV_00P = DIR_00M;
-static constexpr int INV_00M = DIR_00P;
-static constexpr int INV_PP0 = DIR_MM0;
-static constexpr int INV_MM0 = DIR_PP0;
-static constexpr int INV_PM0 = DIR_MP0;
-static constexpr int INV_MP0 = DIR_PM0;
-static constexpr int INV_P0P = DIR_M0M;
-static constexpr int INV_M0M = DIR_P0P;
-static constexpr int INV_P0M = DIR_M0P;
-static constexpr int INV_M0P = DIR_P0M;
-static constexpr int INV_0PP = DIR_0MM;
-static constexpr int INV_0MM = DIR_0PP;
-static constexpr int INV_0PM = DIR_0MP;
-static constexpr int INV_0MP = DIR_0PM;
-static constexpr int INV_PPP = DIR_MMM;
-static constexpr int INV_MPP = DIR_PMM;
-static constexpr int INV_PMP = DIR_MPM;
-static constexpr int INV_MMP = DIR_PPM;
-static constexpr int INV_PPM = DIR_MMP;
-static constexpr int INV_MPM = DIR_PMP;
-static constexpr int INV_PMM = DIR_MPP;
-static constexpr int INV_MMM = DIR_PPP;
+//static constexpr int DIR_000 = 0;
+//static constexpr int DIR_P00 = 1;
+//static constexpr int DIR_M00 = 2;
+//static constexpr int DIR_0P0 = 3;
+//static constexpr int DIR_0M0 = 4;
+//static constexpr int DIR_00P = 5;
+//static constexpr int DIR_00M = 6;
+//static constexpr int DIR_PP0 = 7;
+//static constexpr int DIR_MM0 = 8;
+//static constexpr int DIR_PM0 = 9;
+//static constexpr int DIR_MP0 = 10;
+//static constexpr int DIR_P0P = 11;
+//static constexpr int DIR_M0M = 12;
+//static constexpr int DIR_P0M = 13;
+//static constexpr int DIR_M0P = 14;
+//static constexpr int DIR_0PP = 15;
+//static constexpr int DIR_0MM = 16;
+//static constexpr int DIR_0PM = 17;
+//static constexpr int DIR_0MP = 18;
+//static constexpr int DIR_PPP = 19;
+//static constexpr int DIR_MPP = 20;
+//static constexpr int DIR_PMP = 21;
+//static constexpr int DIR_MMP = 22;
+//static constexpr int DIR_PPM = 23;
+//static constexpr int DIR_MPM = 24;
+//static constexpr int DIR_PMM = 25;
+//static constexpr int DIR_MMM = 26;
+
+//static constexpr int INV_P00 = DIR_M00;
+//static constexpr int INV_M00 = DIR_P00;
+//static constexpr int INV_0P0 = DIR_0M0;
+//static constexpr int INV_0M0 = DIR_0P0;
+//static constexpr int INV_00P = DIR_00M;
+//static constexpr int INV_00M = DIR_00P;
+//static constexpr int INV_PP0 = DIR_MM0;
+//static constexpr int INV_MM0 = DIR_PP0;
+//static constexpr int INV_PM0 = DIR_MP0;
+//static constexpr int INV_MP0 = DIR_PM0;
+//static constexpr int INV_P0P = DIR_M0M;
+//static constexpr int INV_M0M = DIR_P0P;
+//static constexpr int INV_P0M = DIR_M0P;
+//static constexpr int INV_M0P = DIR_P0M;
+//static constexpr int INV_0PP = DIR_0MM;
+//static constexpr int INV_0MM = DIR_0PP;
+//static constexpr int INV_0PM = DIR_0MP;
+//static constexpr int INV_0MP = DIR_0PM;
+//static constexpr int INV_PPP = DIR_MMM;
+//static constexpr int INV_MPP = DIR_PMM;
+//static constexpr int INV_PMP = DIR_MPM;
+//static constexpr int INV_MMP = DIR_PPM;
+//static constexpr int INV_PPM = DIR_MMP;
+//static constexpr int INV_MPM = DIR_PMP;
+//static constexpr int INV_PMM = DIR_MPP;
+//static constexpr int INV_MMM = DIR_PPP;
 
 extern const int INVDIR[ENDDIR + 1];
 
@@ -213,6 +217,8 @@ static const int ET_BNE = 12;
 //////////////////////////////////////////////////////////////////////////
 inline std::string getDirectionString(int direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
         case DIR_P00:
             return "E";
@@ -273,109 +279,111 @@ inline std::string getDirectionString(int direction)
 //////////////////////////////////////////////////////////////////////////
 static inline void setNeighborCoordinatesForDirection(int &x1, int &x2, int &x3, const int &direction)
 {
+    using namespace vf::lbm::dir;
+
     switch (direction) {
-        case D3Q27System::DIR_P00:
+        case DIR_P00:
             x1++;
             break;
-        case D3Q27System::DIR_0P0:
+        case DIR_0P0:
             x2++;
             break;
-        case D3Q27System::DIR_00P:
+        case DIR_00P:
             x3++;
             break;
-        case D3Q27System::DIR_M00:
+        case DIR_M00:
             x1--;
             break;
-        case D3Q27System::DIR_0M0:
+        case DIR_0M0:
             x2--;
             break;
-        case D3Q27System::DIR_00M:
+        case DIR_00M:
             x3--;
             break;
-        case D3Q27System::DIR_PP0:
+        case DIR_PP0:
             x1++;
             x2++;
             break;
-        case D3Q27System::DIR_MP0:
+        case DIR_MP0:
             x1--;
             x2++;
             break;
-        case D3Q27System::DIR_MM0:
+        case DIR_MM0:
             x1--;
             x2--;
             break;
-        case D3Q27System::DIR_PM0:
+        case DIR_PM0:
             x1++;
             x2--;
             break;
-        case D3Q27System::DIR_P0P:
+        case DIR_P0P:
             x1++;
             x3++;
             break;
-        case D3Q27System::DIR_M0M:
+        case DIR_M0M:
             x1--;
             x3--;
             break;
-        case D3Q27System::DIR_P0M:
+        case DIR_P0M:
             x1++;
             x3--;
             break;
-        case D3Q27System::DIR_M0P:
+        case DIR_M0P:
             x1--;
             x3++;
             break;
-        case D3Q27System::DIR_0PP:
+        case DIR_0PP:
             x2++;
             x3++;
             break;
-        case D3Q27System::DIR_0MM:
+        case DIR_0MM:
             x2--;
             x3--;
             break;
-        case D3Q27System::DIR_0PM:
+        case DIR_0PM:
             x2++;
             x3--;
             break;
-        case D3Q27System::DIR_0MP:
+        case DIR_0MP:
             x2--;
             x3++;
             break;
-        case D3Q27System::DIR_PPP:
+        case DIR_PPP:
             x1++;
             x2++;
             x3++;
             break;
-        case D3Q27System::DIR_MPP:
+        case DIR_MPP:
             x1--;
             x2++;
             x3++;
             break;
-        case D3Q27System::DIR_PMP:
+        case DIR_PMP:
             x1++;
             x2--;
             x3++;
             break;
-        case D3Q27System::DIR_MMP:
+        case DIR_MMP:
             x1--;
             x2--;
             x3++;
             break;
-        case D3Q27System::DIR_PPM:
+        case DIR_PPM:
             x1++;
             x2++;
             x3--;
             break;
-        case D3Q27System::DIR_MPM:
+        case DIR_MPM:
             x1--;
             x2++;
             x3--;
             break;
-        case D3Q27System::DIR_PMM:
+        case DIR_PMM:
             x1++;
             x2--;
             x3--;
             break;
-        case D3Q27System::DIR_MMM:
+        case DIR_MMM:
             x1--;
             x2--;
             x3--;
@@ -388,88 +396,108 @@ static inline void setNeighborCoordinatesForDirection(int &x1, int &x2, int &x3,
 //////////////////////////////////////////////////////////////////////////
 // MACROSCOPIC VALUES
 /*=====================================================================*/
-LBMReal getDensity(const LBMReal *const &f /*[27]*/);
+real getDensity(const real *const &f /*[27]*/); // declaration only; called by getPressure and the compressible velocity helpers in this file
 /*=====================================================================*/
-static LBMReal getPressure(const LBMReal *const &f /*[27]*/) { return REAL_CAST(UbMath::c1o3) * getDensity(f); }
+static real getPressure(const real *const &f /*[27]*/) { return REAL_CAST(vf::basics::constant::c1o3) * getDensity(f); } // returns c1o3 * getDensity(f)
 /*=====================================================================*/
-LBMReal getIncompVelocityX1(const LBMReal *const &f /*[27]*/);
+real getIncompVelocityX1(const real *const &f /*[27]*/); // declaration only; no body in this part of the file
 /*=====================================================================*/
-LBMReal getIncompVelocityX2(const LBMReal *const &f /*[27]*/);
+real getIncompVelocityX2(const real *const &f /*[27]*/); // declaration only; no body in this part of the file
 /*=====================================================================*/
-LBMReal getIncompVelocityX3(const LBMReal *const &f /*[27]*/);
+real getIncompVelocityX3(const real *const &f /*[27]*/); // declaration only; no body in this part of the file
 
 
 /*=====================================================================*/
-static void calcDensity(const LBMReal *const &f /*[27]*/, LBMReal &rho)
+static void calcDensity(const real *const &f /*[27]*/, real &rho) // writes into rho the sum of all 27 entries of f, addressed through the DIR_* indices
 {
+    using namespace vf::lbm::dir; // lets the DIR_* indices below be written unqualified (assumed to be declared in vf::lbm::dir)
+
     rho = ((f[DIR_PPP] + f[DIR_MMM]) + (f[DIR_PMP] + f[DIR_MPM])) + ((f[DIR_PMM] + f[DIR_MPP]) + (f[DIR_MMP] + f[DIR_PPM])) +
           (((f[DIR_PP0] + f[DIR_MM0]) + (f[DIR_PM0] + f[DIR_MP0])) + ((f[DIR_P0P] + f[DIR_M0M]) + (f[DIR_P0M] + f[DIR_M0P])) +
            ((f[DIR_0PM] + f[DIR_0MP]) + (f[DIR_0PP] + f[DIR_0MM]))) +
           ((f[DIR_P00] + f[DIR_M00]) + (f[DIR_0P0] + f[DIR_0M0]) + (f[DIR_00P] + f[DIR_00M])) + f[DIR_000];
 }
 /*=====================================================================*/
-static void calcIncompVelocityX1(const LBMReal *const &f /*[27]*/, LBMReal &vx1)
+static void calcIncompVelocityX1(const real *const &f /*[27]*/, real &vx1) // vx1 = sum of f over the nine DIR_P** entries minus the nine DIR_M** entries; not divided by density
 {
+    using namespace vf::lbm::dir;
+
     vx1 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_PMM] - f[DIR_MPP]) + (f[DIR_PPM] - f[DIR_MMP]))) +
            (((f[DIR_P0M] - f[DIR_M0P]) + (f[DIR_P0P] - f[DIR_M0M])) + ((f[DIR_PM0] - f[DIR_MP0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_P00] - f[DIR_M00]));
 }
 /*=====================================================================*/
-static void calcIncompVelocityX2(const LBMReal *const &f /*[27]*/, LBMReal &vx2)
+static void calcIncompVelocityX2(const real *const &f /*[27]*/, real &vx2) // vx2 = sum of f over the nine DIR_*P* entries minus the nine DIR_*M* entries; not divided by density
 {
+    using namespace vf::lbm::dir;
+
     vx2 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_MPM] - f[DIR_PMP])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_PPM] - f[DIR_MMP]))) +
            (((f[DIR_0PM] - f[DIR_0MP]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_MP0] - f[DIR_PM0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_0P0] - f[DIR_0M0]));
 }
 /*=====================================================================*/
-static void calcIncompVelocityX3(const LBMReal *const &f /*[27]*/, LBMReal &vx3)
+static void calcIncompVelocityX3(const real *const &f /*[27]*/, real &vx3) // vx3 = sum of f over the nine DIR_**P entries minus the nine DIR_**M entries; not divided by density
 {
+    using namespace vf::lbm::dir;
+
     vx3 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_MMP] - f[DIR_PPM]))) +
            (((f[DIR_0MP] - f[DIR_0PM]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_M0P] - f[DIR_P0M]) + (f[DIR_P0P] - f[DIR_M0M]))) + (f[DIR_00P] - f[DIR_00M]));
 }
 /*=====================================================================*/
-static LBMReal getCompVelocityX1(const LBMReal *const &f /*[27]*/)
+static real getCompVelocityX1(const real *const &f /*[27]*/) // returns (sum of f over DIR_P** minus DIR_M**) / getDensity(f); NOTE(review): no guard for a zero density
 {
+    using namespace vf::lbm::dir;
+
     return ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_PMM] - f[DIR_MPP]) + (f[DIR_PPM] - f[DIR_MMP]))) +
             (((f[DIR_P0M] - f[DIR_M0P]) + (f[DIR_P0P] - f[DIR_M0M])) + ((f[DIR_PM0] - f[DIR_MP0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_P00] - f[DIR_M00])) /
            getDensity(f);
 }
 /*=====================================================================*/
-static LBMReal getCompVelocityX2(const LBMReal *const &f /*[27]*/)
+static real getCompVelocityX2(const real *const &f /*[27]*/) // returns (sum of f over DIR_*P* minus DIR_*M*) / getDensity(f); NOTE(review): no guard for a zero density
 {
+    using namespace vf::lbm::dir;
+
     return ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_MPM] - f[DIR_PMP])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_PPM] - f[DIR_MMP]))) +
             (((f[DIR_0PM] - f[DIR_0MP]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_MP0] - f[DIR_PM0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_0P0] - f[DIR_0M0])) /
            getDensity(f);
 }
 /*=====================================================================*/
-static LBMReal getCompVelocityX3(const LBMReal *const &f /*[27]*/)
+static real getCompVelocityX3(const real *const &f /*[27]*/) // returns (sum of f over DIR_**P minus DIR_**M) / getDensity(f); NOTE(review): no guard for a zero density
 {
+    using namespace vf::lbm::dir;
+
     return ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_MMP] - f[DIR_PPM]))) +
             (((f[DIR_0MP] - f[DIR_0PM]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_M0P] - f[DIR_P0M]) + (f[DIR_P0P] - f[DIR_M0M]))) + (f[DIR_00P] - f[DIR_00M])) /
            getDensity(f);
 }
 /*=====================================================================*/
-static void calcCompVelocityX1(const LBMReal *const &f /*[27]*/, LBMReal &vx1)
+static void calcCompVelocityX1(const real *const &f /*[27]*/, real &vx1) // vx1 = (sum of f over DIR_P** minus DIR_M**) / getDensity(f); same expression as getCompVelocityX1, written to an out-parameter
 {
+    using namespace vf::lbm::dir;
+
     vx1 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_PMM] - f[DIR_MPP]) + (f[DIR_PPM] - f[DIR_MMP]))) +
            (((f[DIR_P0M] - f[DIR_M0P]) + (f[DIR_P0P] - f[DIR_M0M])) + ((f[DIR_PM0] - f[DIR_MP0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_P00] - f[DIR_M00])) /
           getDensity(f);
 }
 /*=====================================================================*/
-static void calcCompVelocityX2(const LBMReal *const &f /*[27]*/, LBMReal &vx2)
+static void calcCompVelocityX2(const real *const &f /*[27]*/, real &vx2) // vx2 = (sum of f over DIR_*P* minus DIR_*M*) / getDensity(f); same expression as getCompVelocityX2, written to an out-parameter
 {
+    using namespace vf::lbm::dir;
+
     vx2 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_MPM] - f[DIR_PMP])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_PPM] - f[DIR_MMP]))) +
            (((f[DIR_0PM] - f[DIR_0MP]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_MP0] - f[DIR_PM0]) + (f[DIR_PP0] - f[DIR_MM0]))) + (f[DIR_0P0] - f[DIR_0M0])) /
           getDensity(f);
 }
 /*=====================================================================*/
-static void calcCompVelocityX3(const LBMReal *const &f /*[27]*/, LBMReal &vx3)
+static void calcCompVelocityX3(const real *const &f /*[27]*/, real &vx3) // vx3 = (sum of f over DIR_**P minus DIR_**M) / getDensity(f); same expression as getCompVelocityX3, written to an out-parameter
 {
+    using namespace vf::lbm::dir;
+
     vx3 = ((((f[DIR_PPP] - f[DIR_MMM]) + (f[DIR_PMP] - f[DIR_MPM])) + ((f[DIR_MPP] - f[DIR_PMM]) + (f[DIR_MMP] - f[DIR_PPM]))) +
            (((f[DIR_0MP] - f[DIR_0PM]) + (f[DIR_0PP] - f[DIR_0MM])) + ((f[DIR_M0P] - f[DIR_P0M]) + (f[DIR_P0P] - f[DIR_M0M]))) + (f[DIR_00P] - f[DIR_00M])) /
           getDensity(f);
 }
 /*=====================================================================*/
-static void calcIncompMacroscopicValues(const LBMReal *const &f /*[27]*/, LBMReal &rho, LBMReal &vx1, LBMReal &vx2,
-                                        LBMReal &vx3)
+static void calcIncompMacroscopicValues(const real *const &f /*[27]*/, real &rho, real &vx1, real &vx2, // rho is filled by calcDensity and vx1 by calcIncompVelocityX1
+                                        real &vx3) // NOTE(review): vx2/vx3 are presumably filled by the X2/X3 counterparts - confirm against the full body
 {
     D3Q27System::calcDensity(f, rho);
     D3Q27System::calcIncompVelocityX1(f, vx1);
@@ -478,342 +506,353 @@ static void calcIncompMacroscopicValues(const LBMReal *const &f /*[27]*/, LBMRea
 }
 
 /*=====================================================================*/
-static void calcCompMacroscopicValues(const LBMReal *const &f /*[27]*/, LBMReal &drho, LBMReal &vx1, LBMReal &vx2,
-                                      LBMReal &vx3)
+static void calcCompMacroscopicValues(const real *const &f /*[27]*/, real &drho, real &vx1, real &vx2, // drho receives the calcDensity sum of f
+                                      real &vx3) // vx1..vx3 receive the calcIncompVelocityX* sums divided by (drho + c1o1)
 {
     D3Q27System::calcDensity(f, drho);
     D3Q27System::calcIncompVelocityX1(f, vx1);
     D3Q27System::calcIncompVelocityX2(f, vx2);
     D3Q27System::calcIncompVelocityX3(f, vx3);
-    LBMReal rho = drho + UbMath::one;
+    // Divisor for the three velocity sums: drho + c1o1 (c1o1 takes the place of the former UbMath::one).
+    real rho = drho + vf::basics::constant::c1o1;
     vx1 /= rho;
     vx2 /= rho;
     vx3 /= rho;
 }
 //////////////////////////////////////////////////////////////////////////
-static LBMReal getCompFeqForDirection(const int &direction, const LBMReal &drho, const LBMReal &vx1, const LBMReal &vx2,
-                                      const LBMReal &vx3)
+static real getCompFeqForDirection(const int &direction, const real &drho, const real &vx1, const real &vx2, // returns w * (drho + rho * (3*cv + c9o2*cv*cv - cu_sq)) for one DIR_* value, where cv is the signed sum of the velocity components that direction selects
+                                      const real &vx3) // w is c8o27 for DIR_000, c2o27 for one-axis, c1o54 for two-axis and c1o216 for three-axis directions; any other value throws UbException
 {
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
-    LBMReal rho   = drho + UbMath::one;
+    using namespace vf::lbm::dir;
+
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); // 1.5 * squared velocity magnitude; subtracted in every case below
+    real rho   = drho + vf::basics::constant::c1o1; // multiplies only the velocity-dependent part; drho itself enters each case unscaled
     switch (direction) {
         case DIR_000:
-            return REAL_CAST(UbMath::c8o27 * (drho + rho * (-cu_sq)));
+            return REAL_CAST(vf::basics::constant::c8o27 * (drho + rho * (-cu_sq)));
         case DIR_P00:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (vx1) + UbMath::c9o2 * (vx1) * (vx1)-cu_sq)));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + rho * (3.0 * (vx1) +vf::basics::constant::c9o2 * (vx1) * (vx1)-cu_sq)));
         case DIR_M00:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (-vx1) + UbMath::c9o2 * (-vx1) * (-vx1) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + rho * (3.0 * (-vx1) + vf::basics::constant::c9o2 * (-vx1) * (-vx1) - cu_sq)));
         case DIR_0P0:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (vx2) + UbMath::c9o2 * (vx2) * (vx2)-cu_sq)));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + rho * (3.0 * (vx2) +vf::basics::constant::c9o2 * (vx2) * (vx2)-cu_sq)));
         case DIR_0M0:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (-vx2) + UbMath::c9o2 * (-vx2) * (-vx2) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + rho * (3.0 * (-vx2) + vf::basics::constant::c9o2 * (-vx2) * (-vx2) - cu_sq)));
         case DIR_00P:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (vx3) + UbMath::c9o2 * (vx3) * (vx3)-cu_sq)));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + rho * (3.0 * (vx3) + vf::basics::constant::c9o2 * (vx3) * (vx3)-cu_sq)));
         case DIR_00M:
-            return REAL_CAST(UbMath::c2o27 * (drho + rho * (3.0 * (-vx3) + UbMath::c9o2 * (-vx3) * (-vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + rho * (3.0 * (-vx3) + vf::basics::constant::c9o2 * (-vx3) * (-vx3) - cu_sq)));
         case DIR_PP0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx1 + vx2) + UbMath::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx1 + vx2) + vf::basics::constant::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq)));
         case DIR_MM0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx1 - vx2) + UbMath::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx1 - vx2) + vf::basics::constant::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq)));
         case DIR_PM0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx1 - vx2) + UbMath::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx1 - vx2) + vf::basics::constant::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq)));
         case DIR_MP0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx1 + vx2) + UbMath::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx1 + vx2) + vf::basics::constant::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq)));
         case DIR_P0P:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx1 + vx3) + UbMath::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx1 + vx3) + vf::basics::constant::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq)));
         case DIR_M0M:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx1 - vx3) + UbMath::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx1 - vx3) + vf::basics::constant::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq)));
         case DIR_P0M:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx1 - vx3) + UbMath::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx1 - vx3) + vf::basics::constant::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq)));
         case DIR_M0P:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx1 + vx3) + UbMath::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx1 + vx3) + vf::basics::constant::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq)));
         case DIR_0PP:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx2 + vx3) + UbMath::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx2 + vx3) + vf::basics::constant::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq)));
         case DIR_0MM:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx2 - vx3) + UbMath::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx2 - vx3) + vf::basics::constant::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq)));
         case DIR_0PM:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (vx2 - vx3) + UbMath::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (vx2 - vx3) + vf::basics::constant::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq)));
         case DIR_0MP:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + rho * (3.0 * (-vx2 + vx3) + UbMath::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq)));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + rho * (3.0 * (-vx2 + vx3) + vf::basics::constant::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq)));
         case DIR_PPP:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::basics::constant::c1o216 *
                              (drho + rho * (3.0 * (vx1 + vx2 + vx3) +
-                                            UbMath::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq)));
+                                 vf::basics::constant::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq)));
         case DIR_MMM:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::basics::constant::c1o216 *
                              (drho + rho * (3.0 * (-vx1 - vx2 - vx3) +
-                                            UbMath::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq)));
+                                 vf::basics::constant::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq)));
         case DIR_PPM:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::basics::constant::c1o216 *
                              (drho + rho * (3.0 * (vx1 + vx2 - vx3) +
-                                            UbMath::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq)));
+                                 vf::basics::constant::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq)));
         case DIR_MMP:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::basics::constant::c1o216 *
                              (drho + rho * (3.0 * (-vx1 - vx2 + vx3) +
-                                            UbMath::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq)));
+                                            vf::basics::constant::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq)));
         case DIR_PMP:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::basics::constant::c1o216 *
                              (drho + rho * (3.0 * (vx1 - vx2 + vx3) +
-                                            UbMath::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq)));
+                                 vf::basics::constant::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq)));
         case DIR_MPM:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::basics::constant::c1o216 *
                              (drho + rho * (3.0 * (-vx1 + vx2 - vx3) +
-                                            UbMath::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq)));
+                                 vf::basics::constant::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq)));
         case DIR_PMM:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::basics::constant::c1o216 *
                              (drho + rho * (3.0 * (vx1 - vx2 - vx3) +
-                                            UbMath::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq)));
+                                 vf::basics::constant::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq)));
         case DIR_MPP:
-            return REAL_CAST(UbMath::c1o216 *
+            return REAL_CAST(vf::basics::constant::c1o216 *
                              (drho + rho * (3.0 * (-vx1 + vx2 + vx3) +
-                                            UbMath::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq)));
+                                 vf::basics::constant::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq)));
         default:
             throw UbException(UB_EXARGS, "unknown dir");
     }
 }
 //////////////////////////////////////////////////////////////////////////
-static void calcCompFeq(LBMReal *const &feq /*[27]*/, const LBMReal &drho, const LBMReal &vx1, const LBMReal &vx2,
-                        const LBMReal &vx3)
+static void calcCompFeq(real *const &feq /*[27]*/, const real &drho, const real &vx1, const real &vx2, // fills all 27 DIR_* entries of feq with w * (drho + rho * (3*cv + c9o2*cv*cv - cu_sq)), cv being the signed velocity sum for that direction
+                        const real &vx3) // w is c8o27 for DIR_000, c2o27 for one-axis, c1o54 for two-axis and c1o216 for three-axis directions
 {
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
-    LBMReal rho   = drho + UbMath::one;
-
-    feq[DIR_000] = UbMath::c8o27 * (drho + rho * (-cu_sq));
-    feq[DIR_P00]    = UbMath::c2o27 * (drho + rho * (3.0 * (vx1) + UbMath::c9o2 * (vx1) * (vx1)-cu_sq));
-    feq[DIR_M00]    = UbMath::c2o27 * (drho + rho * (3.0 * (-vx1) + UbMath::c9o2 * (-vx1) * (-vx1) - cu_sq));
-    feq[DIR_0P0]    = UbMath::c2o27 * (drho + rho * (3.0 * (vx2) + UbMath::c9o2 * (vx2) * (vx2)-cu_sq));
-    feq[DIR_0M0]    = UbMath::c2o27 * (drho + rho * (3.0 * (-vx2) + UbMath::c9o2 * (-vx2) * (-vx2) - cu_sq));
-    feq[DIR_00P]    = UbMath::c2o27 * (drho + rho * (3.0 * (vx3) + UbMath::c9o2 * (vx3) * (vx3)-cu_sq));
-    feq[DIR_00M]    = UbMath::c2o27 * (drho + rho * (3.0 * (-vx3) + UbMath::c9o2 * (-vx3) * (-vx3) - cu_sq));
-    feq[DIR_PP0]   = UbMath::c1o54 * (drho + rho * (3.0 * (vx1 + vx2) + UbMath::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq));
-    feq[DIR_MM0]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx1 - vx2) + UbMath::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq));
-    feq[DIR_PM0]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx1 - vx2) + UbMath::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq));
-    feq[DIR_MP0]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx1 + vx2) + UbMath::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq));
-    feq[DIR_P0P]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx1 + vx3) + UbMath::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq));
-    feq[DIR_M0M]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx1 - vx3) + UbMath::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq));
-    feq[DIR_P0M]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx1 - vx3) + UbMath::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq));
-    feq[DIR_M0P]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx1 + vx3) + UbMath::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq));
-    feq[DIR_0PP]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx2 + vx3) + UbMath::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq));
-    feq[DIR_0MM]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx2 - vx3) + UbMath::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq));
-    feq[DIR_0PM]  = UbMath::c1o54 * (drho + rho * (3.0 * (vx2 - vx3) + UbMath::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq));
-    feq[DIR_0MP]  = UbMath::c1o54 * (drho + rho * (3.0 * (-vx2 + vx3) + UbMath::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq));
-    feq[DIR_PPP] = UbMath::c1o216 *
-               (drho + rho * (3.0 * (vx1 + vx2 + vx3) + UbMath::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq));
+    using namespace vf::lbm::dir;
+
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); // 1.5 * squared velocity magnitude; subtracted in every entry below
+    real rho   = drho + vf::basics::constant::c1o1; // multiplies only the velocity-dependent part; drho itself enters each entry unscaled
+
+    feq[DIR_000] = vf::basics::constant::c8o27 * (drho + rho * (-cu_sq));
+    feq[DIR_P00]    = vf::basics::constant::c2o27 * (drho + rho * (3.0 * (vx1) + vf::basics::constant::c9o2 * (vx1) * (vx1)-cu_sq));
+    feq[DIR_M00]    = vf::basics::constant::c2o27 * (drho + rho * (3.0 * (-vx1) + vf::basics::constant::c9o2 * (-vx1) * (-vx1) - cu_sq));
+    feq[DIR_0P0]    = vf::basics::constant::c2o27 * (drho + rho * (3.0 * (vx2) + vf::basics::constant::c9o2 * (vx2) * (vx2)-cu_sq));
+    feq[DIR_0M0]    = vf::basics::constant::c2o27 * (drho + rho * (3.0 * (-vx2) + vf::basics::constant::c9o2 * (-vx2) * (-vx2) - cu_sq));
+    feq[DIR_00P]    = vf::basics::constant::c2o27 * (drho + rho * (3.0 * (vx3) + vf::basics::constant::c9o2 * (vx3) * (vx3)-cu_sq));
+    feq[DIR_00M]    = vf::basics::constant::c2o27 * (drho + rho * (3.0 * (-vx3) + vf::basics::constant::c9o2 * (-vx3) * (-vx3) - cu_sq));
+    feq[DIR_PP0]   = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (vx1 + vx2) + vf::basics::constant::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq));
+    feq[DIR_MM0]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (-vx1 - vx2) + vf::basics::constant::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq));
+    feq[DIR_PM0]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (vx1 - vx2) + vf::basics::constant::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq));
+    feq[DIR_MP0]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (-vx1 + vx2) + vf::basics::constant::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq));
+    feq[DIR_P0P]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (vx1 + vx3) + vf::basics::constant::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq));
+    feq[DIR_M0M]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (-vx1 - vx3) + vf::basics::constant::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq));
+    feq[DIR_P0M]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (vx1 - vx3) + vf::basics::constant::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq));
+    feq[DIR_M0P]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (-vx1 + vx3) + vf::basics::constant::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq));
+    feq[DIR_0PP]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (vx2 + vx3) + vf::basics::constant::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq));
+    feq[DIR_0MM]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (-vx2 - vx3) + vf::basics::constant::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq));
+    feq[DIR_0PM]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (vx2 - vx3) + vf::basics::constant::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq));
+    feq[DIR_0MP]  = vf::basics::constant::c1o54 * (drho + rho * (3.0 * (-vx2 + vx3) + vf::basics::constant::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq));
+    feq[DIR_PPP] = vf::basics::constant::c1o216 *
+               (drho + rho * (3.0 * (vx1 + vx2 + vx3) + vf::basics::constant::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq));
     feq[DIR_MMM] =
-        UbMath::c1o216 *
-        (drho + rho * (3.0 * (-vx1 - vx2 - vx3) + UbMath::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq));
-    feq[DIR_PPM] = UbMath::c1o216 *
-               (drho + rho * (3.0 * (vx1 + vx2 - vx3) + UbMath::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq));
+        vf::basics::constant::c1o216 *
+        (drho + rho * (3.0 * (-vx1 - vx2 - vx3) + vf::basics::constant::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq));
+    feq[DIR_PPM] = vf::basics::constant::c1o216 *
+               (drho + rho * (3.0 * (vx1 + vx2 - vx3) + vf::basics::constant::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq));
     feq[DIR_MMP] =
-        UbMath::c1o216 *
-        (drho + rho * (3.0 * (-vx1 - vx2 + vx3) + UbMath::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq));
-    feq[DIR_PMP] = UbMath::c1o216 *
-               (drho + rho * (3.0 * (vx1 - vx2 + vx3) + UbMath::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq));
+        vf::basics::constant::c1o216 *
+        (drho + rho * (3.0 * (-vx1 - vx2 + vx3) + vf::basics::constant::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq));
+    feq[DIR_PMP] = vf::basics::constant::c1o216 *
+               (drho + rho * (3.0 * (vx1 - vx2 + vx3) + vf::basics::constant::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq));
     feq[DIR_MPM] =
-        UbMath::c1o216 *
-        (drho + rho * (3.0 * (-vx1 + vx2 - vx3) + UbMath::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq));
-    feq[DIR_PMM] = UbMath::c1o216 *
-               (drho + rho * (3.0 * (vx1 - vx2 - vx3) + UbMath::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq));
+        vf::basics::constant::c1o216 *
+        (drho + rho * (3.0 * (-vx1 + vx2 - vx3) + vf::basics::constant::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq));
+    feq[DIR_PMM] = vf::basics::constant::c1o216 *
+               (drho + rho * (3.0 * (vx1 - vx2 - vx3) + vf::basics::constant::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq));
     feq[DIR_MPP] =
-        UbMath::c1o216 *
-        (drho + rho * (3.0 * (-vx1 + vx2 + vx3) + UbMath::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
+        vf::basics::constant::c1o216 *
+        (drho + rho * (3.0 * (-vx1 + vx2 + vx3) + vf::basics::constant::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
 }
 //////////////////////////////////////////////////////////////////////////
-static LBMReal getIncompFeqForDirection(const int &direction, const LBMReal &drho, const LBMReal &vx1,
-                                        const LBMReal &vx2, const LBMReal &vx3)
+static real getIncompFeqForDirection(const int &direction, const real &drho, const real &vx1,
+                                        const real &vx2, const real &vx3)
 {
-    LBMReal cu_sq = 1.5f * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+    using namespace vf::lbm::dir;
+
+    real cu_sq = 1.5f * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
     switch (direction) {
         case DIR_000:
-            return REAL_CAST(UbMath::c8o27 * (drho - cu_sq));
+            return REAL_CAST(vf::basics::constant::c8o27 * (drho - cu_sq));
         case DIR_P00:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (vx1) + UbMath::c9o2 * (vx1) * (vx1)-cu_sq));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + 3.0 * (vx1) + vf::basics::constant::c9o2 * (vx1) * (vx1)-cu_sq));
         case DIR_M00:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (-vx1) + UbMath::c9o2 * (-vx1) * (-vx1) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + 3.0 * (-vx1) + vf::basics::constant::c9o2 * (-vx1) * (-vx1) - cu_sq));
         case DIR_0P0:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (vx2) + UbMath::c9o2 * (vx2) * (vx2)-cu_sq));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + 3.0 * (vx2) + vf::basics::constant::c9o2 * (vx2) * (vx2)-cu_sq));
         case DIR_0M0:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (-vx2) + UbMath::c9o2 * (-vx2) * (-vx2) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + 3.0 * (-vx2) + vf::basics::constant::c9o2 * (-vx2) * (-vx2) - cu_sq));
         case DIR_00P:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (vx3) + UbMath::c9o2 * (vx3) * (vx3)-cu_sq));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + 3.0 * (vx3) + vf::basics::constant::c9o2 * (vx3) * (vx3)-cu_sq));
         case DIR_00M:
-            return REAL_CAST(UbMath::c2o27 * (drho + 3.0 * (-vx3) + UbMath::c9o2 * (-vx3) * (-vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c2o27 * (drho + 3.0 * (-vx3) + vf::basics::constant::c9o2 * (-vx3) * (-vx3) - cu_sq));
         case DIR_PP0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx1 + vx2) + UbMath::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (vx1 + vx2) + vf::basics::constant::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq));
         case DIR_MM0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx1 - vx2) + UbMath::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (-vx1 - vx2) + vf::basics::constant::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq));
         case DIR_PM0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx1 - vx2) + UbMath::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (vx1 - vx2) + vf::basics::constant::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq));
         case DIR_MP0:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx1 + vx2) + UbMath::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (-vx1 + vx2) + vf::basics::constant::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq));
         case DIR_P0P:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx1 + vx3) + UbMath::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (vx1 + vx3) + vf::basics::constant::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq));
         case DIR_M0M:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx1 - vx3) + UbMath::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (-vx1 - vx3) + vf::basics::constant::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq));
         case DIR_P0M:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx1 - vx3) + UbMath::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (vx1 - vx3) + vf::basics::constant::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq));
         case DIR_M0P:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx1 + vx3) + UbMath::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (-vx1 + vx3) + vf::basics::constant::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq));
         case DIR_0PP:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx2 + vx3) + UbMath::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (vx2 + vx3) + vf::basics::constant::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq));
         case DIR_0MM:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx2 - vx3) + UbMath::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (-vx2 - vx3) + vf::basics::constant::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq));
         case DIR_0PM:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (vx2 - vx3) + UbMath::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (vx2 - vx3) + vf::basics::constant::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq));
         case DIR_0MP:
-            return REAL_CAST(UbMath::c1o54 *
-                             (drho + 3.0 * (-vx2 + vx3) + UbMath::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o54 *
+                             (drho + 3.0 * (-vx2 + vx3) + vf::basics::constant::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq));
         case DIR_PPP:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (vx1 + vx2 + vx3) +
-                                               UbMath::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o216 * (drho + 3.0 * (vx1 + vx2 + vx3) +
+                                               vf::basics::constant::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq));
         case DIR_MMM:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (-vx1 - vx2 - vx3) +
-                                               UbMath::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o216 * (drho + 3.0 * (-vx1 - vx2 - vx3) +
+                                               vf::basics::constant::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq));
         case DIR_PPM:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (vx1 + vx2 - vx3) +
-                                               UbMath::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o216 * (drho + 3.0 * (vx1 + vx2 - vx3) +
+                                               vf::basics::constant::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq));
         case DIR_MMP:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (-vx1 - vx2 + vx3) +
-                                               UbMath::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o216 * (drho + 3.0 * (-vx1 - vx2 + vx3) +
+                                               vf::basics::constant::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq));
         case DIR_PMP:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (vx1 - vx2 + vx3) +
-                                               UbMath::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o216 * (drho + 3.0 * (vx1 - vx2 + vx3) +
+                                               vf::basics::constant::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq));
         case DIR_MPM:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (-vx1 + vx2 - vx3) +
-                                               UbMath::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o216 * (drho + 3.0 * (-vx1 + vx2 - vx3) +
+                                               vf::basics::constant::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq));
         case DIR_PMM:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (vx1 - vx2 - vx3) +
-                                               UbMath::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o216 * (drho + 3.0 * (vx1 - vx2 - vx3) +
+                                               vf::basics::constant::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq));
         case DIR_MPP:
-            return REAL_CAST(UbMath::c1o216 * (drho + 3.0 * (-vx1 + vx2 + vx3) +
-                                               UbMath::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
+            return REAL_CAST(vf::basics::constant::c1o216 * (drho + 3.0 * (-vx1 + vx2 + vx3) +
+                                               vf::basics::constant::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
         default:
             throw UbException(UB_EXARGS, "unknown dir");
     }
 }
 //////////////////////////////////////////////////////////////////////////
-static void calcIncompFeq(LBMReal *const &feq /*[27]*/, const LBMReal &drho, const LBMReal &vx1, const LBMReal &vx2,
-                          const LBMReal &vx3)
+static void calcIncompFeq(real *const &feq /*[27]*/, const real &drho, const real &vx1, const real &vx2,
+                          const real &vx3)
 {
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
-
-    feq[DIR_000] = UbMath::c8o27 * (drho - cu_sq);
-    feq[DIR_P00]    = UbMath::c2o27 * (drho + 3.0 * (vx1) + UbMath::c9o2 * (vx1) * (vx1)-cu_sq);
-    feq[DIR_M00]    = UbMath::c2o27 * (drho + 3.0 * (-vx1) + UbMath::c9o2 * (-vx1) * (-vx1) - cu_sq);
-    feq[DIR_0P0]    = UbMath::c2o27 * (drho + 3.0 * (vx2) + UbMath::c9o2 * (vx2) * (vx2)-cu_sq);
-    feq[DIR_0M0]    = UbMath::c2o27 * (drho + 3.0 * (-vx2) + UbMath::c9o2 * (-vx2) * (-vx2) - cu_sq);
-    feq[DIR_00P]    = UbMath::c2o27 * (drho + 3.0 * (vx3) + UbMath::c9o2 * (vx3) * (vx3)-cu_sq);
-    feq[DIR_00M]    = UbMath::c2o27 * (drho + 3.0 * (-vx3) + UbMath::c9o2 * (-vx3) * (-vx3) - cu_sq);
-    feq[DIR_PP0]   = UbMath::c1o54 * (drho + 3.0 * (vx1 + vx2) + UbMath::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq);
-    feq[DIR_MM0]   = UbMath::c1o54 * (drho + 3.0 * (-vx1 - vx2) + UbMath::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq);
-    feq[DIR_PM0]   = UbMath::c1o54 * (drho + 3.0 * (vx1 - vx2) + UbMath::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq);
-    feq[DIR_MP0]   = UbMath::c1o54 * (drho + 3.0 * (-vx1 + vx2) + UbMath::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq);
-    feq[DIR_P0P]   = UbMath::c1o54 * (drho + 3.0 * (vx1 + vx3) + UbMath::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq);
-    feq[DIR_M0M]   = UbMath::c1o54 * (drho + 3.0 * (-vx1 - vx3) + UbMath::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq);
-    feq[DIR_P0M]   = UbMath::c1o54 * (drho + 3.0 * (vx1 - vx3) + UbMath::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq);
-    feq[DIR_M0P]   = UbMath::c1o54 * (drho + 3.0 * (-vx1 + vx3) + UbMath::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq);
-    feq[DIR_0PP]   = UbMath::c1o54 * (drho + 3.0 * (vx2 + vx3) + UbMath::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq);
-    feq[DIR_0MM]   = UbMath::c1o54 * (drho + 3.0 * (-vx2 - vx3) + UbMath::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq);
-    feq[DIR_0PM]   = UbMath::c1o54 * (drho + 3.0 * (vx2 - vx3) + UbMath::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq);
-    feq[DIR_0MP]   = UbMath::c1o54 * (drho + 3.0 * (-vx2 + vx3) + UbMath::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq);
-    feq[DIR_PPP]  = UbMath::c1o216 *
-               (drho + 3.0 * (vx1 + vx2 + vx3) + UbMath::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq);
-    feq[DIR_MMM] = UbMath::c1o216 *
-               (drho + 3.0 * (-vx1 - vx2 - vx3) + UbMath::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
-    feq[DIR_PPM] = UbMath::c1o216 *
-               (drho + 3.0 * (vx1 + vx2 - vx3) + UbMath::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq);
-    feq[DIR_MMP] = UbMath::c1o216 *
-               (drho + 3.0 * (-vx1 - vx2 + vx3) + UbMath::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
-    feq[DIR_PMP] = UbMath::c1o216 *
-               (drho + 3.0 * (vx1 - vx2 + vx3) + UbMath::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq);
-    feq[DIR_MPM] = UbMath::c1o216 *
-               (drho + 3.0 * (-vx1 + vx2 - vx3) + UbMath::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
-    feq[DIR_PMM] = UbMath::c1o216 *
-               (drho + 3.0 * (vx1 - vx2 - vx3) + UbMath::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq);
-    feq[DIR_MPP] = UbMath::c1o216 *
-               (drho + 3.0 * (-vx1 + vx2 + vx3) + UbMath::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
+    // Writes 27 entries of feq, one per DIR_* index: weight * (drho + 3 * cu + c9o2 * cu * cu - cu_sq), cu being the signed velocity sum spelled out per line.
+    using namespace vf::lbm::dir;
+    namespace vfc = vf::basics::constant; // block-local shorthand for the weight constants
+
+    const real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3); // velocity-squared term subtracted in every entry
+
+    feq[DIR_000] = vfc::c8o27 * (drho - cu_sq); // no directional term
+
+    // Six entries using a single velocity component, weight c2o27.
+    feq[DIR_P00] = vfc::c2o27 * (drho + 3.0 * (vx1) + vfc::c9o2 * (vx1) * (vx1) - cu_sq);
+    feq[DIR_M00] = vfc::c2o27 * (drho + 3.0 * (-vx1) + vfc::c9o2 * (-vx1) * (-vx1) - cu_sq);
+    feq[DIR_0P0] = vfc::c2o27 * (drho + 3.0 * (vx2) + vfc::c9o2 * (vx2) * (vx2) - cu_sq);
+    feq[DIR_0M0] = vfc::c2o27 * (drho + 3.0 * (-vx2) + vfc::c9o2 * (-vx2) * (-vx2) - cu_sq);
+    feq[DIR_00P] = vfc::c2o27 * (drho + 3.0 * (vx3) + vfc::c9o2 * (vx3) * (vx3) - cu_sq);
+    feq[DIR_00M] = vfc::c2o27 * (drho + 3.0 * (-vx3) + vfc::c9o2 * (-vx3) * (-vx3) - cu_sq);
+
+    // Twelve entries combining two velocity components, weight c1o54.
+    feq[DIR_PP0] = vfc::c1o54 * (drho + 3.0 * (vx1 + vx2) + vfc::c9o2 * (vx1 + vx2) * (vx1 + vx2) - cu_sq);
+    feq[DIR_MM0] = vfc::c1o54 * (drho + 3.0 * (-vx1 - vx2) + vfc::c9o2 * (-vx1 - vx2) * (-vx1 - vx2) - cu_sq);
+    feq[DIR_PM0] = vfc::c1o54 * (drho + 3.0 * (vx1 - vx2) + vfc::c9o2 * (vx1 - vx2) * (vx1 - vx2) - cu_sq);
+    feq[DIR_MP0] = vfc::c1o54 * (drho + 3.0 * (-vx1 + vx2) + vfc::c9o2 * (-vx1 + vx2) * (-vx1 + vx2) - cu_sq);
+    feq[DIR_P0P] = vfc::c1o54 * (drho + 3.0 * (vx1 + vx3) + vfc::c9o2 * (vx1 + vx3) * (vx1 + vx3) - cu_sq);
+    feq[DIR_M0M] = vfc::c1o54 * (drho + 3.0 * (-vx1 - vx3) + vfc::c9o2 * (-vx1 - vx3) * (-vx1 - vx3) - cu_sq);
+    feq[DIR_P0M] = vfc::c1o54 * (drho + 3.0 * (vx1 - vx3) + vfc::c9o2 * (vx1 - vx3) * (vx1 - vx3) - cu_sq);
+    feq[DIR_M0P] = vfc::c1o54 * (drho + 3.0 * (-vx1 + vx3) + vfc::c9o2 * (-vx1 + vx3) * (-vx1 + vx3) - cu_sq);
+    feq[DIR_0PP] = vfc::c1o54 * (drho + 3.0 * (vx2 + vx3) + vfc::c9o2 * (vx2 + vx3) * (vx2 + vx3) - cu_sq);
+    feq[DIR_0MM] = vfc::c1o54 * (drho + 3.0 * (-vx2 - vx3) + vfc::c9o2 * (-vx2 - vx3) * (-vx2 - vx3) - cu_sq);
+    feq[DIR_0PM] = vfc::c1o54 * (drho + 3.0 * (vx2 - vx3) + vfc::c9o2 * (vx2 - vx3) * (vx2 - vx3) - cu_sq);
+    feq[DIR_0MP] = vfc::c1o54 * (drho + 3.0 * (-vx2 + vx3) + vfc::c9o2 * (-vx2 + vx3) * (-vx2 + vx3) - cu_sq);
+
+    // Eight entries combining all three velocity components, weight c1o216.
+    feq[DIR_PPP] = vfc::c1o216 * (drho + 3.0 * (vx1 + vx2 + vx3) + vfc::c9o2 * (vx1 + vx2 + vx3) * (vx1 + vx2 + vx3) - cu_sq);
+    feq[DIR_MMM] = vfc::c1o216 * (drho + 3.0 * (-vx1 - vx2 - vx3) + vfc::c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
+    feq[DIR_PPM] = vfc::c1o216 * (drho + 3.0 * (vx1 + vx2 - vx3) + vfc::c9o2 * (vx1 + vx2 - vx3) * (vx1 + vx2 - vx3) - cu_sq);
+    feq[DIR_MMP] = vfc::c1o216 * (drho + 3.0 * (-vx1 - vx2 + vx3) + vfc::c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
+    feq[DIR_PMP] = vfc::c1o216 * (drho + 3.0 * (vx1 - vx2 + vx3) + vfc::c9o2 * (vx1 - vx2 + vx3) * (vx1 - vx2 + vx3) - cu_sq);
+    feq[DIR_MPM] = vfc::c1o216 * (drho + 3.0 * (-vx1 + vx2 - vx3) + vfc::c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
+    feq[DIR_PMM] = vfc::c1o216 * (drho + 3.0 * (vx1 - vx2 - vx3) + vfc::c9o2 * (vx1 - vx2 - vx3) * (vx1 - vx2 - vx3) - cu_sq);
+    feq[DIR_MPP] = vfc::c1o216 * (drho + 3.0 * (-vx1 + vx2 + vx3) + vfc::c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
 }
 //////////////////////////////////////////////////////////////////////////
-static inline float getBoundaryVelocityForDirection(const int &direction, const float &bcVelocityX1,
-                                                    const float &bcVelocityX2, const float &bcVelocityX3)
+static inline real getBoundaryVelocityForDirection(const int &direction, const real &bcVelocityX1,
+                                                    const real &bcVelocityX2, const real &bcVelocityX3)
 {
+    using namespace vf::lbm::dir;
+ 
     switch (direction) {
         case DIR_P00:
-            return (float)(UbMath::c4o9 * (+bcVelocityX1));
+            return (real)(vf::basics::constant::c4o9 * (+bcVelocityX1));
         case DIR_M00:
-            return (float)(UbMath::c4o9 * (-bcVelocityX1));
+            return (real)(vf::basics::constant::c4o9 * (-bcVelocityX1));
         case DIR_0P0:
-            return (float)(UbMath::c4o9 * (+bcVelocityX2));
+            return (real)(vf::basics::constant::c4o9 * (+bcVelocityX2));
         case DIR_0M0:
-            return (float)(UbMath::c4o9 * (-bcVelocityX2));
+            return (real)(vf::basics::constant::c4o9 * (-bcVelocityX2));
         case DIR_00P:
-            return (float)(UbMath::c4o9 * (+bcVelocityX3));
+            return (real)(vf::basics::constant::c4o9 * (+bcVelocityX3));
         case DIR_00M:
-            return (float)(UbMath::c4o9 * (-bcVelocityX3));
+            return (real)(vf::basics::constant::c4o9 * (-bcVelocityX3));
         case DIR_PP0:
-            return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX2));
+            return (real)(vf::basics::constant::c1o9 * (+bcVelocityX1 + bcVelocityX2));
         case DIR_MM0:
-            return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX2));
+            return (real)(vf::basics::constant::c1o9 * (-bcVelocityX1 - bcVelocityX2));
         case DIR_PM0:
-            return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX2));
+            return (real)(vf::basics::constant::c1o9 * (+bcVelocityX1 - bcVelocityX2));
         case DIR_MP0:
-            return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX2));
+            return (real)(vf::basics::constant::c1o9 * (-bcVelocityX1 + bcVelocityX2));
         case DIR_P0P:
-            return (float)(UbMath::c1o9 * (+bcVelocityX1 + bcVelocityX3));
+            return (real)(vf::basics::constant::c1o9 * (+bcVelocityX1 + bcVelocityX3));
         case DIR_M0M:
-            return (float)(UbMath::c1o9 * (-bcVelocityX1 - bcVelocityX3));
+            return (real)(vf::basics::constant::c1o9 * (-bcVelocityX1 - bcVelocityX3));
         case DIR_P0M:
-            return (float)(UbMath::c1o9 * (+bcVelocityX1 - bcVelocityX3));
+            return (real)(vf::basics::constant::c1o9 * (+bcVelocityX1 - bcVelocityX3));
         case DIR_M0P:
-            return (float)(UbMath::c1o9 * (-bcVelocityX1 + bcVelocityX3));
+            return (real)(vf::basics::constant::c1o9 * (-bcVelocityX1 + bcVelocityX3));
         case DIR_0PP:
-            return (float)(UbMath::c1o9 * (+bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::basics::constant::c1o9 * (+bcVelocityX2 + bcVelocityX3));
         case DIR_0MM:
-            return (float)(UbMath::c1o9 * (-bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::basics::constant::c1o9 * (-bcVelocityX2 - bcVelocityX3));
         case DIR_0PM:
-            return (float)(UbMath::c1o9 * (+bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::basics::constant::c1o9 * (+bcVelocityX2 - bcVelocityX3));
         case DIR_0MP:
-            return (float)(UbMath::c1o9 * (-bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::basics::constant::c1o9 * (-bcVelocityX2 + bcVelocityX3));
         case DIR_PPP:
-            return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::basics::constant::c1o36 * (+bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
         case DIR_MMM:
-            return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::basics::constant::c1o36 * (-bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
         case DIR_PPM:
-            return (float)(UbMath::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::basics::constant::c1o36 * (+bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
         case DIR_MMP:
-            return (float)(UbMath::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::basics::constant::c1o36 * (-bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
         case DIR_PMP:
-            return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::basics::constant::c1o36 * (+bcVelocityX1 - bcVelocityX2 + bcVelocityX3));
         case DIR_MPM:
-            return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::basics::constant::c1o36 * (-bcVelocityX1 + bcVelocityX2 - bcVelocityX3));
         case DIR_PMM:
-            return (float)(UbMath::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
+            return (real)(vf::basics::constant::c1o36 * (+bcVelocityX1 - bcVelocityX2 - bcVelocityX3));
         case DIR_MPP:
-            return (float)(UbMath::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
+            return (real)(vf::basics::constant::c1o36 * (-bcVelocityX1 + bcVelocityX2 + bcVelocityX3));
         default:
             throw UbException(UB_EXARGS, "unknown direction");
     }
@@ -822,8 +861,9 @@ static inline float getBoundaryVelocityForDirection(const int &direction, const
 static const int &getInvertDirection(const int &direction)
 {
 #ifdef _DEBUG
-    if (direction < STARTDIR || direction > ENDDIR)
-        throw UbException(UB_EXARGS, "unknown direction");
+    // Debug builds only: reject a direction outside [FSTARTDIR, FENDDIR] before it is used to index INVDIR.
+     if (direction < FSTARTDIR || direction > FENDDIR)
+       throw UbException(UB_EXARGS, "unknown direction");
 #endif
     return INVDIR[direction];
 }
@@ -839,48 +879,52 @@ static void getLBMDirections(std::vector<int> &dirs, bool onlyLBdirs = false)
     } else /*STARTDIR->ENDDIR*/
     {
         dirs.resize(ENDDIR + 1);
-        for (int dir = STARTDIR; dir <= ENDDIR; ++dir)
+        for (int dir = STARTF; dir <= ENDF; ++dir)
             dirs[dir] = dir;
     }
 }
 //////////////////////////////////////////////////////////////////////////
-static std::vector<int> getEX(const int &exn)
+static std::vector<int> getDX(const int &exn) // exn selects the copied table: 1 -> DX1, 2 -> DX2, 3 -> DX3; any other value returns the vector zero-filled
 {
     std::vector<int> ex;
     ex.resize(ENDDIR + 1);
     switch (exn) {
         case 1:
-            for (int dir = STARTDIR; dir <= ENDDIR; ++dir)
+            for (int dir = FSTARTDIR; dir <= FENDDIR; ++dir) // NOTE(review): ex holds ENDDIR + 1 entries - confirm FENDDIR <= ENDDIR
                 ex[dir] = DX1[dir];
             break;
         case 2:
-            for (int dir = STARTDIR; dir <= ENDDIR; ++dir)
+            for (int dir = FSTARTDIR; dir <= FENDDIR; ++dir) // same inclusive range, copying from DX2
                 ex[dir] = DX2[dir];
             break;
         case 3:
-            for (int dir = STARTDIR; dir <= ENDDIR; ++dir)
+            for (int dir = FSTARTDIR; dir <= FENDDIR; ++dir) // same inclusive range, copying from DX3
                 ex[dir] = DX3[dir];
             break;
     }
     return ex;
 }
 //////////////////////////////////////////////////////////////////////////
-static inline void calcDistanceToNeighbors(std::vector<double> &distNeigh, const double &deltaX1)
+static inline void calcDistanceToNeighbors(std::vector<real> &distNeigh, const real &deltaX1)
 {
+    using namespace vf::lbm::dir;
+    // Writes 26 entries of distNeigh in place; the caller must size the vector beforehand because the resize below is commented out.
     // distNeigh.resize(FENDDIR+1, UbMath::sqrt2*deltaX1);
 
     distNeigh[DIR_P00] = distNeigh[DIR_M00] = distNeigh[DIR_0P0] = deltaX1;
     distNeigh[DIR_0M0] = distNeigh[DIR_00P] = distNeigh[DIR_00M] = deltaX1;
-    distNeigh[DIR_PP0] = distNeigh[DIR_MP0] = distNeigh[DIR_MM0] = distNeigh[DIR_PM0] = UbMath::sqrt2 * deltaX1;
-    distNeigh[DIR_P0P] = distNeigh[DIR_0PP] = distNeigh[DIR_M0P] = distNeigh[DIR_0MP] = UbMath::sqrt2 * deltaX1;
-    distNeigh[DIR_P0M] = distNeigh[DIR_0PM] = distNeigh[DIR_M0M] = distNeigh[DIR_0MM] = UbMath::sqrt2 * deltaX1;
-    distNeigh[DIR_PPP] = distNeigh[DIR_MPP] = distNeigh[DIR_PMP] = distNeigh[DIR_MMP] = UbMath::sqrt3 * deltaX1;
-    distNeigh[DIR_PPM] = distNeigh[DIR_MPM] = distNeigh[DIR_PMM] = distNeigh[DIR_MMM] = UbMath::sqrt3 * deltaX1;
+    const real twoAxisDist   = vf::basics::constant::sqrt2 * deltaX1; // shared by the 12 DIR_* entries named with two non-zero components
+    const real threeAxisDist = vf::basics::constant::sqrt3 * deltaX1; // shared by the 8 DIR_* entries named with three non-zero components
+    distNeigh[DIR_PP0] = distNeigh[DIR_MP0] = distNeigh[DIR_MM0] = distNeigh[DIR_PM0] = distNeigh[DIR_P0P] = distNeigh[DIR_0PP] = twoAxisDist;
+    distNeigh[DIR_M0P] = distNeigh[DIR_0MP] = distNeigh[DIR_P0M] = distNeigh[DIR_0PM] = distNeigh[DIR_M0M] = distNeigh[DIR_0MM] = twoAxisDist;
+    distNeigh[DIR_PPP] = distNeigh[DIR_MPP] = distNeigh[DIR_PMP] = distNeigh[DIR_MMP] = distNeigh[DIR_PPM] = distNeigh[DIR_MPM] = distNeigh[DIR_PMM] = distNeigh[DIR_MMM] = threeAxisDist;
 }
 //////////////////////////////////////////////////////////////////////////
-static inline void calcDistanceToNeighbors(std::vector<double> &distNeigh, const double &deltaX1, const double &deltaX2,
-                                           const double &deltaX3)
+static inline void calcDistanceToNeighbors(std::vector<real> &distNeigh, const real &deltaX1, const real &deltaX2,
+                                           const real &deltaX3)
 {
+    using namespace vf::lbm::dir;
+
     // distNeigh.resize(FENDDIR+1, UbMath::sqrt2*deltaX1);
     distNeigh[DIR_P00] = distNeigh[DIR_M00] = deltaX1;
     distNeigh[DIR_0P0] = distNeigh[DIR_0M0] = deltaX2;
@@ -894,11 +938,13 @@ static inline void calcDistanceToNeighbors(std::vector<double> &distNeigh, const
         sqrt(deltaX1 * deltaX1 + deltaX2 * deltaX2 + deltaX3 * deltaX3);
 }
 //////////////////////////////////////////////////////////////////////////
-static inline void initRayVectors(double *const &rayX1, double *const &rayX2, double *const &rayX3)
+static inline void initRayVectors(real *const &rayX1, real *const &rayX2, real *const &rayX3)
 {
+    using namespace vf::lbm::dir;
+
     int fdir;
-    double c1oS2 = UbMath::one_over_sqrt2;
-    double c1oS3 = UbMath::one_over_sqrt3;
+    real c1oS2 = vf::basics::constant::one_over_sqrt2;
+    real c1oS3 = vf::basics::constant::one_over_sqrt3;
     fdir         = DIR_P00;
     rayX1[fdir]  = 1.0;
     rayX2[fdir]  = 0.0;
@@ -1005,64 +1051,68 @@ static inline void initRayVectors(double *const &rayX1, double *const &rayX2, do
     rayX3[fdir]  = -c1oS3;
 }
 //////////////////////////////////////////////////////////////////////////
-static inline LBMReal calcPress(const LBMReal *const f, LBMReal rho, LBMReal vx1, LBMReal vx2, LBMReal vx3)
+static inline real calcPress(const real *const f, real rho, real vx1, real vx2, real vx3)
 {
-    LBMReal op = 1.0;
+    using namespace vf::lbm::dir;
+    // op weights the two terms of the result: the f-based sum minus the squared velocity is scaled by (1 - 0.5 * op), rho by 0.5 * op; it is fixed at 1.0 here.
+    real op = 1.0;
     return ((f[DIR_P00] + f[DIR_M00] + f[DIR_0P0] + f[DIR_0M0] + f[DIR_00P] + f[DIR_00M] +
              2. * (f[DIR_PP0] + f[DIR_MM0] + f[DIR_PM0] + f[DIR_MP0] + f[DIR_P0P] + f[DIR_M0M] + f[DIR_P0M] + f[DIR_M0P] + f[DIR_0PP] + f[DIR_0MM] + f[DIR_0PM] + f[DIR_0MP]) +
              3. * (f[DIR_PPP] + f[DIR_MMP] + f[DIR_PMP] + f[DIR_MPP] + f[DIR_PPM] + f[DIR_MMM] + f[DIR_PMM] + f[DIR_MPM]) -
              (vx1 * vx1 + vx2 * vx2 + vx3 * vx3)) *
                 (1 - 0.5 * op) +
             op * 0.5 * (rho)) *
-           UbMath::c1o3;
+           vf::basics::constant::c1o3;
 }
 //////////////////////////////////////////////////////////////////////////
-static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
+static inline real getShearRate(const real *const f, real collFactorF)
 {
-    LBMReal mfcbb = f[DIR_P00];
-    LBMReal mfbcb = f[DIR_0P0];
-    LBMReal mfbbc = f[DIR_00P];
-    LBMReal mfccb = f[DIR_PP0];
-    LBMReal mfacb = f[DIR_MP0];
-    LBMReal mfcbc = f[DIR_P0P];
-    LBMReal mfabc = f[DIR_M0P];
-    LBMReal mfbcc = f[DIR_0PP];
-    LBMReal mfbac = f[DIR_0MP];
-    LBMReal mfccc = f[DIR_PPP];
-    LBMReal mfacc = f[DIR_MPP];
-    LBMReal mfcac = f[DIR_PMP];
-    LBMReal mfaac = f[DIR_MMP];
-
-    LBMReal mfabb = f[DIR_M00];
-    LBMReal mfbab = f[DIR_0M0];
-    LBMReal mfbba = f[DIR_00M];
-    LBMReal mfaab = f[DIR_MM0];
-    LBMReal mfcab = f[DIR_PM0];
-    LBMReal mfaba = f[DIR_M0M];
-    LBMReal mfcba = f[DIR_P0M];
-    LBMReal mfbaa = f[DIR_0MM];
-    LBMReal mfbca = f[DIR_0PM];
-    LBMReal mfaaa = f[DIR_MMM];
-    LBMReal mfcaa = f[DIR_PMM];
-    LBMReal mfaca = f[DIR_MPM];
-    LBMReal mfcca = f[DIR_PPM];
-
-    LBMReal mfbbb = f[DIR_000];
-
-    LBMReal m0, m1, m2;
-
-    LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) + (mfaab + mfacb + mfcab + mfccb) +
+    using namespace vf::lbm::dir;
+
+    real mfcbb = f[DIR_P00];
+    real mfbcb = f[DIR_0P0];
+    real mfbbc = f[DIR_00P];
+    real mfccb = f[DIR_PP0];
+    real mfacb = f[DIR_MP0];
+    real mfcbc = f[DIR_P0P];
+    real mfabc = f[DIR_M0P];
+    real mfbcc = f[DIR_0PP];
+    real mfbac = f[DIR_0MP];
+    real mfccc = f[DIR_PPP];
+    real mfacc = f[DIR_MPP];
+    real mfcac = f[DIR_PMP];
+    real mfaac = f[DIR_MMP];
+
+    real mfabb = f[DIR_M00];
+    real mfbab = f[DIR_0M0];
+    real mfbba = f[DIR_00M];
+    real mfaab = f[DIR_MM0];
+    real mfcab = f[DIR_PM0];
+    real mfaba = f[DIR_M0M];
+    real mfcba = f[DIR_P0M];
+    real mfbaa = f[DIR_0MM];
+    real mfbca = f[DIR_0PM];
+    real mfaaa = f[DIR_MMM];
+    real mfcaa = f[DIR_PMM];
+    real mfaca = f[DIR_MPM];
+    real mfcca = f[DIR_PPM];
+
+    real mfbbb = f[DIR_000];
+
+    real m0, m1, m2;
+
+    real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) + (mfaab + mfacb + mfcab + mfccb) +
                   (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) +
                   (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-    LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+    real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
                    (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb));
-    LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+    real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
                    (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab));
-    LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+    real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
                    (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba));
 
-    LBMReal oMdrho;
+    real oMdrho;
 
     oMdrho = mfccc + mfaaa;
     m0     = mfaca + mfcac;
@@ -1090,9 +1140,9 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m0 += mfbbb; // hat gefehlt
     oMdrho = 1. - (oMdrho + m0);
 
-    LBMReal vx2;
-    LBMReal vy2;
-    LBMReal vz2;
+    real vx2;
+    real vy2;
+    real vz2;
     vx2 = vvx * vvx;
     vy2 = vvy * vvy;
     vz2 = vvz * vvz;
@@ -1106,7 +1156,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfaac - mfaaa;
     m0    = m2 + mfaab;
     mfaaa = m0;
-    m0 += UbMath::c1o36 * oMdrho;
+    m0 += vf::basics::constant::c1o36 * oMdrho;
     mfaab = m1 - m0 * vvz;
     mfaac = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1114,7 +1164,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfabc - mfaba;
     m0    = m2 + mfabb;
     mfaba = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::basics::constant::c1o9 * oMdrho;
     mfabb = m1 - m0 * vvz;
     mfabc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1122,7 +1172,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfacc - mfaca;
     m0    = m2 + mfacb;
     mfaca = m0;
-    m0 += UbMath::c1o36 * oMdrho;
+    m0 += vf::basics::constant::c1o36 * oMdrho;
     mfacb = m1 - m0 * vvz;
     mfacc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1131,7 +1181,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbac - mfbaa;
     m0    = m2 + mfbab;
     mfbaa = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::basics::constant::c1o9 * oMdrho;
     mfbab = m1 - m0 * vvz;
     mfbac = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1139,7 +1189,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbbc - mfbba;
     m0    = m2 + mfbbb;
     mfbba = m0;
-    m0 += UbMath::c4o9 * oMdrho;
+    m0 += vf::basics::constant::c4o9 * oMdrho;
     mfbbb = m1 - m0 * vvz;
     mfbbc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1147,7 +1197,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbcc - mfbca;
     m0    = m2 + mfbcb;
     mfbca = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::basics::constant::c1o9 * oMdrho;
     mfbcb = m1 - m0 * vvz;
     mfbcc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1156,7 +1206,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcac - mfcaa;
     m0    = m2 + mfcab;
     mfcaa = m0;
-    m0 += UbMath::c1o36 * oMdrho;
+    m0 += vf::basics::constant::c1o36 * oMdrho;
     mfcab = m1 - m0 * vvz;
     mfcac = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1164,7 +1214,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcbc - mfcba;
     m0    = m2 + mfcbb;
     mfcba = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::basics::constant::c1o9 * oMdrho;
     mfcbb = m1 - m0 * vvz;
     mfcbc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1172,7 +1222,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfccc - mfcca;
     m0    = m2 + mfccb;
     mfcca = m0;
-    m0 += UbMath::c1o36 * oMdrho;
+    m0 += vf::basics::constant::c1o36 * oMdrho;
     mfccb = m1 - m0 * vvz;
     mfccc = m2 - 2. * m1 * vvz + vz2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1184,7 +1234,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfaca - mfaaa;
     m0    = m2 + mfaba;
     mfaaa = m0;
-    m0 += UbMath::c1o6 * oMdrho;
+    m0 += vf::basics::constant::c1o6 * oMdrho;
     mfaba = m1 - m0 * vvy;
     mfaca = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1199,7 +1249,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfacc - mfaac;
     m0    = m2 + mfabc;
     mfaac = m0;
-    m0 += UbMath::c1o18 * oMdrho;
+    m0 += vf::basics::constant::c1o18 * oMdrho;
     mfabc = m1 - m0 * vvy;
     mfacc = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1208,7 +1258,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbca - mfbaa;
     m0    = m2 + mfbba;
     mfbaa = m0;
-    m0 += UbMath::c2o3 * oMdrho;
+    m0 += vf::basics::constant::c2o3 * oMdrho;
     mfbba = m1 - m0 * vvy;
     mfbca = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1223,7 +1273,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfbcc - mfbac;
     m0    = m2 + mfbbc;
     mfbac = m0;
-    m0 += UbMath::c2o9 * oMdrho;
+    m0 += vf::basics::constant::c2o9 * oMdrho;
     mfbbc = m1 - m0 * vvy;
     mfbcc = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1232,7 +1282,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcca - mfcaa;
     m0    = m2 + mfcba;
     mfcaa = m0;
-    m0 += UbMath::c1o6 * oMdrho;
+    m0 += vf::basics::constant::c1o6 * oMdrho;
     mfcba = m1 - m0 * vvy;
     mfcca = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1247,7 +1297,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfccc - mfcac;
     m0    = m2 + mfcbc;
     mfcac = m0;
-    m0 += UbMath::c1o18 * oMdrho;
+    m0 += vf::basics::constant::c1o18 * oMdrho;
     mfcbc = m1 - m0 * vvy;
     mfccc = m2 - 2. * m1 * vvy + vy2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1274,7 +1324,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcca - mfaca;
     m0    = m2 + mfbca;
     mfaca = m0;
-    m0 += UbMath::c1o3 * oMdrho;
+    m0 += vf::basics::constant::c1o3 * oMdrho;
     mfbca = m1 - m0 * vvx;
     mfcca = m2 - 2. * m1 * vvx + vx2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1305,7 +1355,7 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfcac - mfaac;
     m0    = m2 + mfbac;
     mfaac = m0;
-    m0 += UbMath::c1o3 * oMdrho;
+    m0 += vf::basics::constant::c1o3 * oMdrho;
     mfbac = m1 - m0 * vvx;
     mfcac = m2 - 2. * m1 * vvx + vx2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
@@ -1320,36 +1370,38 @@ static inline LBMReal getShearRate(const LBMReal *const f, LBMReal collFactorF)
     m1    = mfccc - mfacc;
     m0    = m2 + mfbcc;
     mfacc = m0;
-    m0 += UbMath::c1o9 * oMdrho;
+    m0 += vf::basics::constant::c1o9 * oMdrho;
     mfbcc = m1 - m0 * vvx;
     mfccc = m2 - 2. * m1 * vvx + vx2 * m0;
     ////////////////////////////////////////////////////////////////////////////////////
     // Cumulants
     ////////////////////////////////////////////////////////////////////////////////////
-    LBMReal OxxPyyPzz = 1.; // omega2 or bulk viscosity
+    real OxxPyyPzz = 1.; // omega2 or bulk viscosity
 
-    LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-    LBMReal mxxMyy    = mfcaa - mfaca;
-    LBMReal mxxMzz    = mfcaa - mfaac;
+    real mxxPyyPzz = mfcaa + mfaca + mfaac;
+    real mxxMyy    = mfcaa - mfaca;
+    real mxxMzz    = mfcaa - mfaac;
 
-    LBMReal dxux = -UbMath::c1o2 * collFactorF * (mxxMyy + mxxMzz) + UbMath::c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-    LBMReal dyuy = dxux + collFactorF * UbMath::c3o2 * mxxMyy;
-    LBMReal dzuz = dxux + collFactorF * UbMath::c3o2 * mxxMzz;
+    real dxux = -vf::basics::constant::c1o2 * collFactorF * (mxxMyy + mxxMzz) + vf::basics::constant::c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+    real dyuy = dxux + collFactorF * vf::basics::constant::c3o2 * mxxMyy;
+    real dzuz = dxux + collFactorF * vf::basics::constant::c3o2 * mxxMzz;
 
-    LBMReal Dxy = -UbMath::three * collFactorF * mfbba;
-    LBMReal Dxz = -UbMath::three * collFactorF * mfbab;
-    LBMReal Dyz = -UbMath::three * collFactorF * mfabb;
+    real Dxy = -vf::basics::constant::c3o1 * collFactorF * mfbba;
+    real Dxz = -vf::basics::constant::c3o1 * collFactorF * mfbab;
+    real Dyz = -vf::basics::constant::c3o1 * collFactorF * mfabb;
 
-    return sqrt(UbMath::c2 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) /
-           (rho + UbMath::one);
+    return sqrt(vf::basics::constant::c2o1 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) /
+           (rho + vf::basics::constant::c1o1);
 }
 //Multiphase stuff
 //////////////////////////////////////////////////////////////////////////
-static void calcMultiphaseFeq(LBMReal *const &feq /*[27]*/, const LBMReal &rho, const LBMReal &p1, const LBMReal &vx1,
-                              const LBMReal &vx2, const LBMReal &vx3)
+static void calcMultiphaseFeq(real *const &feq /*[27]*/, const real &rho, const real &p1, const real &vx1,
+                              const real &vx2, const real &vx3)
 {
-    using namespace UbMath;
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+    using namespace vf::lbm::dir;
+
+    using namespace vf::basics::constant;
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
     feq[DIR_000] = c8o27 * (p1 + rho * c1o3 * (-cu_sq));
     feq[DIR_P00]    = c2o27 * (p1 + rho * c1o3 * (3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq));
@@ -1388,11 +1440,13 @@ static void calcMultiphaseFeq(LBMReal *const &feq /*[27]*/, const LBMReal &rho,
                (p1 + rho * c1o3 * (3.0 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
 }
 //////////////////////////////////////////////////////////////////////////
-static void calcMultiphaseFeqVB(LBMReal *const &feq /*[27]*/, const LBMReal &p1, const LBMReal &vx1, const LBMReal &vx2,
-                                const LBMReal &vx3)
+static void calcMultiphaseFeqVB(real *const &feq /*[27]*/, const real &p1, const real &vx1, const real &vx2,
+                                const real &vx3)
 {
-    using namespace UbMath;
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+    using namespace vf::lbm::dir;
+
+    using namespace vf::basics::constant;
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
     feq[DIR_000] = p1 + c8o27 * (-cu_sq);
     feq[DIR_P00]    = c2o27 * ((3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq));
@@ -1423,11 +1477,13 @@ static void calcMultiphaseFeqVB(LBMReal *const &feq /*[27]*/, const LBMReal &p1,
     feq[DIR_MPP]  = c1o216 * ((3.0 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq));
 }
 //////////////////////////////////////////////////////////////////////////
-static void calcMultiphaseHeq(LBMReal *const &heq /*[27]*/, const LBMReal &phi, const LBMReal &vx1, const LBMReal &vx2,
-                              const LBMReal &vx3)
+static void calcMultiphaseHeq(real *const &heq /*[27]*/, const real &phi, const real &vx1, const real &vx2,
+                              const real &vx3)
 {
-    using namespace UbMath;
-    LBMReal cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
+    using namespace vf::lbm::dir;
+    using namespace vf::basics::constant;
+
+    real cu_sq = 1.5 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
     heq[DIR_000] = c8o27 * phi * (1.0 - cu_sq);
     heq[DIR_P00]    = c2o27 * phi * (1.0 + 3.0 * (vx1) + c9o2 * (vx1) * (vx1)-cu_sq);
diff --git a/src/cpu/VirtualFluidsCore/LBM/ICell.h b/src/cpu/VirtualFluidsCore/LBM/ICell.h
index c080033b8bbdbe1741f6f624c2726ffa145bf080..e9b07b1eee9b5819c95203ceabe4af1afd562e7d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ICell.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ICell.h
@@ -7,14 +7,14 @@
 struct ICell3D {
     ICell3D(int size);
 
-    std::vector<LBMReal> TSW;
-    std::vector<LBMReal> TNW;
-    std::vector<LBMReal> TNE;
-    std::vector<LBMReal> TSE;
-    std::vector<LBMReal> BSW;
-    std::vector<LBMReal> BNW;
-    std::vector<LBMReal> BNE;
-    std::vector<LBMReal> BSE;
+    std::vector<real> TSW;
+    std::vector<real> TNW;
+    std::vector<real> TNE;
+    std::vector<real> TSE;
+    std::vector<real> BSW;
+    std::vector<real> BNW;
+    std::vector<real> BNE;
+    std::vector<real> BSE;
 };
 
 inline ICell3D::ICell3D(int size)
diff --git a/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
index bde61d9d314b61327ff8f8a2a71d2864d50cc7f5..ab047a86081f9afc1a099eea0087f2728bfa519d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
@@ -48,18 +48,18 @@ public:
     virtual ~ILBMKernel() = default;
 
     virtual void calculate(int step)    = 0;
-    virtual double getCalculationTime() = 0;
+    virtual real getCalculationTime()   = 0;
     virtual void swapDistributions()    = 0;
 
     virtual bool getCompressible() const                                             = 0;
     virtual SPtr<BCProcessor> getBCProcessor() const                                 = 0;
     virtual void setBCProcessor(SPtr<BCProcessor> bcProcessor)                       = 0;
     virtual SPtr<DataSet3D> getDataSet() const                                       = 0;
-    virtual double getCollisionFactor() const                                        = 0;
-    virtual void setCollisionFactor(double collFactor)                               = 0;
+    virtual real getCollisionFactor() const                                          = 0;
+    virtual void setCollisionFactor(real collFactor)                                 = 0;
     virtual bool isInsideOfDomain(const int &x1, const int &x2, const int &x3) const = 0;
     virtual int getGhostLayerWidth() const                                           = 0;
-    virtual LBMReal getDeltaT() const                                                = 0;
+    virtual real getDeltaT() const                                                   = 0;
     virtual bool getWithForcing() const                                              = 0;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.cpp
index ec4b9bbd4f177a3d0fdbd0c3f1d4c3d7775fface..abac1e285d7945c5180d1acb62457a2a9a718c46 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+// previously: using namespace UbMath; (c1o2, c1o3, ... now resolve to vf::basics::constant)
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 IncompressibleCumulantLBMKernel::IncompressibleCumulantLBMKernel()
@@ -135,49 +136,49 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                // a b c
                //-1 0 1
 
-               LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-               LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-               LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-               LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-               LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-               LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-               LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-               LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-               LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-               LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-               LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-               LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-               LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-               LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-               LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-               LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-               LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-               LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-               LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-               LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-               LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-               LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-               LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-               LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-               LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-               LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-               LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-               LBMReal m0, m1, m2;
-
-               LBMReal rho=(mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+               real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+               real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+               real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+               real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+               real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+               real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+               real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+               real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+               real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+               real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+               real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+               real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+               real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+               real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+               real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+               real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+               real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+               real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+               real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+               real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+               real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+               real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+               real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+               real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+               real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+               real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+               real m0, m1, m2;
+
+               real rho=(mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
 
-               LBMReal vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
+               real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
                   (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
                   (mfcbb-mfabb));
-               LBMReal vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
+               real vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
                   (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
                   (mfbcb-mfbab));
-               LBMReal vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
+               real vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
                   (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
                   (mfbbc-mfbba));
 
@@ -185,9 +186,9 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                ///////////////////////////////////////////////////////////////////////////////////////////
                if (withForcing)
                {
-                  muX1 = static_cast<double>(x1-1+ix1*maxX1);
-                  muX2 = static_cast<double>(x2-1+ix2*maxX2);
-                  muX3 = static_cast<double>(x3-1+ix3*maxX3);
+                  muX1 = static_cast<mu::value_type>(x1-1+ix1*maxX1);
+                  muX2 = static_cast<mu::value_type>(x2-1+ix2*maxX2);
+                  muX3 = static_cast<mu::value_type>(x3-1+ix3*maxX3);
 
                   forcingX1 = muForcingX1.Eval();
                   forcingX2 = muForcingX2.Eval();
@@ -198,7 +199,7 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                   vvz += forcingX3*deltaT*0.5; // Z
                }
                ///////////////////////////////////////////////////////////////////////////////////////////               
-               LBMReal oMdrho;
+               real oMdrho;
 
                oMdrho=mfccc+mfaaa;
                m0=mfaca+mfcac;
@@ -226,15 +227,15 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                m0+=mfbbb; //hat gefehlt
                oMdrho = 1. - (oMdrho + m0);
 
-               LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+               real vx2;
+               real vy2;
+               real vz2;
                vx2=vvx*vvx;
                vy2=vvy*vvy;
                vz2=vvz*vvz;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal wadjust;
-               LBMReal qudricLimit = 0.01;
+               real wadjust;
+               real qudricLimit = 0.01;
                ////////////////////////////////////////////////////////////////////////////////////
                //Hin
                ////////////////////////////////////////////////////////////////////////////////////
@@ -465,33 +466,33 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // Cumulants
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-               LBMReal OxyyPxzz  = 1.;//-s9;//2+s9;//
-               //LBMReal OxyyMxzz  = 1.;//2+s9;//
-               LBMReal O4        = 1.;
-               LBMReal O5        = 1.;
-               LBMReal O6        = 1.;
+               real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+               real OxyyPxzz  = 1.;//-s9;//2+s9;//
+               //real OxyyMxzz  = 1.;//2+s9;//
+               real O4        = 1.;
+               real O5        = 1.;
+               real O6        = 1.;
 
                //Cum 4.
                //LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
                //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
                //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-               LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-               LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-               LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+               real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+               real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+               real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-               LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
-               LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
-               LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
+               real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
+               real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
+               real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho-1.)*oMdrho);
 
                //Cum 5.
-               LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-               LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-               LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+               real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+               real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+               real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
                //Cum 6.
-               LBMReal CUMccc = mfccc  +((-4. *  mfbbb * mfbbb
+               real CUMccc = mfccc  +((-4. *  mfbbb * mfbbb
                   -       (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
                   -  4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
                   -  2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -505,13 +506,13 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
 
                //2.
                // linear combinations
-               LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-               LBMReal mxxMyy    = mfcaa - mfaca;
-               LBMReal mxxMzz         = mfcaa - mfaac;
+               real mxxPyyPzz = mfcaa + mfaca + mfaac;
+               real mxxMyy    = mfcaa - mfaca;
+               real mxxMzz         = mfcaa - mfaac;
 
-               LBMReal dxux = -c1o2 * collFactor *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
-               LBMReal dyuy = dxux + collFactor * c3o2 * mxxMyy;
-               LBMReal dzuz = dxux + collFactor * c3o2 * mxxMzz;
+               real dxux = -c1o2 * collFactor *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
+               real dyuy = dxux + collFactor * c3o2 * mxxMyy;
+               real dzuz = dxux + collFactor * c3o2 * mxxMzz;
 
                //relax
                mxxPyyPzz += OxxPyyPzz*(mfaaa  - mxxPyyPzz)- 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -529,14 +530,14 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
 
                //3.
                // linear combinations
-               LBMReal mxxyPyzz = mfcba + mfabc;
-               LBMReal mxxyMyzz = mfcba - mfabc;
+               real mxxyPyzz = mfcba + mfabc;
+               real mxxyMyzz = mfcba - mfabc;
 
-               LBMReal mxxzPyyz = mfcab + mfacb;
-               LBMReal mxxzMyyz = mfcab - mfacb;
+               real mxxzPyyz = mfcab + mfacb;
+               real mxxzMyyz = mfcab - mfacb;
 
-               LBMReal mxyyPxzz = mfbca + mfbac;
-               LBMReal mxyyMxzz = mfbca - mfbac;
+               real mxyyPxzz = mfbca + mfbac;
+               real mxyyMxzz = mfbca - mfbac;
 
                //relax
                wadjust    = OxyyMxzz+(1.-OxyyMxzz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimit);
@@ -831,11 +832,11 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
                //proof correctness
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal rho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real rho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
-               //LBMReal dif = fabs(rho - rho_post);
-               LBMReal dif = rho - rho_post;
+               //real dif = fabs(rho - rho_post);
+               real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
                if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -889,7 +890,7 @@ void IncompressibleCumulantLBMKernel::calculate(int step)
    //timer.stop();
 }
 //////////////////////////////////////////////////////////////////////////
-double IncompressibleCumulantLBMKernel::getCalculationTime()
+real IncompressibleCumulantLBMKernel::getCalculationTime()
 {
    //return timer.getDuration();
    return timer.getTotalTime();
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.h
index d7c3c78a6ffb2c27b99fbf603f5561dff0171c29..5abe9afc29caf6fe178d6b0e7c3b44e373c6defb 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantLBMKernel.h
@@ -23,27 +23,27 @@ public:
    ~IncompressibleCumulantLBMKernel() override;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    void setRelaxationParameter(Parameter p);
 protected:
    virtual void initDataSet();
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
 
    UbTimer timer;
 
-   LBMReal OxyyMxzz;
+   real OxyyMxzz;
    Parameter parameter;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.cpp
index 90bc1998454e980c86054934222b251699f1412a..7f2bf15a1d919a02cdad290265d741eee080f108 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+// previously: using namespace UbMath;
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 IncompressibleCumulantWithSpongeLayerLBMKernel::IncompressibleCumulantWithSpongeLayerLBMKernel()
@@ -23,7 +24,7 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::initDataSet()
    dataSet->setFdistributions(d);
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleCumulantWithSpongeLayerLBMKernel::setRelaxFactorParam(int vdir, double vL1, double vdx, double vSP)
+void IncompressibleCumulantWithSpongeLayerLBMKernel::setRelaxFactorParam(int vdir, real vL1, real vdx, real vSP)
 {
    direction = vdir;
    L1 = vL1;
@@ -31,18 +32,20 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::setRelaxFactorParam(int vdi
    SP = vSP;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleCumulantWithSpongeLayerLBMKernel::initRelaxFactor(int vdir, double vL1, double vdx, double vSP)
+void IncompressibleCumulantWithSpongeLayerLBMKernel::initRelaxFactor(int vdir, real vL1, real vdx, real vSP)
 {
+    using namespace vf::lbm::dir;
+
    direction = vdir;
    L1 = vL1;
    dx = vdx;
    SP = vSP;
 
-   double sizeX = L1 / dx;
-   double sizeSP = SP / dx;
-   double muX1, muX2, muX3;
+   real sizeX = L1 / dx;
+   real sizeSP = SP / dx;
+   real muX1, muX2, muX3;
 
-   LBMReal spongeFactor;
+   real spongeFactor;
 
    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -57,7 +60,7 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::initRelaxFactor(int vdir, d
    int maxX2 = bcArrayMaxX2 - ghostLayerWidth - 1;
    int maxX3 = bcArrayMaxX3 - ghostLayerWidth - 1;
 
-   SPtr<RelaxationFactorArray3D> relaxationFactorPtr = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(maxX1, maxX2, maxX3));
+   SPtr<RelaxationFactorArray3D> relaxationFactorPtr = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(maxX1, maxX2, maxX3));
    dataSet->setRelaxationFactor(relaxationFactorPtr);
 
    for (int x3 = minX3; x3 < maxX3; x3++)
@@ -68,38 +71,38 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::initRelaxFactor(int vdir, d
          {
             switch (direction)
             {
-            case D3Q27System::DIR_P00:
-               muX1 = (double)(x1 + ix1 * maxX1);
+            case DIR_P00:
+               muX1 = (real)(x1 + ix1 * maxX1);
                if (muX1 >= (sizeX - sizeSP) / deltaT)
                   spongeFactor = (sizeX - (muX1 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_M00:
-               muX1 = (double)(x1 + ix1 * maxX1);
+            case DIR_M00:
+               muX1 = (real)(x1 + ix1 * maxX1);
                if (muX1 <= sizeSP / deltaT)
                   spongeFactor = (sizeSP - (muX1 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_0P0:
-               muX2 = (double)(x2 + ix2 * maxX2);
+            case DIR_0P0:
+               muX2 = (real)(x2 + ix2 * maxX2);
                if (muX2 >= (sizeX - sizeSP) / deltaT)
                   spongeFactor = (sizeX - (muX2 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_0M0:
-               muX2 = (double)(x2 + ix2 * maxX2);
+            case DIR_0M0:
+               muX2 = (real)(x2 + ix2 * maxX2);
                if (muX2 <= sizeSP / deltaT)
                   spongeFactor = (sizeSP - (muX2 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_00P:
-               muX3 = (double)(x3 + ix3 * maxX3);
+            case DIR_00P:
+               muX3 = (real)(x3 + ix3 * maxX3);
                if (muX3 >= (sizeX - sizeSP) / deltaT)
                   spongeFactor = (sizeX - (muX3 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
                break;
-            case D3Q27System::DIR_00M:
-               muX3 = (double)(x3 + ix3 * maxX3);
+            case DIR_00M:
+               muX3 = (real)(x3 + ix3 * maxX3);
                if (muX3 <= sizeSP / deltaT)
                   spongeFactor = (sizeSP - (muX3 * deltaT + 1)) / sizeSP / 2.0 + 0.5;
                else spongeFactor = 1.0;
@@ -204,8 +207,8 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
    int maxX2 = bcArrayMaxX2-ghostLayerWidth-1;
    int maxX3 = bcArrayMaxX3-ghostLayerWidth-1;
 
-   LBMReal collFactor0 = collFactor;
-   LBMReal spongeFactor;
+   real collFactor0 = collFactor;
+   real spongeFactor;
 
    for(int x3 = minX3; x3 <= maxX3; x3++)
    {
@@ -231,49 +234,49 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
 
                //Rest ist b
 
-               LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1,x2,x3);
-               LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N,x1,x2,x3); 
-               LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T,x1,x2,x3);
-               LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE,x1,x2,x3);
-               LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW,x1p,x2,x3);
-               LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE,x1,x2,x3);
-               LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p,x2,x3);
-               LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN,x1,x2,x3);
-               LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS,x1,x2p,x3);
-               LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE,x1,x2,x3);
-               LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW,x1p,x2,x3);
-               LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE,x1,x2p,x3);
-               LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW,x1p,x2p,x3);
-
-               LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W,x1p,x2,x3  );
-               LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S,x1,x2p,x3  );
-               LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B,x1,x2,x3p  );
-               LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW,x1p,x2p,x3 );
-               LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE,x1,x2p,x3 );
-               LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW,x1p,x2,x3p );
-               LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE,x1,x2,x3p );
-               LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS,x1,x2p,x3p );
-               LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN,x1,x2,x3p );
-               LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW,x1p,x2p,x3p);
-               LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE,x1,x2p,x3p);
-               LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW,x1p,x2,x3p);
-               LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE,x1,x2,x3p);
-
-               LBMReal mfbbb = (*this->zeroDistributions)(x1,x2,x3);
-
-               LBMReal m0, m1, m2;
+               real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1,x2,x3);
+               real mfbcb = (*this->localDistributions)(D3Q27System::ET_N,x1,x2,x3); 
+               real mfbbc = (*this->localDistributions)(D3Q27System::ET_T,x1,x2,x3);
+               real mfccb = (*this->localDistributions)(D3Q27System::ET_NE,x1,x2,x3);
+               real mfacb = (*this->localDistributions)(D3Q27System::ET_NW,x1p,x2,x3);
+               real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE,x1,x2,x3);
+               real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p,x2,x3);
+               real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN,x1,x2,x3);
+               real mfbac = (*this->localDistributions)(D3Q27System::ET_TS,x1,x2p,x3);
+               real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE,x1,x2,x3);
+               real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW,x1p,x2,x3);
+               real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE,x1,x2p,x3);
+               real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW,x1p,x2p,x3);
+
+               real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W,x1p,x2,x3  );
+               real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S,x1,x2p,x3  );
+               real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B,x1,x2,x3p  );
+               real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW,x1p,x2p,x3 );
+               real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE,x1,x2p,x3 );
+               real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW,x1p,x2,x3p );
+               real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE,x1,x2,x3p );
+               real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS,x1,x2p,x3p );
+               real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN,x1,x2,x3p );
+               real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW,x1p,x2p,x3p);
+               real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE,x1,x2p,x3p);
+               real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW,x1p,x2,x3p);
+               real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE,x1,x2,x3p);
+
+               real mfbbb = (*this->zeroDistributions)(x1,x2,x3);
+
+               real m0, m1, m2;
                
-               LBMReal rho=(mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real rho=(mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
 
-               LBMReal vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
+               real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
                   (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
                   (mfcbb-mfabb));
-               LBMReal vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
+               real vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
                   (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
                   (mfbcb-mfbab));
-               LBMReal vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
+               real vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
                   (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
                   (mfbbc-mfbba));
                //////////////////////////////////////////////////////////////////////////
@@ -323,7 +326,7 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
                //}
                //////////////////////////////////////////////////////////////////////////
 
-               LBMReal oMdrho;
+               real oMdrho;
 
                oMdrho=mfccc+mfaaa;
                m0=mfaca+mfcac;
@@ -351,15 +354,15 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
                m0+=mfbbb; //hat gefehlt
                oMdrho = 1. - (oMdrho + m0);
 
-               LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+               real vx2;
+               real vy2;
+               real vz2;
                vx2=vvx*vvx;
                vy2=vvy*vvy;
                vz2=vvz*vvz;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal wadjust;
-               LBMReal qudricLimit = 0.01;
+               real wadjust;
+               real qudricLimit = 0.01;
                ////////////////////////////////////////////////////////////////////////////////////
                //Hin
                ////////////////////////////////////////////////////////////////////////////////////
@@ -590,29 +593,29 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // Cumulants
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal OxxPyyPzz = 1.;
-               LBMReal OxyyPxzz  = 1.;//-s9;//2+s9;//
-               //LBMReal OxyyMxzz  = 1.;//2+s9;//
-               LBMReal O4        = 1.;
-               LBMReal O5        = 1.;
-               LBMReal O6        = 1.;
+               real OxxPyyPzz = 1.;
+               real OxyyPxzz  = 1.;//-s9;//2+s9;//
+               //real OxyyMxzz  = 1.;//2+s9;//
+               real O4        = 1.;
+               real O5        = 1.;
+               real O6        = 1.;
 
                //Cum 4.
-               LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab);
-               LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb);
-               LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb);
+               real CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab);
+               real CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb);
+               real CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb);
 
-               LBMReal CUMcca = mfcca - (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
-               LBMReal CUMcac = mfcac - (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
-               LBMReal CUMacc = mfacc - (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
+               real CUMcca = mfcca - (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
+               real CUMcac = mfcac - (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
+               real CUMacc = mfacc - (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho-1)*oMdrho;
 
                //Cum 5.
-               LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-               LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-               LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+               real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+               real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+               real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
                //Cum 6.
-               LBMReal CUMccc = mfccc  +((-4. *  mfbbb * mfbbb 
+               real CUMccc = mfccc  +((-4. *  mfbbb * mfbbb 
                   -       (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
                   -  4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
                   -  2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -626,13 +629,13 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
 
                //2.
                // linear combinations
-               LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-               LBMReal mxxMyy    = mfcaa - mfaca;
-               LBMReal mxxMzz         = mfcaa - mfaac;
+               real mxxPyyPzz = mfcaa + mfaca + mfaac;
+               real mxxMyy    = mfcaa - mfaca;
+               real mxxMzz         = mfcaa - mfaac;
 
-               LBMReal dxux = -c1o2 * collFactor *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
-               LBMReal dyuy = dxux + collFactor * c3o2 * mxxMyy;
-               LBMReal dzuz = dxux + collFactor * c3o2 * mxxMzz;
+               real dxux = -c1o2 * collFactor *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
+               real dyuy = dxux + collFactor * c3o2 * mxxMyy;
+               real dzuz = dxux + collFactor * c3o2 * mxxMzz;
 
                //relax
                mxxPyyPzz += OxxPyyPzz*(mfaaa  - mxxPyyPzz)- 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -650,14 +653,14 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
 
                //3.
                // linear combinations
-               LBMReal mxxyPyzz = mfcba + mfabc;
-               LBMReal mxxyMyzz = mfcba - mfabc;
+               real mxxyPyzz = mfcba + mfabc;
+               real mxxyMyzz = mfcba - mfabc;
 
-               LBMReal mxxzPyyz = mfcab + mfacb;
-               LBMReal mxxzMyyz = mfcab - mfacb;
+               real mxxzPyyz = mfcab + mfacb;
+               real mxxzMyyz = mfcab - mfacb;
 
-               LBMReal mxyyPxzz = mfbca + mfbac;
-               LBMReal mxyyMxzz = mfbca - mfbac;
+               real mxyyPxzz = mfbca + mfbac;
+               real mxyyMxzz = mfbca - mfbac;
 
                //relax
                wadjust    = OxyyMxzz+(1.-OxyyMxzz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimit);
@@ -948,11 +951,11 @@ void IncompressibleCumulantWithSpongeLayerLBMKernel::calculate(int step)
                //proof correctness
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal rho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real rho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb; 
                //LBMReal dif = fabs(rho - rho_post);
-               LBMReal dif = rho - rho_post;
+               real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
                if(dif > 10.0E-7 || dif < -10.0E-7)
 #else
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.h
index 2747cdc7673b6fb7aa3ade08162568f14c3e3ad1..760ad30fb5abb51b2f7d21dbc23d26b3124ac934 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleCumulantWithSpongeLayerLBMKernel.h
@@ -24,19 +24,19 @@ public:
    ~IncompressibleCumulantWithSpongeLayerLBMKernel() override;
    SPtr<LBMKernel> clone() override;
    void calculate(int step) override;
-   void initRelaxFactor(int vdir, double vL1, double vdx, double vSP);
+   void initRelaxFactor(int vdir, real vL1, real vdx, real vSP);
    //! \param vdir where the sponge layer is placed
    //! \param vL1 length of simulation domain
    //! \param vdx subgrid space 
    //! \param vSP length of sponge layer
-   void setRelaxFactorParam(int vdir, double vL1, double vdx, double vSP);
+   void setRelaxFactorParam(int vdir, real vL1, real vdx, real vSP);
 protected:
   void initDataSet() override;
-  LBMReal OxyyMxzz;
+  real OxyyMxzz;
   int direction;
-  double L1;
-  double dx;
-  double SP;
+  real L1;
+  real dx;
+  real SP;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.cpp
index 39b83f72a835ade4f903910a502383c6e3cd2323..c4759d786367fc9c5030898839b57cbec7bd48ec 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.cpp
@@ -4,7 +4,7 @@
 
 
 //////////////////////////////////////////////////////////////////////////
-IncompressibleOffsetInterpolationProcessor::IncompressibleOffsetInterpolationProcessor(LBMReal omegaC, LBMReal omegaF)
+IncompressibleOffsetInterpolationProcessor::IncompressibleOffsetInterpolationProcessor(real omegaC, real omegaF)
    : omegaC(omegaC), omegaF(omegaF)
 {
 
@@ -19,13 +19,13 @@ InterpolationProcessorPtr IncompressibleOffsetInterpolationProcessor::clone()
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void IncompressibleOffsetInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void IncompressibleOffsetInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -35,7 +35,7 @@ void IncompressibleOffsetInterpolationProcessor::setOffsets(LBMReal xoff, LBMRea
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void IncompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellC, omegaC, 0.5);
@@ -49,22 +49,23 @@ void IncompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(D3Q27IC
    calcInterpolatedNode(icellF.TNE, omegaF,  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void IncompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
    calcInterpolatedCoefficiets(icellF, omegaF, 2.0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-                                                    LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void IncompressibleOffsetInterpolationProcessor::calcMoments(const real* const f, real omega, real& press, real& vx1, real& vx2, real& vx3, 
+                                                    real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    //UBLOG(logINFO,"D3Q27System::DIR_M0M  = " << D3Q27System::DIR_M0M);
-   //UBLOG(logINFO,"BW  = " << BW);
+   //UBLOG(logINFO,"BW  = " << BW);
 
-   LBMReal rho = 0.0;
+   real rho = 0.0;
    D3Q27System::calcIncompMacroscopicValues(f,rho,vx1,vx2,vx3);
    
    //////////////////////////////////////////////////////////////////////////
@@ -85,7 +86,7 @@ void IncompressibleOffsetInterpolationProcessor::calcMoments(const LBMReal* cons
    kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))-(vx1*vx2));// might not be optimal MG 25.2.13
    kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))-(vx2*vx3));
    kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[D3Q27System::DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))-(vx1*vx1-vx2*vx2));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))-(vx1*vx1-vx2*vx2));
    kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))-(vx1*vx1-vx3*vx3));
    //kxxMzz = -3./2.*omega*(((((f[NW]+f[SE])-(f[BS]+f[TN]))+((f[SW]+f[NE])-(f[17]+f[BN])))+((f[W]+f[DIR_P00])-(f[B]+f[T])))-(vx1*vx1-vx3*vx3));
 
@@ -100,25 +101,25 @@ void IncompressibleOffsetInterpolationProcessor::calcMoments(const LBMReal* cons
    //UBLOG(logINFO,"f[TN]  = " << f[TN] << " TN  = " << TN);
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -418,7 +419,7 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(con
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   const LBMReal o = omega;
+   const real o = omega;
 
    f_E = eps_new*((2*(-2*ax + by + cz-kxxMzzAverage-kxxMyyAverage))/(27.*o));
    f_N = eps_new*((2*(ax - 2*by + cz+2*kxxMyyAverage-kxxMzzAverage))/(27.*o));
@@ -526,21 +527,22 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedCoefficiets(con
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNode(LBMReal* f, LBMReal  /*omega*/, LBMReal  /*x*/, LBMReal  /*y*/, LBMReal  /*z*/, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNode(real* f, real  /*omega*/, real  /*x*/, real  /*y*/, real  /*z*/, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+   real rho  = press ;//+ (2.*axx*x+axy*y+axz*z+axyz*y*z+ax + 2.*byy*y+bxy*x+byz*z+bxyz*x*z+by + 2.*czz*z+cxz*x+cyz*y+cxyz*x*y+cz)/3.;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
 
    //////////////////////////////////////////////////////////////////////////
    //DRAFT
    //vx1 -= forcingF*0.5;
    //////////////////////////////////////////////////////////////////////////
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcIncompFeq(feq,rho,vx1,vx2,vx3);
 
    f[DIR_P00]    = f_E    + xs*x_E    + ys*y_E    + zs*z_E    + xs*ys*xy_E    + xs*zs*xz_E    + ys*zs*yz_E    + feq[DIR_P00];
@@ -573,7 +575,7 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNode(LBMReal* f
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBSW()
+real IncompressibleOffsetInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -586,7 +588,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTSW()
+real IncompressibleOffsetInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -599,7 +601,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTSE()
+real IncompressibleOffsetInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -612,7 +614,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBSE()
+real IncompressibleOffsetInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -625,7 +627,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBNW()
+real IncompressibleOffsetInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -638,7 +640,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTNW()
+real IncompressibleOffsetInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -651,7 +653,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTNE()
+real IncompressibleOffsetInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -664,7 +666,7 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBNE()
+real IncompressibleOffsetInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -677,11 +679,12 @@ LBMReal IncompressibleOffsetInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -689,22 +692,22 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal*
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
 
-   LBMReal rho = press ;//+ (ax+by+cz)/3.;
+   real rho = press ;//+ (ax+by+cz)/3.;
 
    //////////////////////////////////////////////////////////////////////////
    //DRAFT
    //vx1 -= forcingC*0.5;
    //////////////////////////////////////////////////////////////////////////
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcIncompFeq(feq,rho,vx1,vx2,vx3);
 
-   LBMReal eps_new = 2.;
-   LBMReal o  = omega;
+   real eps_new = 2.;
+   real o  = omega;
 //   LBMReal op = 1.;
 
    //f_E    = eps_new *((5.*ax*o + 5.*by*o + 5.*cz*o - 8.*ax*op + 4.*by*op + 4.*cz*op)/(54.*o*op));
@@ -766,14 +769,14 @@ void IncompressibleOffsetInterpolationProcessor::calcInterpolatedNodeFC(LBMReal*
    f[DIR_000] = f_ZERO + feq[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void IncompressibleOffsetInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void IncompressibleOffsetInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
diff --git a/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.h
index 6b024d419308e284eae4f334290b23dcd5b48218..866c0f6933e67d66b3b36d65a0f484ba2d8cbf86 100644
--- a/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/IncompressibleOffsetInterpolationProcessor.h
@@ -16,51 +16,51 @@ class IncompressibleOffsetInterpolationProcessor : public InterpolationProcessor
 {
 public:
    IncompressibleOffsetInterpolationProcessor() = default;
-   IncompressibleOffsetInterpolationProcessor(LBMReal omegaC, LBMReal omegaF);
+   IncompressibleOffsetInterpolationProcessor(real omegaC, real omegaF);
    ~IncompressibleOffsetInterpolationProcessor() override = default;
    InterpolationProcessorPtr clone() override;
-   void setOmegas(LBMReal omegaC, LBMReal omegaF) override;
+   void setOmegas(real omegaC, real omegaF) override;
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF) override;
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC) override; 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff) override; 
-   //LBMReal forcingC, forcingF;
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff) override;
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC) override; 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff) override; 
+   //real forcingC, forcingF;
 protected:   
 private:
-   LBMReal omegaC{0.0}, omegaF{0.0};
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC{0.0}, omegaF{0.0};
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
 //   LBMReal a,b,c;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) override;
-   void calcMoments(const LBMReal* const f, LBMReal omega, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, 
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new) override;
-   void calcInterpolatedNode(LBMReal* f, LBMReal omega, LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega) override;
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3) override;
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz) override;
+   void setOffsets(real xoff, real yoff, real zoff) override;
+   void calcMoments(const real* const f, real omega, real& rho, real& vx1, real& vx2, real& vx3, 
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets(const D3Q27ICell& icell, real omega, real eps_new) override;
+   void calcInterpolatedNode(real* f, real omega, real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega) override;
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3) override;
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz) override;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -69,7 +69,7 @@ inline void IncompressibleOffsetInterpolationProcessor::interpolateCoarseToFine(
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void IncompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void IncompressibleOffsetInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.cpp
index c37571337e537c324b557ac6c76680a63fc89b00..927dfeb158d0a89ca2081545420ac537ad7ae2e9 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.cpp
@@ -3,8 +3,10 @@
 #include "BCProcessor.h"
 #include "DataSet3D.h"
 #include "BCArray3D.h"
+#include "basics/constants/NumericConstants.h"
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 InitDensityLBMKernel::InitDensityLBMKernel()
 {
@@ -39,14 +41,14 @@ SPtr<LBMKernel> InitDensityLBMKernel::clone()
    return kernel;
 }
 
-void InitDensityLBMKernel::setVelocity(int x1, int x2, int x3, LBMReal vvx, LBMReal vvy, LBMReal vvz)
+void InitDensityLBMKernel::setVelocity(int x1, int x2, int x3, real vvx, real vvy, real vvz)
 {
    v(0, x1, x2, x3) = vvx;
    v(1, x1, x2, x3) = vvy;
    v(2, x1, x2, x3) = vvz;
 }
 
-double InitDensityLBMKernel::getCalculationTime()
+real InitDensityLBMKernel::getCalculationTime()
 {
    return 0;
 }
@@ -856,6 +858,7 @@ double InitDensityLBMKernel::getCalculationTime()
 void InitDensityLBMKernel::calculate(int  /*step*/)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    localDistributions = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
    nonLocalDistributions = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getNonLocalDistributions();
@@ -863,9 +866,9 @@ void InitDensityLBMKernel::calculate(int  /*step*/)
 
    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
    SPtr<BoundaryConditions> bcPtr;
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
-   LBMReal drho, vx1, vx2, vx3;
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
+   real drho, vx1, vx2, vx3;
    const int bcArrayMaxX1 = (int)bcArray->getNX1();
    const int bcArrayMaxX2 = (int)bcArray->getNX2();
    const int bcArrayMaxX3 = (int)bcArray->getNX3();
@@ -954,7 +957,7 @@ void InitDensityLBMKernel::calculate(int  /*step*/)
                //vx2 = vx2+(vvy-vx2);
                //vx3 = vx3+(vvz-vx3);
 
-               LBMReal cu_sq = 1.5*(vx1*vx1+vx2*vx2+vx3*vx3);
+               real cu_sq = 1.5*(vx1*vx1+vx2*vx2+vx3*vx3);
 
                feq[DIR_000] = c8o27*(drho-cu_sq);
                feq[DIR_P00] = c2o27*(drho+3.0*(vx1)+c9o2*(vx1)*(vx1)-cu_sq);
@@ -1016,11 +1019,11 @@ void InitDensityLBMKernel::calculate(int  /*step*/)
 
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal rho_post = f[REST]+f[DIR_P00]+f[W]+f[N]+f[S]+f[T]+f[B]
+               real rho_post = f[REST]+f[DIR_P00]+f[W]+f[N]+f[S]+f[T]+f[B]
                   +f[NE]+f[SW]+f[SE]+f[NW]+f[TE]+f[BW]+f[BE]
                   +f[TW]+f[TN]+f[BS]+f[BN]+f[TS]+f[TNE]+f[TSW]
                   +f[TSE]+f[TNW]+f[BNE]+f[BSW]+f[BSE]+f[BNW];
-               LBMReal dif = drho-rho_post;
+               real dif = drho-rho_post;
 #ifdef SINGLEPRECISION
                if (dif>10.0E-7||dif<-10.0E-7)
 #else
@@ -1033,35 +1036,35 @@ void InitDensityLBMKernel::calculate(int  /*step*/)
                //////////////////////////////////////////////////////////////////////////
                //write distribution
                //////////////////////////////////////////////////////////////////////////
-               (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[D3Q27System::INV_P00];
-               (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[D3Q27System::INV_0P0];
-               (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[D3Q27System::INV_00P];
-               (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[D3Q27System::INV_PP0];
-               (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3) = f[D3Q27System::INV_MP0];
-               (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[D3Q27System::INV_P0P];
-               (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3) = f[D3Q27System::INV_M0P];
-               (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[D3Q27System::INV_0PP];
-               (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3) = f[D3Q27System::INV_0MP];
-               (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[D3Q27System::INV_PPP];
-               (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3) = f[D3Q27System::INV_MPP];
-               (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3) = f[D3Q27System::INV_PMP];
-               (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = f[D3Q27System::INV_MMP];
+               (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = f[INV_P00];
+               (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = f[INV_0P0];
+               (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = f[INV_00P];
+               (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = f[INV_PP0];
+               (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3) = f[INV_MP0];
+               (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = f[INV_P0P];
+               (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3) = f[INV_M0P];
+               (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = f[INV_0PP];
+               (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3) = f[INV_0MP];
+               (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = f[INV_PPP];
+               (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3) = f[INV_MPP];
+               (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3) = f[INV_PMP];
+               (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = f[INV_MMP];
 
-               (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3) = f[D3Q27System::INV_M00];
-               (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3) = f[D3Q27System::INV_0M0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p) = f[D3Q27System::INV_00M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3) = f[D3Q27System::INV_MM0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3) = f[D3Q27System::INV_PM0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p) = f[D3Q27System::INV_M0M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p) = f[D3Q27System::INV_P0M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p) = f[D3Q27System::INV_0MM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p) = f[D3Q27System::INV_0PM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[D3Q27System::INV_MMM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p) = f[D3Q27System::INV_PMM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p) = f[D3Q27System::INV_MPM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p) = f[D3Q27System::INV_PPM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3) = f[INV_M00];
+               (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3) = f[INV_0M0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p) = f[INV_00M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3) = f[INV_MM0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3) = f[INV_PM0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p) = f[INV_M0M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p) = f[INV_P0M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p) = f[INV_0MM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p) = f[INV_0PM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = f[INV_MMM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p) = f[INV_PMM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p) = f[INV_MPM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p) = f[INV_PPM];
 
-               (*this->zeroDistributions)(x1, x2, x3) = f[D3Q27System::DIR_000];
+               (*this->zeroDistributions)(x1, x2, x3) = f[DIR_000];
                //////////////////////////////////////////////////////////////////////////
 
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.h
index 71833e246353ff667ff025234b4a137fb905c5be..33255f8f5517e6a030cdb060d8397a6cf6cd8580 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/InitDensityLBMKernel.h
@@ -14,17 +14,17 @@ public:
    ~InitDensityLBMKernel() override;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   void setVelocity(int x1, int x2, int x3, LBMReal vvx, LBMReal vvy, LBMReal vvz);
-   double getCalculationTime() override;
+   void setVelocity(int x1, int x2, int x3, real vvx, real vvy, real vvz);
+   real getCalculationTime() override;
 protected:
    void initDataSet();
 private:
-//   LBMReal f[D3Q27System::ENDF+1];
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+//   real f[D3Q27System::ENDF+1];
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 //   LBMReal OxyyMxzz;
-   CbArray4D<LBMReal, IndexerX4X3X2X1> v;
+   CbArray4D<real, IndexerX4X3X2X1> v;
 };
 
 #endif // InitDensityLBMKernel_h__
diff --git a/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.cpp b/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.cpp
index efe2c8e7cfb39b960c0c86405a05633816fe56ef..33bf1e623ce943d4edf3b11c3f51ad585adf4262 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.cpp
@@ -4,26 +4,26 @@ InterpolationHelper::InterpolationHelper(InterpolationProcessorPtr iProcessor) :
 //////////////////////////////////////////////////////////////////////////
 InterpolationHelper::~InterpolationHelper() = default;
 //////////////////////////////////////////////////////////////////////////
-void InterpolationHelper::interpolate8to1(D3Q27ICell &icellF, LBMReal *icellC, double /*x1*/, double /*x2*/,
-                                          double /*x3*/, LBMReal omega)
+void InterpolationHelper::interpolate8to1(D3Q27ICell &icellF, real *icellC, real /*x1*/, real /*x2*/,
+                                          real /*x3*/, real omega)
 {
     iProcessor->calcInterpolatedCoefficiets(icellF, omega, 1.0);
     iProcessor->calcInterpolatedNodeFC(icellC, omega);
 }
 //////////////////////////////////////////////////////////////////////////
-void InterpolationHelper::interpolate8to1WithVelocity(D3Q27ICell &icellF, double x1, double x2, double x3,
-                                                      LBMReal omega, LBMReal &vx1, LBMReal &vx2, LBMReal &vx3)
+void InterpolationHelper::interpolate8to1WithVelocity(D3Q27ICell &icellF, real x1, real x2, real x3,
+                                                      real omega, real &vx1, real &vx2, real &vx3)
 {
     iProcessor->setOffsets(0.0, 0.0, 0.0);
     iProcessor->calcInterpolatedCoefficiets(icellF, omega, 0.0);
     iProcessor->calcInterpolatedVelocity(x1, x2, x3, vx1, vx2, vx3);
 }
 //////////////////////////////////////////////////////////////////////////
-void InterpolationHelper::interpolate8to1WithVelocityWithShearStress(D3Q27ICell &icellF, double x1, double x2,
-                                                                     double x3, LBMReal omega, LBMReal &vx1,
-                                                                     LBMReal &vx2, LBMReal &vx3, LBMReal &tauxx,
-                                                                     LBMReal &tauyy, LBMReal &tauzz, LBMReal &tauxy,
-                                                                     LBMReal &tauxz, LBMReal &tauyz)
+void InterpolationHelper::interpolate8to1WithVelocityWithShearStress(D3Q27ICell &icellF, real x1, real x2,
+                                                                     real x3, real omega, real &vx1,
+                                                                     real &vx2, real &vx3, real &tauxx,
+                                                                     real &tauyy, real &tauzz, real &tauxy,
+                                                                     real &tauxz, real &tauyz)
 {
     iProcessor->setOffsets(0.0, 0.0, 0.0);
     iProcessor->calcInterpolatedCoefficiets(icellF, omega, 0.0);
diff --git a/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.h b/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.h
index 0e7318a8c44785679cdad1292bf561cc631b2041..b67e8d18ac5c54c775c098aad484d4b5657a917b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.h
+++ b/src/cpu/VirtualFluidsCore/LBM/InterpolationHelper.h
@@ -11,13 +11,13 @@ class InterpolationHelper
 public:
     InterpolationHelper(InterpolationProcessorPtr iProcessor);
     ~InterpolationHelper();
-    void interpolate8to1(D3Q27ICell &icellF, LBMReal *icellC, double x1, double x2, double x3, LBMReal omega);
-    void interpolate8to1WithVelocity(D3Q27ICell &icellF, double x1, double x2, double x3, LBMReal omega, LBMReal &vx1,
-                                     LBMReal &vx2, LBMReal &vx3);
-    void interpolate8to1WithVelocityWithShearStress(D3Q27ICell &icellF, double x1, double x2, double x3, LBMReal omega,
-                                                    LBMReal &vx1, LBMReal &vx2, LBMReal &vx3, LBMReal &tauxx,
-                                                    LBMReal &tauyy, LBMReal &tauzz, LBMReal &tauxy, LBMReal &tauxz,
-                                                    LBMReal &tauyz);
+    void interpolate8to1(D3Q27ICell &icellF, real *icellC, real x1, real x2, real x3, real omega);
+    void interpolate8to1WithVelocity(D3Q27ICell &icellF, real x1, real x2, real x3, real omega, real &vx1,
+                                     real &vx2, real &vx3);
+    void interpolate8to1WithVelocityWithShearStress(D3Q27ICell &icellF, real x1, real x2, real x3, real omega,
+                                                    real &vx1, real &vx2, real &vx3, real &tauxx,
+                                                    real &tauyy, real &tauzz, real &tauxy, real &tauxz,
+                                                    real &tauyz);
 
 protected:
 private:
diff --git a/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.cpp
index 8d2a4163b3127d5199c0419e34e1c4b28d505e2c..a82f397c9f89d7605d8409b8f32f8d87a8f8a402 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.cpp
@@ -41,12 +41,12 @@ void InterpolationProcessor::writeICellInv(SPtr<DistributionArray3D> f, const D3
     f->setDistributionInv(icell.TNE, x1 + 1, x2 + 1, x3 + 1);
 }
 //////////////////////////////////////////////////////////////////////////
-void InterpolationProcessor::writeINode(SPtr<DistributionArray3D> f, const LBMReal *const inode, int x1, int x2, int x3)
+void InterpolationProcessor::writeINode(SPtr<DistributionArray3D> f, const real *const inode, int x1, int x2, int x3)
 {
     f->setDistribution(inode, x1, x2, x3);
 }
 //////////////////////////////////////////////////////////////////////////
-void InterpolationProcessor::writeINodeInv(SPtr<DistributionArray3D> f, const LBMReal *const inode, int x1, int x2,
+void InterpolationProcessor::writeINodeInv(SPtr<DistributionArray3D> f, const real *const inode, int x1, int x2,
                                            int x3)
 {
     f->setDistributionInv(inode, x1, x2, x3);
@@ -65,7 +65,7 @@ bool InterpolationProcessor::iCellHasSolid(const SPtr<BCArray3D> bcArray, int x1
 //////////////////////////////////////////////////////////////////////////
 bool InterpolationProcessor::findNeighborICell(const SPtr<BCArray3D> bcArray, SPtr<DistributionArray3D> f,
                                                D3Q27ICell &icell, int maxX1, int maxX2, int maxX3, int x1, int x2,
-                                               int x3, LBMReal &xoff, LBMReal &yoff, LBMReal &zoff)
+                                               int x3, real &xoff, real &yoff, real &zoff)
 {
     m_maxX1 = maxX1;
     m_maxX2 = maxX2;
diff --git a/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.h
index 627549dc991f31b543ca23e4c87e8520feb84af3..f298a531b61ca4c4d9ddffc6e2dfeab535be0aa1 100644
--- a/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/InterpolationProcessor.h
@@ -7,14 +7,14 @@
 #include "LBMSystem.h"
 
 struct D3Q27ICell {
-    LBMReal TSW[27];
-    LBMReal TNW[27];
-    LBMReal TNE[27];
-    LBMReal TSE[27];
-    LBMReal BSW[27];
-    LBMReal BNW[27];
-    LBMReal BNE[27];
-    LBMReal BSE[27];
+    real TSW[27];
+    real TNW[27];
+    real TNE[27];
+    real TSE[27];
+    real BSW[27];
+    real BNW[27];
+    real BNE[27];
+    real BSE[27];
 };
 
 class InterpolationProcessor;
@@ -28,34 +28,34 @@ public:
     InterpolationProcessor();
     virtual ~InterpolationProcessor();
     virtual InterpolationProcessorPtr clone()                                    = 0;
-    virtual void setOmegas(LBMReal omegaC, LBMReal omegaF)                       = 0;
+    virtual void setOmegas(real omegaC, real omegaF)                       = 0;
     virtual void interpolateCoarseToFine(D3Q27ICell &icellC, D3Q27ICell &icellF) = 0;
-    virtual void interpolateCoarseToFine(D3Q27ICell &icellC, D3Q27ICell &icellF, LBMReal xoff, LBMReal yoff,
-                                         LBMReal zoff)                           = 0;
-    virtual void interpolateFineToCoarse(D3Q27ICell &icellF, LBMReal *icellC)    = 0;
-    virtual void interpolateFineToCoarse(D3Q27ICell &icellF, LBMReal *icellC, LBMReal xoff, LBMReal yoff,
-                                         LBMReal zoff)                           = 0;
+    virtual void interpolateCoarseToFine(D3Q27ICell &icellC, D3Q27ICell &icellF, real xoff, real yoff,
+                                         real zoff)                           = 0;
+    virtual void interpolateFineToCoarse(D3Q27ICell &icellF, real *icellC)    = 0;
+    virtual void interpolateFineToCoarse(D3Q27ICell &icellF, real *icellC, real xoff, real yoff,
+                                         real zoff)                           = 0;
 
     static void readICell(SPtr<DistributionArray3D> f, D3Q27ICell &icell, int x1, int x2, int x3);
     static void writeICell(SPtr<DistributionArray3D> f, const D3Q27ICell &icell, int x1, int x2, int x3);
     static void writeICellInv(SPtr<DistributionArray3D> f, const D3Q27ICell &icell, int x1, int x2, int x3);
-    static void writeINode(SPtr<DistributionArray3D> f, const LBMReal *const inode, int x1, int x2, int x3);
-    static void writeINodeInv(SPtr<DistributionArray3D> f, const LBMReal *const inode, int x1, int x2, int x3);
+    static void writeINode(SPtr<DistributionArray3D> f, const real *const inode, int x1, int x2, int x3);
+    static void writeINodeInv(SPtr<DistributionArray3D> f, const real *const inode, int x1, int x2, int x3);
     static bool iCellHasSolid(const SPtr<BCArray3D> bcArray, int x1, int x2, int x3);
     static int iCellHowManySolids(const SPtr<BCArray3D> bcArray, int x1, int x2, int x3);
 
     bool findNeighborICell(const SPtr<BCArray3D> bcArray, SPtr<DistributionArray3D> f, D3Q27ICell &icell, int maxX1,
-                           int maxX2, int maxX3, int x1, int x2, int x3, LBMReal &xoff, LBMReal &yoff, LBMReal &zoff);
+                           int maxX2, int maxX3, int x1, int x2, int x3, real &xoff, real &yoff, real &zoff);
 
 protected:
-    virtual void calcInterpolatedCoefficiets(const D3Q27ICell &icell, LBMReal omega, LBMReal eps_new) {}
-    virtual void calcInterpolatedNodeFC(LBMReal *f, LBMReal omega) {}
-    virtual void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal &vx1, LBMReal &vx2, LBMReal &vx3) {}
-    virtual void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z, LBMReal &tauxx, LBMReal &tauyy,
-                                             LBMReal &tauzz, LBMReal &tauxy, LBMReal &tauxz, LBMReal &tauyz)
+    virtual void calcInterpolatedCoefficiets(const D3Q27ICell &icell, real omega, real eps_new) {}
+    virtual void calcInterpolatedNodeFC(real *f, real omega) {}
+    virtual void calcInterpolatedVelocity(real x, real y, real z, real &vx1, real &vx2, real &vx3) {}
+    virtual void calcInterpolatedShearStress(real x, real y, real z, real &tauxx, real &tauyy,
+                                             real &tauzz, real &tauxy, real &tauxz, real &tauyz)
     {
     }
-    virtual void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff) {}
+    virtual void setOffsets(real xoff, real yoff, real zoff) {}
     friend class InterpolationHelper;
 
 private:
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
index 3c588e1506d8649149daad5588e2290c0832334a..0f9a9a96586268c872562e4d2ddfab5ef8e6377c 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
@@ -53,22 +53,22 @@ void LBMKernel::setBCProcessor(SPtr<BCProcessor> bcp) { bcProcessor = bcp; }
 //////////////////////////////////////////////////////////////////////////
 SPtr<BCProcessor> LBMKernel::getBCProcessor() const { return bcProcessor; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setCollisionFactor(double collFactor) { this->collFactor = collFactor; }
+void LBMKernel::setCollisionFactor(real collFactor) { this->collFactor = collFactor; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getCollisionFactor() const { return collFactor; }
+real LBMKernel::getCollisionFactor() const { return collFactor; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setForcingX1(LBMReal forcingX1)
+void LBMKernel::setForcingX1(real forcingX1)
 {
     this->muForcingX1.SetExpr(UbSystem::toString(forcingX1, LBMRealLim::digits10));
     this->checkFunction(muForcingX1);
 }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setForcingX2(LBMReal forcingX2)
+void LBMKernel::setForcingX2(real forcingX2)
 {
     this->muForcingX2.SetExpr(UbSystem::toString(forcingX2, LBMRealLim::digits10));
     this->checkFunction(muForcingX2);
 }
-void LBMKernel::setForcingX3(LBMReal forcingX3)
+void LBMKernel::setForcingX3(real forcingX3)
 {
     this->muForcingX3.SetExpr(UbSystem::toString(forcingX3, LBMRealLim::digits10));
     this->checkFunction(muForcingX3);
@@ -111,7 +111,7 @@ void LBMKernel::setForcingX3(const std::string &muParserString)
 //////////////////////////////////////////////////////////////////////////
 void LBMKernel::checkFunction(mu::Parser fct)
 {
-    double x1 = 1.0, x2 = 1.0, x3 = 1.0, dt = 1.0, nue = 1.0, rho = 1.0;
+    real x1 = 1.0, x2 = 1.0, x3 = 1.0, dt = 1.0, nue = 1.0, rho = 1.0;
     fct.DefineVar("x1", &x1);
     fct.DefineVar("x2", &x2);
     fct.DefineVar("x3", &x3);
@@ -141,9 +141,9 @@ void LBMKernel::setIndex(int x1, int x2, int x3)
 //////////////////////////////////////////////////////////////////////////
 SPtr<DataSet3D> LBMKernel::getDataSet() const { return this->dataSet; }
 //////////////////////////////////////////////////////////////////////////
-LBMReal LBMKernel::getDeltaT() const { return this->deltaT; }
+real LBMKernel::getDeltaT() const { return this->deltaT; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setDeltaT(LBMReal dt) { deltaT = dt; }
+void LBMKernel::setDeltaT(real dt) { deltaT = dt; }
 //////////////////////////////////////////////////////////////////////////
 bool LBMKernel::getCompressible() const { return compressible; }
 //////////////////////////////////////////////////////////////////////////
@@ -188,49 +188,49 @@ bool LBMKernel::isInsideOfDomain(const int &x1, const int &x2, const int &x3) co
 }
 //////////////////////////////////////////////////////////////////////////
 
-void LBMKernel::setCollisionFactorMultiphase(double collFactorL, double collFactorG)
+void LBMKernel::setCollisionFactorMultiphase(real collFactorL, real collFactorG)
 {
     this->collFactorL = collFactorL;
     this->collFactorG = collFactorG;
 }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getCollisionFactorL() const { return collFactorL; }
+real LBMKernel::getCollisionFactorL() const { return collFactorL; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getCollisionFactorG() const { return collFactorG; }
+real LBMKernel::getCollisionFactorG() const { return collFactorG; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setDensityRatio(double densityRatio) { this->densityRatio = densityRatio; }
+void LBMKernel::setDensityRatio(real densityRatio) { this->densityRatio = densityRatio; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getDensityRatio() const { return densityRatio; }
+real LBMKernel::getDensityRatio() const { return densityRatio; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setMultiphaseModelParameters(LBMReal beta, LBMReal kappa)
+void LBMKernel::setMultiphaseModelParameters(real beta, real kappa)
 {
     this->beta  = beta;
     this->kappa = kappa;
 }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::getMultiphaseModelParameters(LBMReal &beta, LBMReal &kappa)
+void LBMKernel::getMultiphaseModelParameters(real &beta, real &kappa)
 {
     beta  = this->beta;
     kappa = this->kappa;
 }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setContactAngle(double contactAngle) { this->contactAngle = contactAngle; }
+void LBMKernel::setContactAngle(real contactAngle) { this->contactAngle = contactAngle; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getContactAngle() const { return contactAngle; }
+real LBMKernel::getContactAngle() const { return contactAngle; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setPhiL(double phiL) { this->phiL = phiL; }
+void LBMKernel::setPhiL(real phiL) { this->phiL = phiL; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setPhiH(double phiH) { this->phiH = phiH; }
+void LBMKernel::setPhiH(real phiH) { this->phiH = phiH; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getPhiL() const { return phiL; }
+real LBMKernel::getPhiL() const { return phiL; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getPhiH() const { return phiH; }
+real LBMKernel::getPhiH() const { return phiH; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setPhaseFieldRelaxation(double tauH) { this->tauH = tauH; }
+void LBMKernel::setPhaseFieldRelaxation(real tauH) { this->tauH = tauH; }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernel::getPhaseFieldRelaxation() const { return tauH; }
+real LBMKernel::getPhaseFieldRelaxation() const { return tauH; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setMobility(double mob) { this->mob = mob; }
+void LBMKernel::setMobility(real mob) { this->mob = mob; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setInterfaceWidth(double w) { this->interfaceWidth = w; }
+void LBMKernel::setInterfaceWidth(real w) { this->interfaceWidth = w; }
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
index d5eb02d4c0ef310cdfd63d283abc9719996e5f84..bc12a1ed93ffc241f4e121207376e44533908259 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
@@ -49,7 +49,7 @@ class Block3D;
 class LBMKernel : public ILBMKernel, public enableSharedFromThis<LBMKernel>
 {
 public:
-    using LBMRealLim = std::numeric_limits<LBMReal>;
+    using LBMRealLim = std::numeric_limits<real>;
 
 public:
     LBMKernel();
@@ -57,13 +57,13 @@ public:
     virtual SPtr<LBMKernel> clone() = 0;
 
     void calculate(int step) override    = 0;
-    double getCalculationTime() override = 0;
+    real getCalculationTime() override = 0;
 
     void setBCProcessor(SPtr<BCProcessor> bcp) override;
     SPtr<BCProcessor> getBCProcessor() const override;
 
-    void setCollisionFactor(double collFactor) override;
-    double getCollisionFactor() const override;
+    void setCollisionFactor(real collFactor) override;
+    real getCollisionFactor() const override;
 
     void setGhostLayerWidth(int witdh);
     int getGhostLayerWidth() const override;
@@ -71,9 +71,9 @@ public:
     void setDataSet(SPtr<DataSet3D> dataSet);
     SPtr<DataSet3D> getDataSet() const override;
 
-    void setForcingX1(LBMReal forcingX1);
-    void setForcingX2(LBMReal forcingX2);
-    void setForcingX3(LBMReal forcingX3);
+    void setForcingX1(real forcingX1);
+    void setForcingX2(real forcingX2);
+    void setForcingX3(real forcingX3);
 
     void setForcingX1(const mu::Parser &parser);
     void setForcingX2(const mu::Parser &parser);
@@ -85,8 +85,8 @@ public:
 
     void setIndex(int x1, int x2, int x3);
 
-    LBMReal getDeltaT() const override;
-    void setDeltaT(LBMReal dt);
+    real getDeltaT() const override;
+    void setDeltaT(real dt);
 
     bool getCompressible() const override;
     void setCompressible(bool val);
@@ -112,28 +112,28 @@ public:
 
     ///////// Extra methods for the multiphase kernel ////////////
 
-    void setCollisionFactorMultiphase(double collFactorL, double collFactorG);
-    double getCollisionFactorL() const;
-    double getCollisionFactorG() const;
-    void setDensityRatio(double densityRatio);
-    double getDensityRatio() const;
-    void setMultiphaseModelParameters(LBMReal beta, LBMReal kappa);
-    void getMultiphaseModelParameters(LBMReal &beta, LBMReal &kappa);
-    void setContactAngle(double contactAngle);
-    double getContactAngle() const;
-    void setPhiL(double phiL);
-    void setPhiH(double phiH);
-    double getPhiL() const;
-    double getPhiH() const;
-    void setPhaseFieldRelaxation(double tauH);
-    double getPhaseFieldRelaxation() const;
-    void setMobility(double mob);
-    void setInterfaceWidth(double w);
+    void setCollisionFactorMultiphase(real collFactorL, real collFactorG);
+    real getCollisionFactorL() const;
+    real getCollisionFactorG() const;
+    void setDensityRatio(real densityRatio);
+    real getDensityRatio() const;
+    void setMultiphaseModelParameters(real beta, real kappa);
+    void getMultiphaseModelParameters(real &beta, real &kappa);
+    void setContactAngle(real contactAngle);
+    real getContactAngle() const;
+    void setPhiL(real phiL);
+    void setPhiH(real phiH);
+    real getPhiL() const;
+    real getPhiH() const;
+    void setPhaseFieldRelaxation(real tauH);
+    real getPhaseFieldRelaxation() const;
+    void setMobility(real mob);
+    void setInterfaceWidth(real w);
 
 protected:
     SPtr<DataSet3D> dataSet;
     SPtr<BCProcessor> bcProcessor;
-    LBMReal collFactor;
+    real collFactor;
     int ghostLayerWidth{ 1 };
     bool compressible{ false };
 
@@ -143,7 +143,7 @@ protected:
     mu::Parser muForcingX2;
     mu::Parser muForcingX3;
     int ix1, ix2, ix3;
-    LBMReal deltaT{ 1.0 };
+    real deltaT{ 1.0 };
 
     // sponge layer
     bool withSpongeLayer{ false };
@@ -154,17 +154,17 @@ protected:
     std::array<int, 3> nx;
 
     // Multiphase model
-    LBMReal collFactorL;
-    LBMReal collFactorG;
-    LBMReal densityRatio;
-    LBMReal beta;
-    LBMReal kappa;
-    LBMReal contactAngle;
-    LBMReal phiL;
-    LBMReal phiH;
-    LBMReal tauH;
-    LBMReal mob;
-    LBMReal interfaceWidth { 4.0 };
+    real collFactorL;
+    real collFactorG;
+    real densityRatio;
+    real beta;
+    real kappa;
+    real contactAngle;
+    real phiL;
+    real phiH;
+    real tauH;
+    real mob;
+    real interfaceWidth { 4.0 };
 
 private:
     void checkFunction(mu::Parser fct);
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.cpp b/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.cpp
index 1fcdf118fa920d648b511c60ebbc48542e164be0..1f7128541ebbff53ea2638998f2a1c576a65b33c 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.cpp
@@ -5,10 +5,13 @@
 #include "DataSet3D.h"
 #include "BCProcessor.h"
 #include "BCArray3D.h"
+#include "basics/constants/NumericConstants.h"
+
+using namespace vf::basics::constant;
+//using namespace UbMath;
 
 //#define PROOF_CORRECTNESS
 
-using namespace UbMath;
 
 //////////////////////////////////////////////////////////////////////////
 LBMKernelETD3Q27BGK::LBMKernelETD3Q27BGK() 
@@ -42,6 +45,7 @@ SPtr<LBMKernel> LBMKernelETD3Q27BGK::clone()
 void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    //initializing of forcing stuff 
    if (withForcing)
@@ -60,9 +64,9 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
    zeroDistributions = std::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getZeroDistributions();
 
    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
-   LBMReal f[D3Q27System::ENDF+1];
-   LBMReal feq[D3Q27System::ENDF+1];
-   LBMReal drho,vx1,vx2,vx3;
+   real f[D3Q27System::ENDF+1];
+   real feq[D3Q27System::ENDF+1];
+   real drho,vx1,vx2,vx3;
    const int bcArrayMaxX1 = (int)bcArray->getNX1();
    const int bcArrayMaxX2 = (int)bcArray->getNX2();
    const int bcArrayMaxX3 = (int)bcArray->getNX3();
@@ -136,7 +140,7 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
                + f[DIR_0MP] + f[DIR_PPP] + f[DIR_MMP] + f[DIR_PMP] + f[DIR_MPP] - f[DIR_PPM] - f[DIR_MMM] - f[DIR_PMM] 
                - f[DIR_MPM];
 
-               LBMReal cu_sq=1.5*(vx1*vx1+vx2*vx2+vx3*vx3);
+               real cu_sq=1.5*(vx1*vx1+vx2*vx2+vx3*vx3);
 
                feq[DIR_000] =  c8o27*(drho-cu_sq);
                feq[DIR_P00] =  c2o27*(drho+3.0*( vx1   )+c9o2*( vx1   )*( vx1   )-cu_sq);
@@ -238,11 +242,11 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
                }
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal rho_post = f[REST] + f[DIR_P00] + f[W] + f[N] + f[S] + f[T] + f[B] 
+               real rho_post = f[REST] + f[DIR_P00] + f[W] + f[N] + f[S] + f[T] + f[B] 
                + f[NE] + f[SW] + f[SE] + f[NW] + f[TE] + f[BW] + f[BE]
                + f[TW] + f[TN] + f[BS] + f[BN] + f[TS] + f[TNE] + f[TSW]
                + f[TSE] + f[TNW] + f[BNE] + f[BSW] + f[BSE] + f[BNW];
-               LBMReal dif = drho - rho_post;
+               real dif = drho - rho_post;
 #ifdef SINGLEPRECISION
                if(dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -255,35 +259,35 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
                //////////////////////////////////////////////////////////////////////////
                //write distribution
                //////////////////////////////////////////////////////////////////////////
-               (*this->localDistributions)(D3Q27System::ET_E,x1,  x2,  x3) = f[D3Q27System::INV_P00];
-               (*this->localDistributions)(D3Q27System::ET_N,x1,  x2,  x3) = f[D3Q27System::INV_0P0];
-               (*this->localDistributions)(D3Q27System::ET_T,x1,  x2,  x3) = f[D3Q27System::INV_00P];
-               (*this->localDistributions)(D3Q27System::ET_NE,x1,  x2,  x3) = f[D3Q27System::INV_PP0];
-               (*this->localDistributions)(D3Q27System::ET_NW,x1p,x2,  x3) = f[D3Q27System::INV_MP0];
-               (*this->localDistributions)(D3Q27System::ET_TE,x1,  x2,  x3) = f[D3Q27System::INV_P0P];
-               (*this->localDistributions)(D3Q27System::ET_TW,x1p,x2,  x3) = f[D3Q27System::INV_M0P];
-               (*this->localDistributions)(D3Q27System::ET_TN,x1,  x2,  x3) = f[D3Q27System::INV_0PP];
-               (*this->localDistributions)(D3Q27System::ET_TS,x1,  x2p,x3) = f[D3Q27System::INV_0MP];
-               (*this->localDistributions)(D3Q27System::ET_TNE,x1,  x2,  x3) = f[D3Q27System::INV_PPP];
-               (*this->localDistributions)(D3Q27System::ET_TNW,x1p,x2,  x3) = f[D3Q27System::INV_MPP];
-               (*this->localDistributions)(D3Q27System::ET_TSE,x1,  x2p,x3) = f[D3Q27System::INV_PMP];
-               (*this->localDistributions)(D3Q27System::ET_TSW,x1p,x2p,x3) = f[D3Q27System::INV_MMP];
+               (*this->localDistributions)(D3Q27System::ET_E,x1,  x2,  x3) = f[INV_P00];
+               (*this->localDistributions)(D3Q27System::ET_N,x1,  x2,  x3) = f[INV_0P0];
+               (*this->localDistributions)(D3Q27System::ET_T,x1,  x2,  x3) = f[INV_00P];
+               (*this->localDistributions)(D3Q27System::ET_NE,x1,  x2,  x3) = f[INV_PP0];
+               (*this->localDistributions)(D3Q27System::ET_NW,x1p,x2,  x3) = f[INV_MP0];
+               (*this->localDistributions)(D3Q27System::ET_TE,x1,  x2,  x3) = f[INV_P0P];
+               (*this->localDistributions)(D3Q27System::ET_TW,x1p,x2,  x3) = f[INV_M0P];
+               (*this->localDistributions)(D3Q27System::ET_TN,x1,  x2,  x3) = f[INV_0PP];
+               (*this->localDistributions)(D3Q27System::ET_TS,x1,  x2p,x3) = f[INV_0MP];
+               (*this->localDistributions)(D3Q27System::ET_TNE,x1,  x2,  x3) = f[INV_PPP];
+               (*this->localDistributions)(D3Q27System::ET_TNW,x1p,x2,  x3) = f[INV_MPP];
+               (*this->localDistributions)(D3Q27System::ET_TSE,x1,  x2p,x3) = f[INV_PMP];
+               (*this->localDistributions)(D3Q27System::ET_TSW,x1p,x2p,x3) = f[INV_MMP];
 
-               (*this->nonLocalDistributions)(D3Q27System::ET_W,x1p,x2,  x3    ) = f[D3Q27System::INV_M00 ];
-               (*this->nonLocalDistributions)(D3Q27System::ET_S,x1,  x2p,x3    ) = f[D3Q27System::INV_0M0 ];
-               (*this->nonLocalDistributions)(D3Q27System::ET_B,x1,  x2,  x3p  ) = f[D3Q27System::INV_00M ];
-               (*this->nonLocalDistributions)(D3Q27System::ET_SW,x1p,x2p,x3   ) = f[D3Q27System::INV_MM0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_SE,x1,  x2p,x3   ) = f[D3Q27System::INV_PM0];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BW,x1p,x2,  x3p ) = f[D3Q27System::INV_M0M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BE,x1,  x2,  x3p ) = f[D3Q27System::INV_P0M];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BS,x1,  x2p,x3p ) = f[D3Q27System::INV_0MM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BN,x1,  x2,  x3p ) = f[D3Q27System::INV_0PM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BSW,x1p,x2p,x3p) = f[D3Q27System::INV_MMM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BSE,x1,  x2p,x3p) = f[D3Q27System::INV_PMM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BNW,x1p,x2,  x3p) = f[D3Q27System::INV_MPM];
-               (*this->nonLocalDistributions)(D3Q27System::ET_BNE,x1,  x2,  x3p) = f[D3Q27System::INV_PPM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_W,x1p,x2,  x3    ) = f[INV_M00 ];
+               (*this->nonLocalDistributions)(D3Q27System::ET_S,x1,  x2p,x3    ) = f[INV_0M0 ];
+               (*this->nonLocalDistributions)(D3Q27System::ET_B,x1,  x2,  x3p  ) = f[INV_00M ];
+               (*this->nonLocalDistributions)(D3Q27System::ET_SW,x1p,x2p,x3   ) = f[INV_MM0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_SE,x1,  x2p,x3   ) = f[INV_PM0];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BW,x1p,x2,  x3p ) = f[INV_M0M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BE,x1,  x2,  x3p ) = f[INV_P0M];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BS,x1,  x2p,x3p ) = f[INV_0MM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BN,x1,  x2,  x3p ) = f[INV_0PM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BSW,x1p,x2p,x3p) = f[INV_MMM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BSE,x1,  x2p,x3p) = f[INV_PMM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BNW,x1p,x2,  x3p) = f[INV_MPM];
+               (*this->nonLocalDistributions)(D3Q27System::ET_BNE,x1,  x2,  x3p) = f[INV_PPM];
 
-               (*this->zeroDistributions)(x1,x2,x3) = f[D3Q27System::DIR_000];
+               (*this->zeroDistributions)(x1,x2,x3) = f[DIR_000];
                //////////////////////////////////////////////////////////////////////////
 
 
@@ -293,7 +297,7 @@ void LBMKernelETD3Q27BGK::calculate(int  /*step*/)
    }
 }
 //////////////////////////////////////////////////////////////////////////
-double LBMKernelETD3Q27BGK::getCalculationTime()
+real LBMKernelETD3Q27BGK::getCalculationTime()
 {
    return 0.0;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.h b/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.h
index 09e495c2375b8f009f2a231ca4a762437031303b..c02725698d64e129f2fc8d5858d8598b8db6682f 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernelETD3Q27BGK.h
@@ -14,21 +14,21 @@ public:
    ~LBMKernelETD3Q27BGK() override;
    void calculate(int step)override;
    SPtr<LBMKernel> clone()override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
 
 private:
    void initDataSet();
    //void collideAllCompressible();
    //void collideAllIncompressible();
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 
 
 };
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMSystem.h b/src/cpu/VirtualFluidsCore/LBM/LBMSystem.h
index 14b4d223b2e07e3dbca9947cefd89de045bfb3cf..5cea71cd2054cc6755266e1b0b9b314ea06cb476 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMSystem.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMSystem.h
@@ -37,7 +37,7 @@
 #include <iostream>
 #include <string>
 
-#include "basics/Core/DataTypes.h"
+#include "basics/DataTypes.h"
 
 //! \brief namespace for global system-functions
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h b/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h
index de485c28da920b150476ad8e7b4e1f03019e132e..8f4feed79fb546289ebda2ae49439144e5a9e388 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMUnitConverter.h
@@ -67,17 +67,17 @@ public:
 
     LBMUnitConverter() = default;
 
-    LBMUnitConverter(const double &refLengthWorld, const double &csWorld, const double &rhoWorld,
-                     const double &refLengthLb, const double &csLb = 1.0 / std::sqrt(3.0), const double &rhoLb = 1.0)
+    LBMUnitConverter(const real &refLengthWorld, const real &csWorld, const real &rhoWorld,
+                     const real &refLengthLb, const real &csLb = 1.0 / std::sqrt(3.0), const real &rhoLb = 1.0)
     {
         this->init(refLengthWorld, csWorld, rhoWorld, csWorld, refLengthLb, rhoLb, csLb);
     }
 
-    LBMUnitConverter(const double &refLengthWorld, WORLD_MATERIAL worldMaterial, const double &refLengthLb,
-                     const double &csLb = 1.0 / std::sqrt(3.0), const double &rhoLb = 1.0)
+    LBMUnitConverter(const real &refLengthWorld, WORLD_MATERIAL worldMaterial, const real &refLengthLb,
+                     const real &csLb = 1.0 / std::sqrt(3.0), const real &rhoLb = 1.0)
     {
-        double csWorld;
-        double rhoWorld;
+        real csWorld;
+        real rhoWorld;
 
         if (worldMaterial == WATER) {
             csWorld  = 1484 /*m/s*/;
@@ -99,39 +99,39 @@ public:
 
     virtual ~LBMUnitConverter() = default;
 
-    double getRefRhoLb() { return refRhoLb; }
+    real getRefRhoLb() { return refRhoLb; }
 
-    double getFactorLentghLbToW() { return factorLengthLbToW; }
-    double getFactorLentghWToLb() { return 1.0 / this->getFactorLentghLbToW(); }
+    real getFactorLentghLbToW() { return factorLengthLbToW; }
+    real getFactorLentghWToLb() { return 1.0 / this->getFactorLentghLbToW(); }
 
-    double getFactorTimeLbToW() { return factorTimeLbToW; }
-    double getFactorTimeWToLb() { return 1.0 / this->getFactorTimeLbToW(); }
+    real getFactorTimeLbToW() { return factorTimeLbToW; }
+    real getFactorTimeWToLb() { return 1.0 / this->getFactorTimeLbToW(); }
 
-    double getFactorVelocityLbToW() { return factorLengthLbToW / factorTimeLbToW; }
-    double getFactorVelocityWToLb() { return 1.0 / this->getFactorVelocityLbToW(); }
+    real getFactorVelocityLbToW() { return factorLengthLbToW / factorTimeLbToW; }
+    real getFactorVelocityWToLb() { return 1.0 / this->getFactorVelocityLbToW(); }
 
-    double getFactorViscosityLbToW() { return factorLengthLbToW * factorLengthLbToW / factorTimeLbToW; }
-    double getFactorViscosityWToLb() { return 1.0 / this->getFactorViscosityLbToW(); }
+    real getFactorViscosityLbToW() { return factorLengthLbToW * factorLengthLbToW / factorTimeLbToW; }
+    real getFactorViscosityWToLb() { return 1.0 / this->getFactorViscosityLbToW(); }
 
-    double getFactorDensityLbToW() { return this->factorMassLbToW / std::pow(factorLengthLbToW, 3.0); }
-    double getFactorDensityWToLb() { return 1.0 / this->getFactorDensityLbToW(); }
+    real getFactorDensityLbToW() { return this->factorMassLbToW / std::pow(factorLengthLbToW, 3.0); }
+    real getFactorDensityWToLb() { return 1.0 / this->getFactorDensityLbToW(); }
 
-    double getFactorPressureLbToW(){ return this->factorMassLbToW / (factorLengthLbToW * factorTimeLbToW * factorTimeLbToW); }
-    double getFactorPressureWToLb() { return 1.0 / this->getFactorPressureLbToW(); }
+    real getFactorPressureLbToW(){ return this->factorMassLbToW / (factorLengthLbToW * factorTimeLbToW * factorTimeLbToW); }
+    real getFactorPressureWToLb() { return 1.0 / this->getFactorPressureLbToW(); }
 
-    double getFactorMassLbToW() { return this->factorMassLbToW; }
-    double getFactorMassWToLb() { return 1.0 / this->getFactorMassLbToW(); }
+    real getFactorMassLbToW() { return this->factorMassLbToW; }
+    real getFactorMassWToLb() { return 1.0 / this->getFactorMassLbToW(); }
 
-    double getFactorForceLbToW() { return factorMassLbToW * factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW); }
-    double getFactorForceWToLb() { return 1.0 / this->getFactorForceLbToW(); }
+    real getFactorForceLbToW() { return factorMassLbToW * factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW); }
+    real getFactorForceWToLb() { return 1.0 / this->getFactorForceLbToW(); }
 
-    double getFactorTorqueLbToW() { return factorMassLbToW * factorLengthLbToW * factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW);}
-    double getFactorTorqueWToLb() { return 1.0 / this->getFactorTorqueLbToW(); }
+    real getFactorTorqueLbToW() { return factorMassLbToW * factorLengthLbToW * factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW);}
+    real getFactorTorqueWToLb() { return 1.0 / this->getFactorTorqueLbToW(); }
 
-    double getFactorAccLbToW() { return factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW); }
-    double getFactorAccWToLb() { return 1.0 / this->getFactorAccLbToW(); }
+    real getFactorAccLbToW() { return factorLengthLbToW / (factorTimeLbToW * factorTimeLbToW); }
+    real getFactorAccWToLb() { return 1.0 / this->getFactorAccLbToW(); }
 
-    double getFactorTimeLbToW(double deltaX) const { return factorTimeWithoutDx * deltaX; }
+    real getFactorTimeLbToW(real deltaX) const { return factorTimeWithoutDx * deltaX; }
 
 
     /*==========================================================*/
@@ -185,8 +185,8 @@ public:
         return out.str();
     }
 
-    void init(const double &refLengthWorld, const double & /*csWorld*/, const double &rhoWorld, const double &vWorld,
-              const double &refLengthLb, const double &rhoLb, const double &vLb)
+    void init(const real &refLengthWorld, const real & /*csWorld*/, const real &rhoWorld, const real &vWorld,
+              const real &refLengthLb, const real &rhoLb, const real &vLb)
     {
         factorLengthLbToW   = refLengthWorld / refLengthLb;
         factorTimeLbToW     = vLb / vWorld * factorLengthLbToW;
@@ -196,11 +196,11 @@ public:
     }
 
 protected:
-    double factorLengthLbToW{ 1.0 };
-    double factorTimeLbToW{ 1.0 };
-    double factorMassLbToW{ 1.0 };
-    double refRhoLb{ 1.0 };
-    double factorTimeWithoutDx{ 0.0 };
+    real factorLengthLbToW{ 1.0 }; // init() sets this to refLengthWorld / refLengthLb
+    real factorTimeLbToW{ 1.0 }; // init() sets this to vLb / vWorld * factorLengthLbToW
+    real factorMassLbToW{ 1.0 }; // mass factor used by the force and torque getters
+    real refRhoLb{ 1.0 }; // NOTE(review): presumably the lattice reference density; its assignment is not visible in this hunk - confirm
+    real factorTimeWithoutDx{ 0.0 }; // multiplied by deltaX in getFactorTimeLbToW(real)
 };
 
 #endif // LBMUNITCONVERTER_H
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.cpp
index ad80b372251a11161de68c6935097da8eec3edc5..5561a04cc18d91b554d094ce43ee0e11f32af2ad 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.cpp
@@ -83,7 +83,9 @@ SPtr<LBMKernel> MultiphaseCumulantLBMKernel::clone()
 void MultiphaseCumulantLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+//    using namespace UbMath;
+    using namespace vf::lbm::dir;
+    using namespace vf::basics::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
@@ -111,10 +113,10 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
     int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
     int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
 
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
 
 
         for (int x3 = 0; x3 <= maxX3; x3++) {
@@ -125,34 +127,34 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
                         (*phaseField)(x1, x2, x3) = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
                                                     (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) +
                                                     (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) +
@@ -162,8 +164,8 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
-        LBMReal forcingTerm[D3Q27System::ENDF + 1];
+        real collFactorM;
+        real forcingTerm[D3Q27System::ENDF + 1];
 
         for (int x3 = minX3; x3 < maxX3; x3++) {
             for (int x2 = minX2; x2 < maxX2; x2++) {
@@ -196,52 +198,52 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
                         findNeighbors(phaseField, x1, x2, x3);
 
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
-
-                        LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
+
+                        real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
                         collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
                         if (withForcing) {
                             // muX1 = static_cast<double>(x1-1+ix1*maxX1);
@@ -258,7 +260,7 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                             forcingX2 = muForcingX2.Eval();
                             forcingX3 = muForcingX3.Eval();
 
-                            LBMReal rho_m = 1.0 / densityRatio;
+                            real rho_m = 1.0 / densityRatio;
                             forcingX1     = forcingX1 * (rho - rho_m);
                             forcingX2     = forcingX2 * (rho - rho_m);
                             forcingX3     = forcingX3 * (rho - rho_m);
@@ -268,19 +270,19 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                             // uz += forcingX3*deltaT*0.5; // Z
                         }
 
-                        LBMReal ux = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                        real ux = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
                                       (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
                                       (mfcbb - mfabb)) /
                                          (rho * c1o3) +
                                      (mu * dX1_phi + forcingX1) / (2 * rho);
 
-                        LBMReal uy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                        real uy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
                                       (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
                                       (mfbcb - mfbab)) /
                                          (rho * c1o3) +
                                      (mu * dX2_phi + forcingX2) / (2 * rho);
 
-                        LBMReal uz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                        real uz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
                                       (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
                                       (mfbbc - mfbba)) /
                                          (rho * c1o3) +
@@ -288,17 +290,17 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
                         //--------------------------------------------------------
 
-                        LBMReal ux2 = ux * ux;
-                        LBMReal uy2 = uy * uy;
-                        LBMReal uz2 = uz * uz;
+                        real ux2 = ux * ux;
+                        real uy2 = uy * uy;
+                        real uz2 = uz * uz;
 
                         //----------- Calculating Forcing Terms * -------------
                         for (int dir = FSTARTDIR; dir <= FENDDIR; dir++) {
-                            LBMReal velProd = DX1[dir] * ux + DX2[dir] * uy + DX3[dir] * uz;
-                            LBMReal velSq1  = velProd * velProd;
-                            LBMReal gamma = WEIGTH[dir] * (1.0 + 3 * velProd + 4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2));
+                            real velProd = DX1[dir] * ux + DX2[dir] * uy + DX3[dir] * uz;
+                            real velSq1  = velProd * velProd;
+                            real gamma = WEIGTH[dir] * (1.0 + 3 * velProd + 4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2));
 
-                            LBMReal fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
+                            real fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
 
                             forcingTerm[dir] = ((-ux) * (fac1 * dX1_phi + gamma * (mu * dX1_phi + forcingX1)) +
                                                 (-uy) * (fac1 * dX2_phi + gamma * (mu * dX2_phi + forcingX2)) +
@@ -308,8 +310,8 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                                                (DX3[dir]) * (fac1 * dX3_phi + gamma * (mu * dX3_phi + forcingX3));
                         }
 
-                        LBMReal gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
-                        LBMReal fac1      = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
+                        real gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
+                        real fac1      = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
                         forcingTerm[DIR_000] = (-ux) * (fac1 * dX1_phi + gamma * (mu * dX1_phi + forcingX1)) +
                                             (-uy) * (fac1 * dX2_phi + gamma * (mu * dX2_phi + forcingX2)) +
                                             (-uz) * (fac1 * dX3_phi + gamma * (mu * dX3_phi + forcingX3));
@@ -344,13 +346,13 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         mfcca = 3.0 * (mfcca + 0.5 * forcingTerm[DIR_PPM]) / rho;  //-(3.0*p1 - rho)*WEIGTH[BNE];
                         mfbbb = 3.0 * (mfbbb + 0.5 * forcingTerm[DIR_000]) / rho; //- (3.0*p1 - rho)*WEIGTH[REST];
 
-                        LBMReal rho1 = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
+                        real rho1 = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
                                        (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) +
                                        (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) + (mfbab + mfbcb) +
                                        (mfbba + mfbbc) + mfbbb;
 
 
-                        LBMReal oMdrho, m0, m1, m2;
+                        real oMdrho, m0, m1, m2;
 
                         oMdrho = mfccc + mfaaa;
                         m0     = mfaca + mfcac;
@@ -380,8 +382,8 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         // oMdrho = rho - (oMdrho + m0);
 
                         ////////////////////////////////////////////////////////////////////////////////////
-                        LBMReal wadjust;
-                        LBMReal qudricLimit = 0.01;
+                        real wadjust;
+                        real qudricLimit = 0.01;
                         ////////////////////////////////////////////////////////////////////////////////////
                         // Hin
                         ////////////////////////////////////////////////////////////////////////////////////
@@ -612,41 +614,41 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         ////////////////////////////////////////////////////////////////////////////////////
                         // Cumulants
                         ////////////////////////////////////////////////////////////////////////////////////
-                        LBMReal OxxPyyPzz = 1.; // omega2 or bulk viscosity
-                        LBMReal OxyyPxzz  = 1.; //-s9;//2+s9;//
-                        LBMReal OxyyMxzz  = 1.; // 2+s9;//
-                        LBMReal O4        = 1.;
-                        LBMReal O5        = 1.;
-                        LBMReal O6        = 1.;
+                        real OxxPyyPzz = 1.; // omega2 or bulk viscosity
+                        real OxyyPxzz  = 1.; //-s9;//2+s9;//
+                        real OxyyMxzz  = 1.; // 2+s9;//
+                        real O4        = 1.;
+                        real O5        = 1.;
+                        real O6        = 1.;
 
                         // Cum 4.
-                        LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-                        LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-                        LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+                        real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+                        real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+                        real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-                        LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) +
+                        real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) +
                                                   c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-                        LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) +
+                        real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) +
                                                   c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-                        LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) +
+                        real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) +
                                                   c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
 
                         // Cum 5.
-                        LBMReal CUMbcc = mfbcc -
+                        real CUMbcc = mfbcc -
                                          (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb +
                                           2. * (mfbab * mfacb + mfbba * mfabc)) -
                                          c1o3 * (mfbca + mfbac) * oMdrho;
-                        LBMReal CUMcbc = mfcbc -
+                        real CUMcbc = mfcbc -
                                          (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb +
                                           2. * (mfabb * mfcab + mfbba * mfbac)) -
                                          c1o3 * (mfcba + mfabc) * oMdrho;
-                        LBMReal CUMccb = mfccb -
+                        real CUMccb = mfccb -
                                          (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb +
                                           2. * (mfbab * mfbca + mfabb * mfcba)) -
                                          c1o3 * (mfacb + mfcab) * oMdrho;
 
                         // Cum 6.
-                        LBMReal CUMccc =
+                        real CUMccc =
                             mfccc +
                             ((-4. * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
                               4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
@@ -663,13 +665,13 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
                         // 2.
                         // linear combinations
-                        LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-                        LBMReal mxxMyy    = mfcaa - mfaca;
-                        LBMReal mxxMzz    = mfcaa - mfaac;
+                        real mxxPyyPzz = mfcaa + mfaca + mfaac;
+                        real mxxMyy    = mfcaa - mfaca;
+                        real mxxMzz    = mfcaa - mfaac;
 
-                        LBMReal dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-                        LBMReal dyuy = dxux + collFactorM * c3o2 * mxxMyy;
-                        LBMReal dzuz = dxux + collFactorM * c3o2 * mxxMzz;
+                        real dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+                        real dyuy = dxux + collFactorM * c3o2 * mxxMyy;
+                        real dzuz = dxux + collFactorM * c3o2 * mxxMzz;
 
                         (*divU)(x1, x2, x3) = dxux + dyuy + dzuz;
 
@@ -690,14 +692,14 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
                         // 3.
                         // linear combinations
-                        LBMReal mxxyPyzz = mfcba + mfabc;
-                        LBMReal mxxyMyzz = mfcba - mfabc;
+                        real mxxyPyzz = mfcba + mfabc;
+                        real mxxyMyzz = mfcba - mfabc;
 
-                        LBMReal mxxzPyyz = mfcab + mfacb;
-                        LBMReal mxxzMyyz = mfcab - mfacb;
+                        real mxxzPyyz = mfcab + mfacb;
+                        real mxxzMyyz = mfcab - mfacb;
 
-                        LBMReal mxyyPxzz = mfbca + mfbac;
-                        LBMReal mxyyMxzz = mfbca - mfbac;
+                        real mxyyPxzz = mfbca + mfbac;
+                        real mxyyMxzz = mfbca - mfbac;
 
                         // relax
                         wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1004,12 +1006,12 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         // proof correctness
                         //////////////////////////////////////////////////////////////////////////
 #ifdef PROOF_CORRECTNESS
-                        LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
+                        real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) +
                                            (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) +
                                            (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) + (mfbab + mfbcb) +
                                            (mfbba + mfbbc) + mfbbb;
 
-                        LBMReal dif = rho1 - rho_post;
+                        real dif = rho1 - rho_post;
 #ifdef SINGLEPRECISION
                         if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -1122,13 +1124,13 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                         h[DIR_000] = (*this->zeroDistributionsH)(x1, x2, x3);
 
                         for (int dir = STARTF; dir < (ENDF + 1); dir++) {
-                            LBMReal velProd = DX1[dir] * ux + DX2[dir] * uy + DX3[dir] * uz;
-                            LBMReal velSq1  = velProd * velProd;
-                            LBMReal hEq; //, gEq;
+                            real velProd = DX1[dir] * ux + DX2[dir] * uy + DX3[dir] * uz;
+                            real velSq1  = velProd * velProd;
+                            real hEq; //, gEq;
 
                             if (dir != DIR_000) {
-                                LBMReal dirGrad_phi = (phi[dir] - phi[INVDIR[dir]]) / 2.0;
-                                LBMReal hSource     = (tauH - 0.5) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * (dirGrad_phi) / denom; 
+                                real dirGrad_phi = (phi[dir] - phi[INVDIR[dir]]) / 2.0;
+                                real hSource     = (tauH - 0.5) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * (dirGrad_phi) / denom; 
                                 hEq = phi[DIR_000] * WEIGTH[dir] * (1.0 + 3.0 * velProd + 4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)) +                                 hSource * WEIGTH[dir];
 
                                 // This corresponds with the collision factor of 1.0 which equals (tauH + 0.5).
@@ -1140,35 +1142,35 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
                             }
                         }
 
-                        (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3)     = h[D3Q27System::INV_P00];
-                        (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3)     = h[D3Q27System::INV_0P0];
-                        (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3)     = h[D3Q27System::INV_00P];
-                        (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3)    = h[D3Q27System::INV_PP0];
-                        (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3)   = h[D3Q27System::INV_MP0];
-                        (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3)    = h[D3Q27System::INV_P0P];
-                        (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3)   = h[D3Q27System::INV_M0P];
-                        (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3)    = h[D3Q27System::INV_0PP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3)   = h[D3Q27System::INV_0MP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3)   = h[D3Q27System::INV_PPP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3)  = h[D3Q27System::INV_MPP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3)  = h[D3Q27System::INV_PMP];
-                        (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3) = h[D3Q27System::INV_MMP];
-
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3)     = h[D3Q27System::INV_M00];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3)     = h[D3Q27System::INV_0M0];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p)     = h[D3Q27System::INV_00M];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3)   = h[D3Q27System::INV_MM0];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3)    = h[D3Q27System::INV_PM0];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p)   = h[D3Q27System::INV_M0M];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p)    = h[D3Q27System::INV_P0M];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p)   = h[D3Q27System::INV_0MM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p)    = h[D3Q27System::INV_0PM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p) = h[D3Q27System::INV_MMM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p)  = h[D3Q27System::INV_PMM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p)  = h[D3Q27System::INV_MPM];
-                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p)   = h[D3Q27System::INV_PPM];
-
-                        (*this->zeroDistributionsH)(x1, x2, x3) = h[D3Q27System::DIR_000];
+                        (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3)     = h[INV_P00];
+                        (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3)     = h[INV_0P0];
+                        (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3)     = h[INV_00P];
+                        (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3)    = h[INV_PP0];
+                        (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3)   = h[INV_MP0];
+                        (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3)    = h[INV_P0P];
+                        (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3)   = h[INV_M0P];
+                        (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3)    = h[INV_0PP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3)   = h[INV_0MP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3)   = h[INV_PPP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3)  = h[INV_MPP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3)  = h[INV_PMP];
+                        (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3) = h[INV_MMP];
+
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3)     = h[INV_M00];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3)     = h[INV_0M0];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p)     = h[INV_00M];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3)   = h[INV_MM0];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3)    = h[INV_PM0];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p)   = h[INV_M0M];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p)    = h[INV_P0M];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p)   = h[INV_0MM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p)    = h[INV_0PM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p) = h[INV_MMM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p)  = h[INV_PMM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p)  = h[INV_MPM];
+                        (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p)   = h[INV_PPM];
+
+                        (*this->zeroDistributionsH)(x1, x2, x3) = h[DIR_000];
 
                         /////////////////////   END OF OLD BGK SOLVER ///////////////////////////////
                     }
@@ -1180,40 +1182,42 @@ void MultiphaseCumulantLBMKernel::calculate(int step)
 
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseCumulantLBMKernel::gradX1_phi()
+real MultiphaseCumulantLBMKernel::gradX1_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX1[k] * phi[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal MultiphaseCumulantLBMKernel::gradX2_phi()
+real MultiphaseCumulantLBMKernel::gradX2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX2[k] * phi[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal MultiphaseCumulantLBMKernel::gradX3_phi()
+real MultiphaseCumulantLBMKernel::gradX3_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * DX3[k] * phi[k];
     }
     return 3.0 * sum;
 }
 
-LBMReal MultiphaseCumulantLBMKernel::nabla2_phi()
+real MultiphaseCumulantLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+    using namespace vf::lbm::dir;
+
+    real sum = 0.0;
     for (int k = FSTARTDIR; k <= FENDDIR; k++) {
         sum += WEIGTH[k] * (phi[k] - phi[DIR_000]);
     }
@@ -1223,6 +1227,8 @@ LBMReal MultiphaseCumulantLBMKernel::nabla2_phi()
 void MultiphaseCumulantLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -1277,10 +1283,12 @@ void MultiphaseCumulantLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseCumulantLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
     using namespace D3Q27System;
+    using namespace vf::lbm::dir;
+
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.h
index fdc47f340dbfaadfd40f4f62885350a82f2cc202..1402e35f0626399c30875d3f58bbcd256367d965 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseCumulantLBMKernel.h
@@ -51,50 +51,50 @@ public:
    virtual ~MultiphaseCumulantLBMKernel(void) = default;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; }
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
 
    //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
+   real h  [D3Q27System::ENDF+1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
 
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
    mu::value_type muRho;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.cpp
index bd4df8aea33d26b3db75af3e00df564b7ded3efe..551d9d21188cbb865c55bec156469be6c88d8b95 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.cpp
@@ -39,6 +39,7 @@
 #include "DataSet3D.h"
 #include "LBMKernel.h"
 #include <cmath>
+#include "UbMath.h"
 
 #define PROOF_CORRECTNESS
 
@@ -51,16 +52,16 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::initDataSet()
 	SPtr<DistributionArray3D> h(new D3Q27EsoTwist3DSplittedVector( nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.9)); // For phase-field
 
 	SPtr<PhaseFieldArray3D> divU1(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<LBMReal, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<real, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 	dataSet->setFdistributions(f);
 	dataSet->setHdistributions(h); // For phase-field
 	dataSet->setPhaseField(divU1);
 	dataSet->setPressureField(pressure);
 
-	phaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
+	phaseField = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
 
-	divU = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	divU = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 }
 //////////////////////////////////////////////////////////////////////////
 SPtr<LBMKernel> MultiphasePressureFilterCompressibleAirLBMKernel::clone()
@@ -91,23 +92,27 @@ SPtr<LBMKernel> MultiphasePressureFilterCompressibleAirLBMKernel::clone()
 	return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterCompressibleAirLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+void  MultiphasePressureFilterCompressibleAirLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real m2 = mfa + mfc;
+	real m1 = mfc - mfa;
+	real m0 = m2 + mfb;
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -115,20 +120,24 @@ void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardInverseChimeraWi
 
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterCompressibleAirLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+void  MultiphasePressureFilterCompressibleAirLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real m1 = (mfa + mfc) + mfb;
+	real m2 = mfc - mfa;
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
-void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -138,13 +147,15 @@ void  MultiphasePressureFilterCompressibleAirLBMKernel::backwardChimera(LBMReal&
 void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 {
 	using namespace D3Q27System;
-	using namespace UbMath;
+//	using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::basics::constant;
 
 	forcingX1 = 0.0;
 	forcingX2 = 0.0;
 	forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = c4 / interfaceWidth; //1.0;//1.5;
+	real oneOverInterfaceScale = c4o1 / interfaceWidth; //1.0;//1.5;
 														 /////////////////////////////////////
 
 	localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -155,7 +166,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 	nonLocalDistributionsH1 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getHdistributions())->getNonLocalDistributions();
 	zeroDistributionsH1     = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getHdistributions())->getZeroDistributions();
 
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -178,34 +189,34 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					int x2p = x2 + 1;
 					int x3p = x3 + 1;
 
-					LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+					real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
 					(*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
 						(((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
 							((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -242,16 +253,16 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 
 					mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0 / densityRatio;
+					real rhoH = 1.0;
+					real rhoL = 1.0 / densityRatio;
 
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-					LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 					
-					LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH); //Incompressible
+					real rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH); //Incompressible
 					//LBMReal rho = rhoH + rhoToPhi * ((*pressure)(x1, x2, x3) - phiH); //wrong?
 					//! variable density -> TRANSFER!
 					//LBMReal rho = rhoH * ((*phaseField)(x1, x2, x3)) + rhoL * ((*phaseField2)(x1, x2, x3));
@@ -266,7 +277,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 		}
 	}
 
-	LBMReal collFactorM;
+	real collFactorM;
 
 	////Periodic Filter
 	for (int x3 = minX3-1; x3 <= maxX3; x3++) {
@@ -274,7 +285,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 			for (int x1 = minX1-1; x1 <= maxX1; x1++) {
 				if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) {
 
-					LBMReal sum = 0.;
+					real sum = 0.;
 
 					///Version for boundaries
 					for (int xx = -1; xx <= 1; xx++) {
@@ -290,9 +301,9 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 								int zzz = zz + x3;
 
 								if (!bcArray->isSolid(xxx, yyy, zzz) && !bcArray->isUndefined(xxx, yyy, zzz)) {
-									sum+= 64.0/(216.0*(c1+c3*abs(xx))* (c1 + c3 * abs(yy))* (c1 + c3 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
+									sum+= 64.0/(216.0*(c1o1+c3o1*abs(xx))* (c1o1 + c3o1 * abs(yy))* (c1o1 + c3o1 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
 								}
-								else{ sum+= 64.0 / (216.0 * (c1 + c3 * abs(xx)) * (c1 + c3 * abs(yy)) * (c1 + c3 * abs(zz))) * (*pressureOld)(x1, x2, x3);
+								else{ sum+= 64.0 / (216.0 * (c1o1 + c3o1 * abs(xx)) * (c1o1 + c3o1 * abs(yy)) * (c1o1 + c3o1 * abs(zz))) * (*pressureOld)(x1, x2, x3);
 								}
 
 
@@ -338,126 +349,126 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 
 					findNeighbors(phaseField, x1, x2, x3);
 
-					LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0 / densityRatio;
-
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-					LBMReal dX1_phi = gradX1_phi();
-					LBMReal dX2_phi = gradX2_phi();
-					LBMReal dX3_phi = gradX3_phi();
-
-					LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-					LBMReal normX1 = dX1_phi / denom;
-					LBMReal normX2 = dX2_phi / denom;
-					LBMReal normX3 = dX3_phi / denom;
+					real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+					real rhoH = 1.0;
+					real rhoL = 1.0 / densityRatio;
+
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+					real dX1_phi = gradX1_phi();
+					real dX2_phi = gradX2_phi();
+					real dX3_phi = gradX3_phi();
+
+					real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+					real normX1 = dX1_phi / denom;
+					real normX2 = dX2_phi / denom;
+					real normX3 = dX3_phi / denom;
 
 
 
 					collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-					LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+					real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
 					//----------- Calculating Macroscopic Values -------------
-					LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH); //Incompressible
+					real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH); //Incompressible
 					//LBMReal rho = rhoL + (rhoH - rhoL) * phi[REST] + (one - phi[REST]) * (*pressure)(x1, x2, x3) * three; //compressible
 
-					LBMReal m0, m1, m2;
-					LBMReal rhoRef=c1;
+					real m0, m1, m2;
+					real rhoRef=c1o1;
 
-					LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+					real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 						(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 						(mfcbb - mfabb))/rhoRef;
-					LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+					real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 						(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 						(mfbcb - mfbab))/rhoRef;
-					LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+					real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 						(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 						(mfbbc - mfbba))/rhoRef;
 
-					LBMReal gradPx = 0.0;
-					LBMReal gradPy = 0.0;
-					LBMReal gradPz = 0.0;
+					real gradPx = 0.0;
+					real gradPy = 0.0;
+					real gradPz = 0.0;
 					for (int dir1 = -1; dir1 <= 1; dir1++) {
 						for (int dir2 = -1; dir2 <= 1; dir2++) {
 							int yyy = x2 + dir1;
 							int zzz = x3 + dir2;
 							if (!bcArray->isSolid(x1-1, yyy, zzz) && !bcArray->isUndefined(x1-1, yyy, zzz)) {
-								gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(x1 + 1, yyy, zzz) && !bcArray->isUndefined(x1 + 1, yyy, zzz)) {
-								gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 							int xxx = x1 + dir1;
 							if (!bcArray->isSolid(xxx, x2-1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-								gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(xxx, x2+1, zzz) && !bcArray->isUndefined(xxx, x2+1, zzz)) {
-								gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 							yyy = x2 + dir2;
 							if (!bcArray->isSolid(xxx, yyy, x3-1) && !bcArray->isUndefined(xxx, yyy, x3-1)) {
-								gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(xxx, yyy, x3+1) && !bcArray->isUndefined(xxx, yyy, x3+1)) {
-								gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 						}
 					}
 
 					//Viscosity increase by pressure gradient
-					LBMReal errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
+					real errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
 					//LBMReal limVis = 0.0000001*10;//0.01;
 					// collFactorM =collFactorM/(c1+limVis*(errPhi*errPhi)*collFactorM);
 					// collFactorM = (collFactorM < 1.8) ? 1.8 : collFactorM;
@@ -483,14 +494,14 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					vvy += mu * dX2_phi * c1o2 / rho ;
 					vvz += mu * dX3_phi * c1o2 / rho;
 
-					LBMReal vx2;
-					LBMReal vy2;
-					LBMReal vz2;
+					real vx2;
+					real vy2;
+					real vz2;
 					vx2 = vvx * vvx;
 					vy2 = vvy * vvy;
 					vz2 = vvz * vvz;
 					///////////////////////////////////////////////////////////////////////////////////////////               
-					LBMReal oMdrho;
+					real oMdrho;
 
 
 					oMdrho = mfccc + mfaaa;
@@ -520,8 +531,8 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 															 ////////////////////////////////////////////////////////////////////////////////////
-					LBMReal wadjust;
-					LBMReal qudricLimit = 0.01;
+					real wadjust;
+					real qudricLimit = 0.01;
 					////////////////////////////////////////////////////////////////////////////////////
 					//Hin
 					////////////////////////////////////////////////////////////////////////////////////
@@ -754,21 +765,21 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					////////////////////////////////////////////////////////////////////////////////////
 
 					// mfaaa = 0.0;
-					LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+					real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 											//  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 											//  LBMReal OxyyMxzz  = 1.;//2+s9;//
-					LBMReal O4 = 1.;
-					LBMReal O5 = 1.;
-					LBMReal O6 = 1.;
+					real O4 = 1.;
+					real O5 = 1.;
+					real O6 = 1.;
 
 					/////fourth order parameters; here only for test. Move out of loop!
 
-					LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-					LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+					real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+					real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
 					//    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-					LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 					//FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-					LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 					//Cum 4.
@@ -776,21 +787,21 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 					//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-					LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-					LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-					LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+					real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+					real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+					real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+					real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 					//Cum 5.
-					LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-					LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-					LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+					real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+					real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+					real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 					//Cum 6.
-					LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+					real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 						- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 						- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 						- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -804,21 +815,21 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 
 					//2.
 					// linear combinations
-					LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+					real mxxPyyPzz = mfcaa + mfaca + mfaac;
 
 					//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 					mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 										//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-					LBMReal mxxMyy = mfcaa - mfaca;
-					LBMReal mxxMzz = mfcaa - mfaac;
+					real mxxMyy = mfcaa - mfaca;
+					real mxxMzz = mfcaa - mfaac;
 
-					LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-					LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-					LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+					real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+					real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+					real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-					LBMReal Dxy = -three * collFactorM * mfbba;
-					LBMReal Dxz = -three * collFactorM * mfbab;
-					LBMReal Dyz = -three * collFactorM * mfabb;
+					real Dxy = -c3o1 * collFactorM * mfbba;
+					real Dxz = -c3o1 * collFactorM * mfbab;
+					real Dyz = -c3o1 * collFactorM * mfabb;
 
 					//relax
 					mxxPyyPzz += OxxPyyPzz * (/*mfaaa*/ - mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -845,14 +856,14 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 
 					//3.
 					// linear combinations
-					LBMReal mxxyPyzz = mfcba + mfabc;
-					LBMReal mxxyMyzz = mfcba - mfabc;
+					real mxxyPyzz = mfcba + mfabc;
+					real mxxyMyzz = mfcba - mfabc;
 
-					LBMReal mxxzPyyz = mfcab + mfacb;
-					LBMReal mxxzMyyz = mfcab - mfacb;
+					real mxxzPyyz = mfcab + mfacb;
+					real mxxzMyyz = mfcab - mfacb;
 
-					LBMReal mxyyPxzz = mfbca + mfbac;
-					LBMReal mxyyMxzz = mfbca - mfbac;
+					real mxyyPxzz = mfbca + mfbac;
+					real mxyyMxzz = mfbca - mfbac;
 
 					//relax
 					wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -879,12 +890,12 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
 
 					//4.
-					CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-					CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-					CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-					CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-					CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-					CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+					CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+					CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+					CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+					CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+					CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+					CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 					//5.
 					CUMbcc += O5 * (-CUMbcc);
@@ -904,9 +915,9 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 					mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 					//5.
 					mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -936,9 +947,9 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					mfaba = -mfaba;
 					mfaab = -mfaab;
 					//////////////////////////////////////////////////////////////////////////////////////
-					mfbaa += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (2 * dxux * dX1_phi + Dxy * dX2_phi + Dxz * dX3_phi) / (rho);
-					mfaba += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (Dxy * dX1_phi + 2 * dyuy * dX2_phi + Dyz * dX3_phi) / (rho);
-					mfaab += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (Dxz * dX1_phi + Dyz * dX2_phi + 2 * dyuy * dX3_phi) / (rho);
+					mfbaa += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (2 * dxux * dX1_phi + Dxy * dX2_phi + Dxz * dX3_phi) / (rho);
+					mfaba += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (Dxy * dX1_phi + 2 * dyuy * dX2_phi + Dyz * dX3_phi) / (rho);
+					mfaab += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (Dxz * dX1_phi + Dyz * dX2_phi + 2 * dyuy * dX3_phi) / (rho);
 					////////////////////////////////////////////////////////////////////////////////////
 					//back
 					////////////////////////////////////////////////////////////////////////////////////
@@ -1152,7 +1163,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 					//proof correctness
 					//////////////////////////////////////////////////////////////////////////
 					//#ifdef  PROOF_CORRECTNESS
-					LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 					//			   //LBMReal dif = fabs(drho - rho_post);
@@ -1211,7 +1222,7 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 /////////////////////  P H A S E - F I E L D   S O L V E R
 ////////////////////////////////////////////
 /////CUMULANT PHASE-FIELD
-					LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+					real omegaD =1.0/( 3.0 * mob + 0.5);
 					{
 						mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 						mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -1248,31 +1259,31 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// second component
-						LBMReal concentration =
+						real concentration =
 							((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 								(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 								((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal oneMinusRho = c1- concentration;
+						real oneMinusRho = c1o1 - concentration;
 
-						LBMReal cx =
+						real cx =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 								(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 								(mfcbb - mfabb));
-						LBMReal cy =
+						real cy =
 							((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 								(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 								(mfbcb - mfbab));
-						LBMReal cz =
+						real cz =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 								(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 								(mfbbc - mfbba));
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// calculate the square of velocities for this lattice node
-						LBMReal cx2 = cx * cx;
-						LBMReal cy2 = cy * cy;
-						LBMReal cz2 = cz * cz;
+						real cx2 = cx * cx;
+						real cy2 = cy * cy;
+						real cz2 = cz * cz;
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 						//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -1281,85 +1292,85 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - experimental Cumulant ... to be published ... hopefully
 						//!
 
 						// linearized orthogonalization of 3rd order central moments
-						LBMReal Mabc = mfabc - mfaba * c1o3;
-						LBMReal Mbca = mfbca - mfbaa * c1o3;
-						LBMReal Macb = mfacb - mfaab * c1o3;
-						LBMReal Mcba = mfcba - mfaba * c1o3;
-						LBMReal Mcab = mfcab - mfaab * c1o3;
-						LBMReal Mbac = mfbac - mfbaa * c1o3;
+						real Mabc = mfabc - mfaba * c1o3;
+						real Mbca = mfbca - mfbaa * c1o3;
+						real Macb = mfacb - mfaab * c1o3;
+						real Mcba = mfcba - mfaba * c1o3;
+						real Mcab = mfcab - mfaab * c1o3;
+						real Mbac = mfbac - mfbaa * c1o3;
 						// linearized orthogonalization of 5th order central moments
-						LBMReal Mcbc = mfcbc - mfaba * c1o9;
-						LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-						LBMReal Mccb = mfccb - mfaab * c1o9;
+						real Mcbc = mfcbc - mfaba * c1o9;
+						real Mbcc = mfbcc - mfbaa * c1o9;
+						real Mccb = mfccb - mfaab * c1o9;
 
 						// collision of 1st order moments
-						cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-							normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-						cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-							normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-						cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-							normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+							normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+							normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+							normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 						cx2 = cx * cx;
 						cy2 = cy * cy;
 						cz2 = cz * cz;
 
 						// equilibration of 2nd order moments
-						mfbba = zeroReal;
-						mfbab = zeroReal;
-						mfabb = zeroReal;
+						mfbba = c0o1;
+						mfbab = c0o1;
+						mfabb = c0o1;
 
 						mfcaa = c1o3 * concentration;
 						mfaca = c1o3 * concentration;
 						mfaac = c1o3 * concentration;
 
 						// equilibration of 3rd order moments
-						Mabc = zeroReal;
-						Mbca = zeroReal;
-						Macb = zeroReal;
-						Mcba = zeroReal;
-						Mcab = zeroReal;
-						Mbac = zeroReal;
-						mfbbb = zeroReal;
+						Mabc = c0o1;
+						Mbca = c0o1;
+						Macb = c0o1;
+						Mcba = c0o1;
+						Mcab = c0o1;
+						Mbac = c0o1;
+						mfbbb = c0o1;
 
 						// from linearized orthogonalization 3rd order central moments to central moments
 						mfabc = Mabc + mfaba * c1o3;
@@ -1374,14 +1385,14 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 						mfcac = c1o9 * concentration;
 						mfcca = c1o9 * concentration;
 
-						mfcbb = zeroReal;
-						mfbcb = zeroReal;
-						mfbbc = zeroReal;
+						mfcbb = c0o1;
+						mfbcb = c0o1;
+						mfbbc = c0o1;
 
 						// equilibration of 5th order moments
-						Mcbc = zeroReal;
-						Mbcc = zeroReal;
-						Mccb = zeroReal;
+						Mcbc = c0o1;
+						Mbcc = c0o1;
+						Mccb = c0o1;
 
 						// from linearized orthogonalization 5th order central moments to central moments
 						mfcbc = Mcbc + mfaba * c1o9;
@@ -1399,39 +1410,39 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -1472,58 +1483,72 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX1_phi()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX1_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX2_phi()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX2_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX3_phi()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX3_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX1_phi2()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX2_phi2()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::gradX3_phi2()
+real MultiphasePressureFilterCompressibleAirLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
 }
 
-LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::nabla2_phi()
+real MultiphasePressureFilterCompressibleAirLBMKernel::nabla2_phi()
 {
 	using namespace D3Q27System;
-	LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+	real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -1543,6 +1568,8 @@ LBMReal MultiphasePressureFilterCompressibleAirLBMKernel::nabla2_phi()
 void MultiphasePressureFilterCompressibleAirLBMKernel::computePhasefield()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
 	int minX1 = ghostLayerWidth;
@@ -1597,10 +1624,11 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::computePhasefield()
 	}
 }
 
-void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -1617,10 +1645,11 @@ void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors(CbArray3D<L
 	}
 }
 
-void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphasePressureFilterCompressibleAirLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.h
index e15f29e0434c0d5f59977226cab91455f2a39f70..65be707f90d1327cad559cc7f9361e74508bcd30 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterCompressibleAirLBMKernel.h
@@ -57,60 +57,60 @@ public:
     //CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure;
 
 
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; }
 protected:
     virtual void initDataSet();
     void swapDistributions() override;
 
     void initForcing();
 
-    void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-    void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+    void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+    void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-    LBMReal f1[D3Q27System::ENDF+1];
+    real f1[D3Q27System::ENDF+1];
 
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
 
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU; 
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU; 
 
-    LBMReal h  [D3Q27System::ENDF+1];
-    LBMReal h2[D3Q27System::ENDF + 1];
-    LBMReal g  [D3Q27System::ENDF+1];
-    LBMReal phi[D3Q27System::ENDF+1];
-    LBMReal phi2[D3Q27System::ENDF + 1];
-    LBMReal pr1[D3Q27System::ENDF+1];
-    LBMReal phi_cutoff[D3Q27System::ENDF+1];
+    real h  [D3Q27System::ENDF+1];
+    real h2[D3Q27System::ENDF + 1];
+    real g  [D3Q27System::ENDF+1];
+    real phi[D3Q27System::ENDF+1];
+    real phi2[D3Q27System::ENDF + 1];
+    real pr1[D3Q27System::ENDF+1];
+    real phi_cutoff[D3Q27System::ENDF+1];
 
-    LBMReal gradX1_phi();
-    LBMReal gradX2_phi();
-    LBMReal gradX3_phi();
-    LBMReal gradX1_phi2();
-    LBMReal gradX2_phi2();
-    LBMReal gradX3_phi2();
+    real gradX1_phi();
+    real gradX2_phi();
+    real gradX3_phi();
+    real gradX1_phi2();
+    real gradX2_phi2();
+    real gradX3_phi2();
     void computePhasefield();
-    void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-    void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+    void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+    void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
 
-    LBMReal nabla2_phi();
+    real nabla2_phi();
 
     mu::value_type muX1,muX2,muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
     mu::value_type muRho;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.cpp
index e1d24a2272f0846a29045bd9438db6b0dc729d36..547ad83259d205f8da99184c3cf6c5a761e7f7b2 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.cpp
@@ -39,6 +39,7 @@
 #include "DataSet3D.h"
 #include "LBMKernel.h"
 #include <cmath>
+#include "UbMath.h"
 
 #define PROOF_CORRECTNESS
 
@@ -51,16 +52,16 @@ void MultiphasePressureFilterLBMKernel::initDataSet()
 	SPtr<DistributionArray3D> h(new D3Q27EsoTwist3DSplittedVector( nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0)); // For phase-field
 
 	//SPtr<PhaseFieldArray3D> divU1(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<LBMReal, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<real, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 	dataSet->setFdistributions(f);
 	dataSet->setHdistributions(h); // For phase-field
 	//dataSet->setPhaseField(divU1);
 	dataSet->setPressureField(pressure);
 
-	phaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	phaseField = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 
-	divU = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	divU = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 }
 //////////////////////////////////////////////////////////////////////////
 SPtr<LBMKernel> MultiphasePressureFilterLBMKernel::clone()
@@ -92,23 +93,26 @@ SPtr<LBMKernel> MultiphasePressureFilterLBMKernel::clone()
 	return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+void  MultiphasePressureFilterLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	real m2 = mfa + mfc;
+	real m1 = mfc - mfa;
+	real m0 = m2 + mfb;
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+void  MultiphasePressureFilterLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -116,20 +120,24 @@ void  MultiphasePressureFilterLBMKernel::backwardInverseChimeraWithKincompressib
 
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphasePressureFilterLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+void  MultiphasePressureFilterLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real m1 = (mfa + mfc) + mfb;
+	real m2 = mfc - mfa;
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
-void  MultiphasePressureFilterLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+void  MultiphasePressureFilterLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -139,13 +147,15 @@ void  MultiphasePressureFilterLBMKernel::backwardChimera(LBMReal& mfa, LBMReal&
 void MultiphasePressureFilterLBMKernel::calculate(int step)
 {
 	using namespace D3Q27System;
-	using namespace UbMath;
+//	using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::basics::constant;
 
 	forcingX1 = 0.0;
 	forcingX2 = 0.0;
 	forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = c4 / interfaceWidth; //1.0;//1.5;
+	real oneOverInterfaceScale = c4o1 / interfaceWidth; //1.0;//1.5;
 														 /////////////////////////////////////
 
 	localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -156,7 +166,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 	nonLocalDistributionsH1 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getHdistributions())->getNonLocalDistributions();
 	zeroDistributionsH1     = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getHdistributions())->getZeroDistributions();
 
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -179,34 +189,34 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					int x2p = x2 + 1;
 					int x3p = x3 + 1;
 
-					LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+					real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
 					(*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
 						(((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
 							((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -243,16 +253,16 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 
 					mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0 / densityRatio;
+					real rhoH = 1.0;
+					real rhoL = 1.0 / densityRatio;
 
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-					LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-					LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
+					real rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
 
 					(*pressureOld)(x1, x2, x3) = (*pressure)(x1, x2, x3) + rho * c1o3 * drho;
 				}
@@ -260,7 +270,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 		}
 	}
 
-	LBMReal collFactorM;
+	real collFactorM;
 
 	////Periodic Filter
 	for (int x3 = minX3-1; x3 <= maxX3; x3++) {
@@ -268,7 +278,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 			for (int x1 = minX1-1; x1 <= maxX1; x1++) {
 				if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) {
 
-					LBMReal sum = 0.;
+					real sum = 0.;
 
 					///Version for boundaries
 					for (int xx = -1; xx <= 1; xx++) {
@@ -284,9 +294,9 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 								int zzz = zz + x3;
 
 								if (!bcArray->isSolid(xxx, yyy, zzz) && !bcArray->isUndefined(xxx, yyy, zzz)) {
-									sum+= 64.0/(216.0*(c1+c3*abs(xx))* (c1 + c3 * abs(yy))* (c1 + c3 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
+									sum+= 64.0/(216.0*(c1o1+c3o1 *abs(xx))* (c1o1 + c3o1 * abs(yy))* (c1o1 + c3o1 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
 								}
-								else{ sum+= 64.0 / (216.0 * (c1 + c3 * abs(xx)) * (c1 + c3 * abs(yy)) * (c1 + c3 * abs(zz))) * (*pressureOld)(x1, x2, x3);
+								else{ sum+= 64.0 / (216.0 * (c1o1 + c3o1 * abs(xx)) * (c1o1 + c3o1 * abs(yy)) * (c1o1 + c3o1 * abs(zz))) * (*pressureOld)(x1, x2, x3);
 								}
 
 
@@ -332,48 +342,48 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 
 					findNeighbors(phaseField, x1, x2, x3);
 
-					LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0 / densityRatio;
-
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-					LBMReal dX1_phi = gradX1_phi();
-					LBMReal dX2_phi = gradX2_phi();
-					LBMReal dX3_phi = gradX3_phi();
-
-					LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-					LBMReal normX1 = dX1_phi / denom;
-					LBMReal normX2 = dX2_phi / denom;
-					LBMReal normX3 = dX3_phi / denom;
+					real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+					real rhoH = 1.0;
+					real rhoL = 1.0 / densityRatio;
+
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+					real dX1_phi = gradX1_phi();
+					real dX2_phi = gradX2_phi();
+					real dX3_phi = gradX3_phi();
+
+					real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+					real normX1 = dX1_phi / denom;
+					real normX2 = dX2_phi / denom;
+					real normX3 = dX3_phi / denom;
 
 					dX1_phi = normX1 * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale;
                     dX2_phi = normX2 * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale;
@@ -382,77 +392,77 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-					LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+					real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
 					//----------- Calculating Macroscopic Values -------------
-					LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+					real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
-					LBMReal m0, m1, m2;
-					LBMReal rhoRef=c1;
+					real m0, m1, m2;
+					real rhoRef=c1o1;
 
-					LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+					real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 						(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 						(mfcbb - mfabb))/rhoRef;
-					LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+					real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 						(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 						(mfbcb - mfbab))/rhoRef;
-					LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+					real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 						(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 						(mfbbc - mfbba))/rhoRef;
 
-					LBMReal gradPx = 0.0;
-					LBMReal gradPy = 0.0;
-					LBMReal gradPz = 0.0;
+					real gradPx = 0.0;
+					real gradPy = 0.0;
+					real gradPz = 0.0;
 					for (int dir1 = -1; dir1 <= 1; dir1++) {
 						for (int dir2 = -1; dir2 <= 1; dir2++) {
 							int yyy = x2 + dir1;
 							int zzz = x3 + dir2;
 							if (!bcArray->isSolid(x1-1, yyy, zzz) && !bcArray->isUndefined(x1-1, yyy, zzz)) {
-								gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(x1 + 1, yyy, zzz) && !bcArray->isUndefined(x1 + 1, yyy, zzz)) {
-								gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 							int xxx = x1 + dir1;
 							if (!bcArray->isSolid(xxx, x2-1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-								gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(xxx, x2+1, zzz) && !bcArray->isUndefined(xxx, x2+1, zzz)) {
-								gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 							yyy = x2 + dir2;
 							if (!bcArray->isSolid(xxx, yyy, x3-1) && !bcArray->isUndefined(xxx, yyy, x3-1)) {
-								gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							if (!bcArray->isSolid(xxx, yyy, x3+1) && !bcArray->isUndefined(xxx, yyy, x3+1)) {
-								gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 							else {
-								gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+								gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 							}
 
 						}
 					}
 
 					//Viscosity increase by pressure gradient
-					LBMReal errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
+					real errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
 					//LBMReal limVis = 0.0000001*10;//0.01;
 					// collFactorM =collFactorM/(c1+limVis*(errPhi*errPhi)*collFactorM);
 					// collFactorM = (collFactorM < 1.8) ? 1.8 : collFactorM;
@@ -483,45 +493,45 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					//vvz += mu * dX3_phi * c1o2 / rho;
 
 					//Abbas
-					LBMReal pStar = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real pStar = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfaba + mfcbc) + (mfabc + mfcba)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb) * c1o3;
 
-					LBMReal M200 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real M200 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfaba + mfcbc) + (mfabc + mfcba))))
 						+ ((mfabb + mfcbb))));
-					LBMReal M020 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real M020 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfbab + mfbcb))));
-					LBMReal M002 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real M002 = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (+((mfaba + mfcbc) + (mfabc + mfcba)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfbba + mfbbc))));
 
-					LBMReal M110 = ((((((mfaaa + mfccc) + (-mfcac - mfaca)) + ((mfaac + mfcca) + (-mfcaa - mfacc)))
+					real M110 = ((((((mfaaa + mfccc) + (-mfcac - mfaca)) + ((mfaac + mfcca) + (-mfcaa - mfacc)))
 						+ (((mfaab + mfccb) + (-mfacb - mfcab))))
 						));
-					LBMReal M101 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ((mfcac + mfaca) - (mfcaa + mfacc)))
+					real M101 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ((mfcac + mfaca) - (mfcaa + mfacc)))
 						+ (((mfaba + mfcbc) + (-mfabc - mfcba))))
 						));
-					LBMReal M011 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ((mfcaa + mfacc) - (mfcac + mfaca)))
+					real M011 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ((mfcaa + mfacc) - (mfcac + mfaca)))
 						+ (((mfbaa + mfbcc) + (-mfbac - mfbca))))
 						));
-					LBMReal vvxI = vvx;
-					LBMReal vvyI = vvy;
-					LBMReal vvzI = vvz;
+					real vvxI = vvx;
+					real vvyI = vvy;
+					real vvzI = vvz;
 
 					//LBMReal collFactorStore = collFactorM;
 					//LBMReal stress;
 					for (int iter = 0; iter < 1; iter++) {
-						LBMReal OxxPyyPzz = 1.0;
-						LBMReal mxxPyyPzz = (M200 - vvxI * vvxI) + (M020 - vvyI * vvyI) + (M002 - vvzI * vvzI);
-						mxxPyyPzz -= c3 * pStar;
+						real OxxPyyPzz = 1.0;
+						real mxxPyyPzz = (M200 - vvxI * vvxI) + (M020 - vvyI * vvyI) + (M002 - vvzI * vvzI);
+						mxxPyyPzz -= c3o1 * pStar;
 
-						LBMReal mxxMyy = (M200 - vvxI * vvxI) - (M020 - vvyI * vvyI);
-						LBMReal mxxMzz = (M200 - vvxI * vvxI) - (M002 - vvzI * vvzI);
-						LBMReal mxy = M110 - vvxI * vvyI;
-						LBMReal mxz = M101 - vvxI * vvzI;
-						LBMReal myz = M011 - vvyI * vvzI;
+						real mxxMyy = (M200 - vvxI * vvxI) - (M020 - vvyI * vvyI);
+						real mxxMzz = (M200 - vvxI * vvxI) - (M002 - vvzI * vvzI);
+						real mxy = M110 - vvxI * vvyI;
+						real mxz = M101 - vvxI * vvzI;
+						real myz = M011 - vvyI * vvzI;
 
 						///////Bingham
 						//LBMReal dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
@@ -541,16 +551,16 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						//////!Bingham
 
 
-						mxxMyy *= c1 - collFactorM * c1o2;
-						mxxMzz *= c1 - collFactorM * c1o2;
-						mxy *= c1 - collFactorM * c1o2;
-						mxz *= c1 - collFactorM * c1o2;
-						myz *= c1 - collFactorM * c1o2;
-						mxxPyyPzz *= c1 - OxxPyyPzz * c1o2;
+						mxxMyy *= c1o1 - collFactorM * c1o2;
+						mxxMzz *= c1o1 - collFactorM * c1o2;
+						mxy *= c1o1 - collFactorM * c1o2;
+						mxz *= c1o1 - collFactorM * c1o2;
+						myz *= c1o1 - collFactorM * c1o2;
+						mxxPyyPzz *= c1o1 - OxxPyyPzz * c1o2;
 						//mxxPyyPzz += c3o2 * pStar;
-						LBMReal mxx = (mxxMyy + mxxMzz + mxxPyyPzz) * c1o3;
-						LBMReal myy = (-c2 * mxxMyy + mxxMzz + mxxPyyPzz) * c1o3;
-						LBMReal mzz = (mxxMyy - c2 * mxxMzz + mxxPyyPzz) * c1o3;
+						real mxx = (mxxMyy + mxxMzz + mxxPyyPzz) * c1o3;
+						real myy = (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz) * c1o3;
+						real mzz = (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz) * c1o3;
 						vvxI = vvx - (mxx * dX1_phi + mxy * dX2_phi + mxz * dX3_phi) * rhoToPhi / (rho);
 						vvyI = vvy - (mxy * dX1_phi + myy * dX2_phi + myz * dX3_phi) * rhoToPhi / (rho);
 						vvzI = vvz - (mxz * dX1_phi + myz * dX2_phi + mzz * dX3_phi) * rhoToPhi / (rho);
@@ -560,9 +570,9 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					}
 
 
-					forcingX1 += c2 * (vvxI - vvx);
-					forcingX2 += c2 * (vvyI - vvy);
-					forcingX3 += c2 * (vvzI - vvz);
+					forcingX1 += c2o1 * (vvxI - vvx);
+					forcingX2 += c2o1 * (vvyI - vvy);
+					forcingX3 += c2o1 * (vvzI - vvz);
 
 					mfabb += c1o2 * (-forcingX1) * c2o9;
 					mfbab += c1o2 * (-forcingX2) * c2o9;
@@ -600,14 +610,14 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					//!Abbas
 
 
-					LBMReal vx2;
-					LBMReal vy2;
-					LBMReal vz2;
+					real vx2;
+					real vy2;
+					real vz2;
 					vx2 = vvx * vvx;
 					vy2 = vvy * vvy;
 					vz2 = vvz * vvz;
 					///////////////////////////////////////////////////////////////////////////////////////////               
-					LBMReal oMdrho;
+					real oMdrho;
 
 
 					oMdrho = mfccc + mfaaa;
@@ -637,8 +647,8 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 															 ////////////////////////////////////////////////////////////////////////////////////
-					LBMReal wadjust;
-					LBMReal qudricLimit = 0.01;
+					real wadjust;
+					real qudricLimit = 0.01;
 					////////////////////////////////////////////////////////////////////////////////////
 					//Hin
 					////////////////////////////////////////////////////////////////////////////////////
@@ -871,21 +881,21 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					////////////////////////////////////////////////////////////////////////////////////
 
 					// mfaaa = 0.0;
-					LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+					real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 											//  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 											//  LBMReal OxyyMxzz  = 1.;//2+s9;//
-					LBMReal O4 = 1.;
-					LBMReal O5 = 1.;
-					LBMReal O6 = 1.;
+					real O4 = 1.;
+					real O5 = 1.;
+					real O6 = 1.;
 
 					/////fourth order parameters; here only for test. Move out of loop!
 
-					LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-					LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-					LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-					LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+					real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+					real Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+					real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 					//FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-					LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 					//Cum 4.
@@ -893,21 +903,21 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 					//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-					LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-					LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-					LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+					real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+					real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+					real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+					real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 					//Cum 5.
-					LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-					LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-					LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+					real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+					real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+					real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 					//Cum 6.
-					LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+					real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 						- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 						- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 						- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -921,21 +931,21 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 
 					//2.
 					// linear combinations
-					LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+					real mxxPyyPzz = mfcaa + mfaca + mfaac;
 
 					//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 					mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 										//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-					LBMReal mxxMyy = mfcaa - mfaca;
-					LBMReal mxxMzz = mfcaa - mfaac;
+					real mxxMyy = mfcaa - mfaca;
+					real mxxMzz = mfcaa - mfaac;
 
-					LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-					LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-					LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+					real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+					real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+					real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-					LBMReal Dxy = -three * collFactorM * mfbba;
-					LBMReal Dxz = -three * collFactorM * mfbab;
-					LBMReal Dyz = -three * collFactorM * mfabb;
+					real Dxy = -c3o1 * collFactorM * mfbba;
+					real Dxz = -c3o1 * collFactorM * mfbab;
+					real Dyz = -c3o1 * collFactorM * mfabb;
 
 					//relax
 					mxxPyyPzz += OxxPyyPzz * (/*mfaaa*/ - mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -962,14 +972,14 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 
 					//3.
 					// linear combinations
-					LBMReal mxxyPyzz = mfcba + mfabc;
-					LBMReal mxxyMyzz = mfcba - mfabc;
+					real mxxyPyzz = mfcba + mfabc;
+					real mxxyMyzz = mfcba - mfabc;
 
-					LBMReal mxxzPyyz = mfcab + mfacb;
-					LBMReal mxxzMyyz = mfcab - mfacb;
+					real mxxzPyyz = mfcab + mfacb;
+					real mxxzMyyz = mfcab - mfacb;
 
-					LBMReal mxyyPxzz = mfbca + mfbac;
-					LBMReal mxyyMxzz = mfbca - mfbac;
+					real mxyyPxzz = mfbca + mfbac;
+					real mxyyMxzz = mfbca - mfbac;
 
 					//relax
 					wadjust = Oxyz + (1. - Oxyz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -996,12 +1006,12 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
 
 					//4.
-					CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-					CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-					CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-					CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-					CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-					CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+					CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+					CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+					CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+					CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+					CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+					CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 					//5.
 					CUMbcc += O5 * (-CUMbcc);
@@ -1021,9 +1031,9 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 					mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 					//5.
 					mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -1301,7 +1311,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 					//proof correctness
 					//////////////////////////////////////////////////////////////////////////
 					//#ifdef  PROOF_CORRECTNESS
-					LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 					//			   //LBMReal dif = fabs(drho - rho_post);
@@ -1360,7 +1370,7 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 /////////////////////  P H A S E - F I E L D   S O L V E R
 ////////////////////////////////////////////
 /////CUMULANT PHASE-FIELD
-					LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+					real omegaD =1.0/( 3.0 * mob + 0.5);
 					{
 						mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 						mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -1397,31 +1407,31 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// second component
-						LBMReal concentration =
+						real concentration =
 							((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 								(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 								((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal oneMinusRho = c1- concentration;
+						real oneMinusRho = c1o1 - concentration;
 
-						LBMReal cx =
+						real cx =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 								(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 								(mfcbb - mfabb));
-						LBMReal cy =
+						real cy =
 							((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 								(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 								(mfbcb - mfbab));
-						LBMReal cz =
+						real cz =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 								(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 								(mfbbc - mfbba));
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// calculate the square of velocities for this lattice node
-						LBMReal cx2 = cx * cx;
-						LBMReal cy2 = cy * cy;
-						LBMReal cz2 = cz * cz;
+						real cx2 = cx * cx;
+						real cy2 = cy * cy;
+						real cz2 = cz * cz;
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 						//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -1430,85 +1440,85 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - experimental Cumulant ... to be published ... hopefully
 						//!
 
 						// linearized orthogonalization of 3rd order central moments
-						LBMReal Mabc = mfabc - mfaba * c1o3;
-						LBMReal Mbca = mfbca - mfbaa * c1o3;
-						LBMReal Macb = mfacb - mfaab * c1o3;
-						LBMReal Mcba = mfcba - mfaba * c1o3;
-						LBMReal Mcab = mfcab - mfaab * c1o3;
-						LBMReal Mbac = mfbac - mfbaa * c1o3;
+						real Mabc = mfabc - mfaba * c1o3;
+						real Mbca = mfbca - mfbaa * c1o3;
+						real Macb = mfacb - mfaab * c1o3;
+						real Mcba = mfcba - mfaba * c1o3;
+						real Mcab = mfcab - mfaab * c1o3;
+						real Mbac = mfbac - mfbaa * c1o3;
 						// linearized orthogonalization of 5th order central moments
-						LBMReal Mcbc = mfcbc - mfaba * c1o9;
-						LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-						LBMReal Mccb = mfccb - mfaab * c1o9;
+						real Mcbc = mfcbc - mfaba * c1o9;
+						real Mbcc = mfbcc - mfbaa * c1o9;
+						real Mccb = mfccb - mfaab * c1o9;
 
 						// collision of 1st order moments
-						cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-							normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-						cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-							normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-						cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-							normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+							normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+							normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+						cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+							normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 						cx2 = cx * cx;
 						cy2 = cy * cy;
 						cz2 = cz * cz;
 
 						// equilibration of 2nd order moments
-						mfbba = zeroReal;
-						mfbab = zeroReal;
-						mfabb = zeroReal;
+						mfbba = c0o1;
+						mfbab = c0o1;
+						mfabb = c0o1;
 
 						mfcaa = c1o3 * concentration;
 						mfaca = c1o3 * concentration;
 						mfaac = c1o3 * concentration;
 
 						// equilibration of 3rd order moments
-						Mabc = zeroReal;
-						Mbca = zeroReal;
-						Macb = zeroReal;
-						Mcba = zeroReal;
-						Mcab = zeroReal;
-						Mbac = zeroReal;
-						mfbbb = zeroReal;
+						Mabc = c0o1;
+						Mbca = c0o1;
+						Macb = c0o1;
+						Mcba = c0o1;
+						Mcab = c0o1;
+						Mbac = c0o1;
+						mfbbb = c0o1;
 
 						// from linearized orthogonalization 3rd order central moments to central moments
 						mfabc = Mabc + mfaba * c1o3;
@@ -1523,14 +1533,14 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						mfcac = c1o9 * concentration;
 						mfcca = c1o9 * concentration;
 
-						mfcbb = zeroReal;
-						mfbcb = zeroReal;
-						mfbbc = zeroReal;
+						mfcbb = c0o1;
+						mfbcb = c0o1;
+						mfbbc = c0o1;
 
 						// equilibration of 5th order moments
-						Mcbc = zeroReal;
-						Mbcc = zeroReal;
-						Mccb = zeroReal;
+						Mcbc = c0o1;
+						Mbcc = c0o1;
+						Mccb = c0o1;
 
 						// from linearized orthogonalization 5th order central moments to central moments
 						mfcbc = Mcbc + mfaba * c1o9;
@@ -1548,39 +1558,39 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -1621,34 +1631,42 @@ void MultiphasePressureFilterLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphasePressureFilterLBMKernel::gradX1_phi()
+real MultiphasePressureFilterLBMKernel::gradX1_phi() // takes no arguments; reads only the member array phi[]
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3 * (WEIGTH-weighted sum of phi[] differences between opposite directions); each pair is (first index P) - (first index M). Presumably the x1-derivative of phi -- confirm against the DIR_* definitions.
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
 }
 
-LBMReal MultiphasePressureFilterLBMKernel::gradX2_phi()
+real MultiphasePressureFilterLBMKernel::gradX2_phi() // takes no arguments; reads only the member array phi[]
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3 * (WEIGTH-weighted sum of phi[] differences between opposite directions); signs are arranged so every direction with second index P enters positively and its opposite negatively. Presumably the x2-derivative of phi -- confirm against the DIR_* definitions.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
 }
 
-LBMReal MultiphasePressureFilterLBMKernel::gradX3_phi()
+real MultiphasePressureFilterLBMKernel::gradX3_phi() // takes no arguments; reads only the member array phi[]
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3 * (WEIGTH-weighted sum of phi[] differences between opposite directions); signs are arranged so every direction with third index P enters positively and its opposite negatively. Presumably the x3-derivative of phi -- confirm against the DIR_* definitions.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
 }
 
-LBMReal MultiphasePressureFilterLBMKernel::nabla2_phi()
+real MultiphasePressureFilterLBMKernel::nabla2_phi()
 {
 	using namespace D3Q27System;
-	LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+	real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -1668,6 +1686,8 @@ LBMReal MultiphasePressureFilterLBMKernel::nabla2_phi()
 void MultiphasePressureFilterLBMKernel::computePhasefield()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
 	int minX1 = ghostLayerWidth;
@@ -1722,10 +1742,11 @@ void MultiphasePressureFilterLBMKernel::computePhasefield()
 	}
 }
 
-void MultiphasePressureFilterLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphasePressureFilterLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.h
index 9b2b568b2854b3351361d8e9687fbbc6a0d7f284..d13a5aeffa95cc3ee4980edf5cc93650ecc617a3 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphasePressureFilterLBMKernel.h
@@ -51,13 +51,13 @@ public:
     virtual ~MultiphasePressureFilterLBMKernel(void) = default;
     void calculate(int step) override;
     SPtr<LBMKernel> clone() override;
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; } // always returns 0; this override does not report a measured time
 
-    void setPhaseFieldBC(LBMReal bc)
+    void setPhaseFieldBC(real bc) // stores bc in the member phaseFieldBC
     {
         phaseFieldBC = bc;
     }
-    LBMReal getPhaseFieldBC()
+    real getPhaseFieldBC() // returns the current value of the member phaseFieldBC
     {
         return phaseFieldBC;
     }
@@ -68,44 +68,44 @@ protected:
 
     void initForcing();
 
-    void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-    void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+    void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+    void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
 
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU; 
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU; 
 
-    LBMReal h  [D3Q27System::ENDF+1];
-    LBMReal phi[D3Q27System::ENDF+1];
+    real h  [D3Q27System::ENDF+1];
+    real phi[D3Q27System::ENDF+1];
 
-    LBMReal gradX1_phi();
-    LBMReal gradX2_phi();
-    LBMReal gradX3_phi();
+    real gradX1_phi();
+    real gradX2_phi();
+    real gradX3_phi();
     void computePhasefield();
-    void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+    void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
 
-    LBMReal nabla2_phi();
+    real nabla2_phi();
 
     mu::value_type muX1,muX2,muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
     mu::value_type muRho;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 
-    LBMReal phaseFieldBC { 0.0 }; // if 0.0 then light fluid on the wall, else if 1.0 havy fluid
+    real phaseFieldBC { 0.0 }; // if 0.0 then light fluid on the wall, else if 1.0 heavy fluid
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.cpp
index 7424fdcbe8a36b7020e53fd78e154577fdc9ab47..eb9606e49591c9ec550c65fe15eca025c31e5ab0 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.cpp
@@ -81,23 +81,27 @@ SPtr<LBMKernel> MultiphaseScratchCumulantLBMKernel::clone()
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
- void  MultiphaseScratchCumulantLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+ void  MultiphaseScratchCumulantLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	// Overwrites the triple (mfa, mfb, mfc) in place: mfa becomes mfa+mfb+mfc, while mfb and mfc are built from Kinverse-scaled combinations shifted by vv and v2 and then multiplied by K. vv and v2 are passed separately; callers visible in this file pass a velocity and its square.
+	real m2 = mfa + mfc; // sum of the two outer entries
+	real m1 = mfc - mfa; // difference of the two outer entries
+	real m0 = m2 + mfb; // total of all three entries
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; // m0 is by now total * Kinverse + oneMinusRho; c2o1 is presumably the constant 2 from vf::basics::constant -- confirm
 }
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseScratchCumulantLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+ void  MultiphaseScratchCumulantLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -105,20 +109,24 @@ SPtr<LBMKernel> MultiphaseScratchCumulantLBMKernel::clone()
 
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseScratchCumulantLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+ void  MultiphaseScratchCumulantLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	// Overwrites the triple (mfa, mfb, mfc) in place using vv and v2 (passed separately): mfa becomes the total, mfb the outer difference minus vv * total, mfc the outer sum plus v2 * total - 2 * vv * difference.
+	real m1 = (mfa + mfc) + mfb; // total of all three entries
+	real m2 = mfc - mfa; // difference of the two outer entries
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); // must run before mfa is overwritten below; c2o1 is presumably the constant 2 -- confirm
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
- void  MultiphaseScratchCumulantLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+ void  MultiphaseScratchCumulantLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+    using namespace vf::basics::constant;
+
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -128,13 +136,15 @@ SPtr<LBMKernel> MultiphaseScratchCumulantLBMKernel::clone()
 void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::basics::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
     forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = 1.0;// 1.0 / 3.0;
+	real oneOverInterfaceScale = 1.0;// 1.0 / 3.0;
     /////////////////////////////////////
 
     localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -158,10 +168,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
     int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
     int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
 
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
 
 
 		/////For velocity filter
@@ -182,34 +192,34 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
                         (*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
                                                     (((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
                                                     ((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -222,43 +232,43 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 						///Velocity filter
 
 
-						LBMReal rhoH = 1.0;
-						LBMReal rhoL = 1.0 / densityRatio;
-
-						LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-
-						LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
-
-						mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3) / rho * c3;
-						mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3;
-						mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3;
-						mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3;
-						mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3;
-						mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3;
-						mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3;
-						mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3;
-						mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3;
-						mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3;
-						mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3;
-						mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3;
-						mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3;
-
-						mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3;
-						mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3;
-						mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3;
-						mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3;
-						mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3;
-						mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3;
-						mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3;
-						mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3;
-						mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3;
-						mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3;
-						mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3;
-						mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3;
-						mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3;
-
-						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3;
+						real rhoH = 1.0;
+						real rhoL = 1.0 / densityRatio;
+
+						real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+
+						real rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
+
+						mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3) / rho * c3o1;
+						mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3o1;
+						mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3o1;
+						mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3o1;
+						mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3o1;
+						mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3o1;
+						mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3o1;
+						mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3o1;
+						mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3o1;
+						mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3o1;
+						mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3o1;
+						mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3o1;
+						mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3o1;
+
+						mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3o1;
+						mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3o1;
+						mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3o1;
+						mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3o1;
+						mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3o1;
+						mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3o1;
+						mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3o1;
+						mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3o1;
+						mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3o1;
+						mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3o1;
+						mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3o1;
+						mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3o1;
+						mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3o1;
+
+						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3o1;
 
 						//(*velocityX)(x1, x2, x3) = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 						//	(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
@@ -279,7 +289,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
+        real collFactorM;
         //LBMReal forcingTerm[D3Q27System::ENDF + 1];
 
         for (int x3 = minX3; x3 < maxX3; x3++) {
@@ -317,43 +327,43 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
                         findNeighbors(phaseField, x1, x2, x3);
 						//// reading distributions here appears to be unnecessary!
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
 
 						//LBMReal dX1_phi = 3.0*((
 						//	WEIGTH[TNE]*((((*phaseField)(x1 + 1, x2+1, x3+1)- (*phaseField)(x1 - 1, x2 - 1, x3 - 1))+ ((*phaseField)(x1 + 1, x2 - 1, x3 + 1) - (*phaseField)(x1 - 1, x2 + 1, x3 - 1)))
@@ -412,10 +422,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 						//dX3_phi = (2*dX3_phi -1*dX3_phi2);// 2 * dX3_phi - dX3_phi2;
 
 
-                        LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-                        LBMReal normX1 = dX1_phi/denom;
-						LBMReal normX2 = dX2_phi/denom;
-						LBMReal normX3 = dX3_phi/denom; 
+                        real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+                        real normX1 = dX1_phi/denom;
+						real normX2 = dX2_phi/denom;
+						real normX3 = dX3_phi/denom; 
 
 
 						///test for magnitude of gradient from phase indicator directly
@@ -434,10 +444,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 						collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 						//collFactorM = phi[REST] - phiL < (phiH - phiL) * 0.05 ? collFactorG : collFactorL;
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
 						if (withForcing) {
 							// muX1 = static_cast<double>(x1-1+ix1*maxX1);
@@ -448,49 +458,49 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 							forcingX2 = muForcingX2.Eval();
 							forcingX3 = muForcingX3.Eval();
 
-							LBMReal rho_m = 1.0 / densityRatio;
+							real rho_m = 1.0 / densityRatio;
 							forcingX1 = forcingX1 * (rho - rho_m);
 							forcingX2 = forcingX2 * (rho - rho_m);
 							forcingX3 = forcingX3 * (rho - rho_m);
 						}
                             			   ////Incompressible Kernal
 
-			    mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3)/rho*c3;
-			    mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3;
-			    mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3;
-			    mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3;
-			    mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3;
-			    mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3;
-			    mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3;
-			    mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3;
-			    mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3;
-			    mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3;
-			    mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3;
-			    mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3;
-			    mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3;
+			    mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3)/rho*c3o1;
+			    mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3o1;
+			    mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3o1;
+			    mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3o1;
+			    mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3o1;
+			    mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3o1;
+			    mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3o1;
+			    mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3o1;
+			    mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3o1;
+			    mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3o1;
+			    mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3o1;
+			    mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3o1;
+			    mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3o1;
 
-			    mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3;
-			    mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3;
-			    mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3;
-			    mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3;
-			    mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3;
-			    mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3;
-			    mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3;
-			    mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3;
-			    mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3;
-			    mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3;
-			    mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3;
-			    mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3;
-			    mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3;
+			    mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3o1;
+			    mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3o1;
+			    mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3o1;
+			    mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3o1;
+			    mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3o1;
+			    mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3o1;
+			    mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3o1;
+			    mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3o1;
+			    mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3o1;
+			    mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3o1;
+			    mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3o1;
+			    mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3o1;
+			    mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3o1;
 
-			    mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3;
+			    mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3o1;
 
 
 
 
 
-			   LBMReal m0, m1, m2;
-			   LBMReal rhoRef=c1;
+			   real m0, m1, m2;
+			   real rhoRef=c1o1;
 
 			  //LBMReal
 			  // FIXME: warning: unused variable 'drho'
@@ -498,13 +508,13 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 //				   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 //				   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-			   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+			   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 				   (mfcbb - mfabb))/rhoRef;
-			   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+			   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 				   (mfbcb - mfbab))/rhoRef;
-			   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+			   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 				   (mfbbc - mfbba))/rhoRef;
 
@@ -576,7 +586,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 			   ///----Classic source term 8.4.2021
 
-			   LBMReal vvxF, vvyF, vvzF;
+			   real vvxF, vvyF, vvzF;
 			   vvxF = vvx;//-2*c1o24 * lap_vx;// 
 			   vvyF = vvy;//-2*c1o24 * lap_vy;// 
 			   vvzF = vvz;//-2*c1o24 * lap_vz;// 
@@ -587,10 +597,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //if (vvxF != vvx) {
 				  // vvxF = vvxF;
 			   //}
-			   LBMReal weightGrad =  1.0-denom*denom/(denom*denom+0.0001*0.001);
-			   LBMReal dX1_phiF = dX1_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX1;
-			   LBMReal dX2_phiF = dX2_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX2;
-			   LBMReal dX3_phiF = dX3_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX3;
+			   real weightGrad =  1.0-denom*denom/(denom*denom+0.0001*0.001);
+			   real dX1_phiF = dX1_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX1;
+			   real dX2_phiF = dX2_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX2;
+			   real dX3_phiF = dX3_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX3;
 
 			   //dX1_phiF *= 1.2;
 			   //dX2_phiF *= 1.2;
@@ -603,19 +613,19 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 				  // dX2_phiF = gradPhi * normX2;
 				  // dX3_phiF = gradPhi * normX3;
 
-			   LBMReal ux2;
-			   LBMReal uy2;
-			   LBMReal uz2;
+			   real ux2;
+			   real uy2;
+			   real uz2;
 			   ux2 = vvxF * vvxF;
 			   uy2 = vvyF * vvyF;
 			   uz2 = vvzF * vvzF;
-			   LBMReal forcingTerm[D3Q27System::ENDF + 1];
+			   real forcingTerm[D3Q27System::ENDF + 1];
 			   for (int dir = FSTARTDIR; dir <= FENDDIR; dir++) {
-				   LBMReal velProd = DX1[dir] * vvxF + DX2[dir] * vvyF + DX3[dir] * vvzF;
-				   LBMReal velSq1 = velProd * velProd;
-				   LBMReal gamma = WEIGTH[dir] * (1.0 + 3 * velProd + (4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)));
+				   real velProd = DX1[dir] * vvxF + DX2[dir] * vvyF + DX3[dir] * vvzF;
+				   real velSq1 = velProd * velProd;
+				   real gamma = WEIGTH[dir] * (1.0 + 3 * velProd + (4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)));
 
-				   LBMReal fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
+				   real fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
 
 				   forcingTerm[dir] = 
 					   (-vvxF) * (fac1 * dX1_phiF ) +
@@ -646,8 +656,8 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 			   }
 
-			   LBMReal gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
-			   LBMReal fac1 = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
+			   real gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
+			   real fac1 = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
 			   forcingTerm[DIR_000] = (-vvxF) * (fac1 * dX1_phiF ) +
 				   (-vvyF) * (fac1 * dX2_phiF ) +
 				   (-vvzF) * (fac1 * dX3_phiF );
@@ -784,9 +794,9 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   ///////////////////////////////////////////////////////////////////////////////////////////
 			   if (withForcing)
 			   {
-				   muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-				   muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-				   muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+				   muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+				   muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+				   muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 				   forcingX1 = muForcingX1.Eval();
 				   forcingX2 = muForcingX2.Eval();
@@ -797,9 +807,9 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 				   vvz += forcingX3 * deltaT * 0.5; // Z
 			   }
 
-			   LBMReal vx2;
-			   LBMReal vy2;
-			   LBMReal vz2;
+			   real vx2;
+			   real vy2;
+			   real vz2;
 			   vx2 = vvx * vvx;
 			   vy2 = vvy * vvy;
 			   vz2 = vvz * vvz;
@@ -809,7 +819,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 
 			   ///////////////////////////////////////////////////////////////////////////////////////////               
-			   LBMReal oMdrho;
+			   real oMdrho;
 
 
 			   oMdrho = mfccc + mfaaa;
@@ -840,8 +850,8 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal wadjust;
-			   LBMReal qudricLimit = 0.01;
+			   real wadjust;
+			   real qudricLimit = 0.01;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //Hin
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -1072,22 +1082,22 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Cumulants
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+			   real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 			   //LBMReal OxyyPxzz = 2.0 - collFactorM;// 1.;//-s9;//2+s9;//
 			   //LBMReal OxyyMxzz  = 2.0 - collFactorM;// 1.;//2+s9;//
-			   LBMReal O4 = 1.0;//collFactorM;// 1.;
-			   LBMReal O5 = 1.;
-			   LBMReal O6 = 1.;
+			   real O4 = 1.0;//collFactorM;// 1.;
+			   real O5 = 1.;
+			   real O6 = 1.;
 
 
 			   /////fourth order parameters; here only for test. Move out of loop!
 
-			   LBMReal OxyyPxzz =  8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-			   LBMReal OxyyMxzz =  8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-			   LBMReal Oxyz =  24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-			   LBMReal A =  (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real OxyyPxzz =  8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+			   real OxyyMxzz =  8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+			   real Oxyz =  24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+			   real A =  (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 			   //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-			   LBMReal BB =   (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real BB =   (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 			   //Cum 4.
@@ -1095,21 +1105,21 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 			   //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-			   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-			   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-			   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+			   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+			   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+			   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+			   real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 			   //Cum 5.
-			   LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-			   LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-			   LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+			   real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+			   real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+			   real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 			   //Cum 6.
-			   LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+			   real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 				   - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 				   - 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 				   - 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -1123,10 +1133,10 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 			   //2.
 			   // linear combinations
-			   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+			   real mxxPyyPzz = mfcaa + mfaca + mfaac;
 				mxxPyyPzz-=mfaaa;//12.03.21 shifted by mfaaa
-			   LBMReal mxxMyy = mfcaa - mfaca;
-			   LBMReal mxxMzz = mfcaa - mfaac;
+			   real mxxMyy = mfcaa - mfaca;
+			   real mxxMzz = mfcaa - mfaac;
 
 			   //applying phase field gradients first part:
 			  // mxxPyyPzz += c2o3 * rhoToPhi * (dX1_phi * vvx + dX2_phi * vvy + dX3_phi * vvz);
@@ -1152,17 +1162,17 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //mfbba += c1o6 * (dX1_phi * vvyF + dX2_phi * vvxF) * correctionScaling;
 
 
-			   LBMReal dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-			   LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-			   LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+			   real dxux = -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+			   real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+			   real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-			   LBMReal Dxy = -three * collFactorM * mfbba;
-			   LBMReal Dxz = -three * collFactorM * mfbab;
-			   LBMReal Dyz = -three * collFactorM * mfabb;
+			   real Dxy = -c3o1 * collFactorM * mfbba;
+			   real Dxz = -c3o1 * collFactorM * mfbab;
+			   real Dyz = -c3o1 * collFactorM * mfabb;
 
 			   ////relax unfiltered
 			   //! divergenceFilter 10.05.2021
-			   LBMReal divMag= (1.0 - phi[DIR_000]) * (phi[DIR_000])*10*5*sqrt(fabs((OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz))));
+			   real divMag= (1.0 - phi[DIR_000]) * (phi[DIR_000])*10*5*sqrt(fabs((OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz))));
 			  // LBMReal divMag = 500 *500* 50*(fabs((OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz))))* (fabs((OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz))));
 			   //LBMReal divMag = (dX1_phi * dxux) > 0 ? (dX1_phi * dxux) : 0;
 			   //divMag += (dX2_phi * dyuy) > 0 ? (dX2_phi * dyuy) : 0;
@@ -1237,14 +1247,14 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 
 			   //3.
 			   // linear combinations
-			   LBMReal mxxyPyzz = mfcba + mfabc;
-			   LBMReal mxxyMyzz = mfcba - mfabc;
+			   real mxxyPyzz = mfcba + mfabc;
+			   real mxxyMyzz = mfcba - mfabc;
 
-			   LBMReal mxxzPyyz = mfcab + mfacb;
-			   LBMReal mxxzMyyz = mfcab - mfacb;
+			   real mxxzPyyz = mfcab + mfacb;
+			   real mxxzMyyz = mfcab - mfacb;
 
-			   LBMReal mxyyPxzz = mfbca + mfbac;
-			   LBMReal mxyyMxzz = mfbca - mfbac;
+			   real mxyyPxzz = mfbca + mfbac;
+			   real mxyyMxzz = mfbca - mfbac;
 
 			   //relax
 			   wadjust = Oxyz + (1. - Oxyz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1280,12 +1290,12 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //CUMcbb += O4 * (-CUMcbb);
 
 
-			   CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-			   CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-			   CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-			   CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-			   CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-			   CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+			   CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+			   CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+			   CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+			   CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+			   CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+			   CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 
 
@@ -1349,9 +1359,9 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 			   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 			   //5.
 			   mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -2513,7 +2523,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
                         /////////////////////  P H A S E - F I E L D   S O L V E R
                         ////////////////////////////////////////////
 		/////CUMULANT PHASE-FIELD
-				LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+				real omegaD =1.0/( 3.0 * mob + 0.5);
 
 			   mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
 			   mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
@@ -2577,7 +2587,7 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			 //  LBMReal vvz = uz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // second component
-			   LBMReal concentration =
+			   real concentration =
 				   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 					   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2592,26 +2602,26 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			  // vvy += fy * c1o2;
 			  // vvz += fz * c1o2;
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal oneMinusRho = c1- concentration;
+			   real oneMinusRho = c1o1 - concentration;
 
-			   LBMReal cx =
+			   real cx =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 					   (mfcbb - mfabb));
-			   LBMReal cy =
+			   real cy =
 				   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 					   (mfbcb - mfbab));
-			   LBMReal cz =
+			   real cz =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 					   (mfbbc - mfbba));
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // calculate the square of velocities for this lattice node
-			   LBMReal cx2 = cx * cx;
-			   LBMReal cy2 = cy * cy;
-			   LBMReal cz2 = cz * cz;
+			   real cx2 = cx * cx;
+			   real cy2 = cy * cy;
+			   real cz2 = cz * cz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 			   //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2620,60 +2630,60 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho); // NOTE(review): c3o1 is paired with c1o9 here, but the matching backward X-Dir call passes c9o1/c1o9 and all sibling calls pass reciprocal constants - confirm c3o1 is intended
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - experimental Cumulant ... to be published ... hopefully
 			   //!
 
 			   // linearized orthogonalization of 3rd order central moments
-			   LBMReal Mabc = mfabc - mfaba * c1o3;
-			   LBMReal Mbca = mfbca - mfbaa * c1o3;
-			   LBMReal Macb = mfacb - mfaab * c1o3;
-			   LBMReal Mcba = mfcba - mfaba * c1o3;
-			   LBMReal Mcab = mfcab - mfaab * c1o3;
-			   LBMReal Mbac = mfbac - mfbaa * c1o3;
+			   real Mabc = mfabc - mfaba * c1o3;
+			   real Mbca = mfbca - mfbaa * c1o3;
+			   real Macb = mfacb - mfaab * c1o3;
+			   real Mcba = mfcba - mfaba * c1o3;
+			   real Mcab = mfcab - mfaab * c1o3;
+			   real Mbac = mfbac - mfbaa * c1o3;
 			   // linearized orthogonalization of 5th order central moments
-			   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-			   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-			   LBMReal Mccb = mfccb - mfaab * c1o9;
+			   real Mcbc = mfcbc - mfaba * c1o9;
+			   real Mbcc = mfbcc - mfbaa * c1o9;
+			   real Mccb = mfccb - mfaab * c1o9;
 
 			   // collision of 1st order moments
-			   cx = cx * (c1 - omegaD) + omegaD * vvx * concentration + normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-			   cy = cy * (c1 - omegaD) + omegaD * vvy * concentration + normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-			   cz = cz * (c1 - omegaD) + omegaD * vvz * concentration + normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+			   cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration + normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+			   cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration + normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+			   cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration + normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 			   //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
 			   //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -2685,9 +2695,9 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   cz2 = cz * cz;
 
 			   // equilibration of 2nd order moments
-			   mfbba = zeroReal;
-			   mfbab = zeroReal;
-			   mfabb = zeroReal;
+			   mfbba = c0o1;
+			   mfbab = c0o1;
+			   mfabb = c0o1;
 
 			   mfcaa = c1o3 * concentration;
 			   mfaca = c1o3 * concentration;
@@ -2704,13 +2714,13 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
 			   // equilibration of 3rd order moments
-			   Mabc = zeroReal;
-			   Mbca = zeroReal;
-			   Macb = zeroReal;
-			   Mcba = zeroReal;
-			   Mcab = zeroReal;
-			   Mbac = zeroReal;
-			   mfbbb = zeroReal;
+			   Mabc = c0o1;
+			   Mbca = c0o1;
+			   Macb = c0o1;
+			   Mcba = c0o1;
+			   Mcab = c0o1;
+			   Mbac = c0o1;
+			   mfbbb = c0o1;
 
 			   // from linearized orthogonalization 3rd order central moments to central moments
 			   mfabc = Mabc + mfaba * c1o3;
@@ -2725,14 +2735,14 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   mfcac = c1o9 * concentration;
 			   mfcca = c1o9 * concentration;
 
-			   mfcbb = zeroReal;
-			   mfbcb = zeroReal;
-			   mfbbc = zeroReal;
+			   mfcbb = c0o1;
+			   mfbcb = c0o1;
+			   mfbbc = c0o1;
 
 			   // equilibration of 5th order moments
-			   Mcbc = zeroReal;
-			   Mbcc = zeroReal;
-			   Mccb = zeroReal;
+			   Mcbc = c0o1;
+			   Mbcc = c0o1;
+			   Mccb = c0o1;
 
 			   // from linearized orthogonalization 5th order central moments to central moments
 			   mfcbc = Mcbc + mfaba * c1o9;
@@ -2750,39 +2760,39 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2918,9 +2928,11 @@ void MultiphaseScratchCumulantLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseScratchCumulantLBMKernel::gradX1_phi()
+real MultiphaseScratchCumulantLBMKernel::gradX1_phi()
 {
+	using namespace vf::lbm::dir;
     using namespace D3Q27System;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
@@ -2931,9 +2943,11 @@ LBMReal MultiphaseScratchCumulantLBMKernel::gradX1_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseScratchCumulantLBMKernel::gradX2_phi()
+real MultiphaseScratchCumulantLBMKernel::gradX2_phi()
 {
+	using namespace vf::lbm::dir;
     using namespace D3Q27System;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
@@ -2944,9 +2958,11 @@ LBMReal MultiphaseScratchCumulantLBMKernel::gradX2_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseScratchCumulantLBMKernel::gradX3_phi()
+real MultiphaseScratchCumulantLBMKernel::gradX3_phi()
 {
-    using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	using namespace D3Q27System;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
@@ -2957,10 +2973,12 @@ LBMReal MultiphaseScratchCumulantLBMKernel::gradX3_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseScratchCumulantLBMKernel::nabla2_phi()
+real MultiphaseScratchCumulantLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+    real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -2982,6 +3000,8 @@ LBMReal MultiphaseScratchCumulantLBMKernel::nabla2_phi()
 void MultiphaseScratchCumulantLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -3036,10 +3056,11 @@ void MultiphaseScratchCumulantLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseScratchCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseScratchCumulantLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.h
index c4cc5c263d079e402987dc38037c1dcf9e6ae6b1..6f10c9b5db0e2272d734de02a55dab1452f79d1a 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseScratchCumulantLBMKernel.h
@@ -51,54 +51,54 @@ public:
    virtual ~MultiphaseScratchCumulantLBMKernel(void) = default;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-   void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+   void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+   void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; }
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
 
    //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
+   real h  [D3Q27System::ENDF+1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
 
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.cpp
index 0d84520603d6c3a149efa30f298832e97fa623eb..54b2f412a530be065ad9931a77e38511a68b3aa9 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.cpp
@@ -41,6 +41,8 @@
 #include <cmath>
 #include <iostream>
 #include <string>
+#include "basics/constants/NumericConstants.h"
+//#include <basics/utilities/UbMath.h>
 
 #define PROOF_CORRECTNESS
 
@@ -53,19 +55,19 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::initDataSet()
 	SPtr<DistributionArray3D> h(new D3Q27EsoTwist3DSplittedVector( nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.9)); // For phase-field
 	SPtr<DistributionArray3D> h2(new D3Q27EsoTwist3DSplittedVector(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.9));
 	SPtr<PhaseFieldArray3D> divU1(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<LBMReal, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	p1Old = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<real, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	p1Old = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 	dataSet->setFdistributions(f);
 	dataSet->setHdistributions(h); // For phase-field
 	dataSet->setH2distributions(h2);
 	dataSet->setPhaseField(divU1);
 	dataSet->setPressureField(pressure);
 
-	phaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
-	phaseFieldOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 999.0));
+	phaseField = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
+	phaseFieldOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 999.0));
 
-	divU = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	divU = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 }
 //////////////////////////////////////////////////////////////////////////
 SPtr<LBMKernel> MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::clone()
@@ -96,23 +98,23 @@ SPtr<LBMKernel> MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::clone()
 	return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
-void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) { // transforms (mfa, mfb, mfc) in place; all three references are overwritten
+	using namespace vf::basics::constant; // presumably supplies c2o1, which stands in for the former UbMath c2 - confirm in NumericConstants.h
+	real m2 = mfa + mfc; // captured from the original inputs, before mfa is overwritten below
+	real m1 = mfc - mfa; // captured from the original inputs, before mfa is overwritten below
+	real m0 = m2 + mfb; // sum of all three inputs; stored into mfa, then rescaled for use in mfb and mfc
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K; // m0 here is the value already scaled by Kinverse and offset by oneMinusRho
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-	LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+	using namespace vf::basics::constant;
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -120,20 +122,20 @@ void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardInverseChim
 
 
 ////////////////////////////////////////////////////////////////////////////////
-void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) { // transforms (mfa, mfb, mfc) in place; all three references are overwritten
+	using namespace vf::basics::constant; // presumably supplies c2o1, which stands in for the former UbMath c2 - confirm in NumericConstants.h
+	real m1 = (mfa + mfc) + mfb; // sum of all three inputs; becomes the new mfa at the end
+	real m2 = mfc - mfa; // difference of the original mfc and mfa, taken before either is overwritten
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2); // right-hand side still reads the original mfa and mfc
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
-void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-	LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+	using namespace vf::basics::constant;
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -143,13 +145,14 @@ void  MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::backwardChimera(LBM
 void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 {
 	using namespace D3Q27System;
-	using namespace UbMath;
+	using namespace vf::basics::constant;
+	using namespace vf::lbm::dir;
 
 	forcingX1 = 0.0;
 	forcingX2 = 0.0;
 	forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = c4 / interfaceWidth; //1.0;//1.5;
+	real oneOverInterfaceScale = c4o1 / interfaceWidth; //1.0;//1.5;
 														 /////////////////////////////////////
 
 	localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -165,7 +168,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 	zeroDistributionsH2     = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getH2distributions())->getZeroDistributions();
 
 
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -179,7 +182,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 	int maxX1 = bcArrayMaxX1 - ghostLayerWidth;
 	int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
 	int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
-	LBMReal omegaDRho = 1.0;// 1.25;// 1.3;
+	real omegaDRho = 1.0;// 1.25;// 1.3;
 
 	for (int x3 = minX3-ghostLayerWidth; x3 < maxX3+ghostLayerWidth; x3++) {
 		for (int x2 = minX2-ghostLayerWidth; x2 < maxX2+ghostLayerWidth; x2++) {
@@ -189,40 +192,40 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					int x2p = x2 + 1;
 					int x3p = x3 + 1;
 
-					LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+					real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
 					(*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
 						(((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
 							((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
 								(mfbab + mfbcb) + (mfbba + mfbbc)) + mfbbb;
 					if ((*phaseField)(x1, x2, x3) > 1 ) {
-						(*phaseField)(x1, x2, x3) = c1;
+						(*phaseField)(x1, x2, x3) = c1o1;
 					}
 
 					if ((*phaseField)(x1, x2, x3) < 0) {
@@ -262,18 +265,18 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//LBMReal rhoH = 1.0;
 					//LBMReal rhoL = 1.0 / densityRatio;
 
-					LBMReal rhoH = 1.0*densityRatio;
-					LBMReal rhoL = 1.0;
+					real rhoH = 1.0*densityRatio;
+					real rhoL = 1.0;
 
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-					LBMReal drho = (((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc))   )
+					real drho = (((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc))   )
 						+ (((mfaab + mfccb) + (mfacb + mfcab) ) + ((mfaba + mfcbc) + (mfabc + mfcba) ) + ((mfbaa + mfbcc) + (mfbac + mfbca) )))
 						+ ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 				
 					omegaDRho = 2.0;// 1.5;
 					drho *= omegaDRho;
-					LBMReal keepDrho = drho;
+					real keepDrho = drho;
 					drho = ((*p1Old)(x1, x2, x3) + drho) * c1o2;
 				//	drho = ((*p1Old)(x1, x2, x3)*c2o3 + drho*c1o3) ;
 					(*p1Old)(x1, x2, x3) = keepDrho;
@@ -322,7 +325,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 		}
 	}
 
-	LBMReal collFactorM;
+	real collFactorM;
 
 	////Periodic Filter
 	//for (int x3 = minX3-1; x3 <= maxX3; x3++) {
@@ -394,92 +397,92 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					findNeighbors(phaseField, x1, x2, x3);
 
-					LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-
-					LBMReal mfhcbb = (*this->localDistributionsH2)(D3Q27System::ET_E, x1, x2, x3);
-					LBMReal mfhbcb = (*this->localDistributionsH2)(D3Q27System::ET_N, x1, x2, x3);
-					LBMReal mfhbbc = (*this->localDistributionsH2)(D3Q27System::ET_T, x1, x2, x3);
-					LBMReal mfhccb = (*this->localDistributionsH2)(D3Q27System::ET_NE, x1, x2, x3);
-					LBMReal mfhacb = (*this->localDistributionsH2)(D3Q27System::ET_NW, x1p, x2, x3);
-					LBMReal mfhcbc = (*this->localDistributionsH2)(D3Q27System::ET_TE, x1, x2, x3);
-					LBMReal mfhabc = (*this->localDistributionsH2)(D3Q27System::ET_TW, x1p, x2, x3);
-					LBMReal mfhbcc = (*this->localDistributionsH2)(D3Q27System::ET_TN, x1, x2, x3);
-					LBMReal mfhbac = (*this->localDistributionsH2)(D3Q27System::ET_TS, x1, x2p, x3);
-					LBMReal mfhccc = (*this->localDistributionsH2)(D3Q27System::ET_TNE, x1, x2, x3);
-					LBMReal mfhacc = (*this->localDistributionsH2)(D3Q27System::ET_TNW, x1p, x2, x3);
-					LBMReal mfhcac = (*this->localDistributionsH2)(D3Q27System::ET_TSE, x1, x2p, x3);
-					LBMReal mfhaac = (*this->localDistributionsH2)(D3Q27System::ET_TSW, x1p, x2p, x3);
-					LBMReal mfhabb = (*this->nonLocalDistributionsH2)(D3Q27System::ET_W, x1p, x2, x3);
-					LBMReal mfhbab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_S, x1, x2p, x3);
-					LBMReal mfhbba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_B, x1, x2, x3p);
-					LBMReal mfhaab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_SW, x1p, x2p, x3);
-					LBMReal mfhcab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_SE, x1, x2p, x3);
-					LBMReal mfhaba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BW, x1p, x2, x3p);
-					LBMReal mfhcba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BE, x1, x2, x3p);
-					LBMReal mfhbaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BS, x1, x2p, x3p);
-					LBMReal mfhbca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BN, x1, x2, x3p);
-					LBMReal mfhaaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-					LBMReal mfhcaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BSE, x1, x2p, x3p);
-					LBMReal mfhaca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BNW, x1p, x2, x3p);
-					LBMReal mfhcca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-					LBMReal mfhbbb = (*this->zeroDistributionsH2)(x1, x2, x3);
+					real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+					real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+					real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+					real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+
+					real mfhcbb = (*this->localDistributionsH2)(D3Q27System::ET_E, x1, x2, x3);
+					real mfhbcb = (*this->localDistributionsH2)(D3Q27System::ET_N, x1, x2, x3);
+					real mfhbbc = (*this->localDistributionsH2)(D3Q27System::ET_T, x1, x2, x3);
+					real mfhccb = (*this->localDistributionsH2)(D3Q27System::ET_NE, x1, x2, x3);
+					real mfhacb = (*this->localDistributionsH2)(D3Q27System::ET_NW, x1p, x2, x3);
+					real mfhcbc = (*this->localDistributionsH2)(D3Q27System::ET_TE, x1, x2, x3);
+					real mfhabc = (*this->localDistributionsH2)(D3Q27System::ET_TW, x1p, x2, x3);
+					real mfhbcc = (*this->localDistributionsH2)(D3Q27System::ET_TN, x1, x2, x3);
+					real mfhbac = (*this->localDistributionsH2)(D3Q27System::ET_TS, x1, x2p, x3);
+					real mfhccc = (*this->localDistributionsH2)(D3Q27System::ET_TNE, x1, x2, x3);
+					real mfhacc = (*this->localDistributionsH2)(D3Q27System::ET_TNW, x1p, x2, x3);
+					real mfhcac = (*this->localDistributionsH2)(D3Q27System::ET_TSE, x1, x2p, x3);
+					real mfhaac = (*this->localDistributionsH2)(D3Q27System::ET_TSW, x1p, x2p, x3);
+					real mfhabb = (*this->nonLocalDistributionsH2)(D3Q27System::ET_W, x1p, x2, x3);
+					real mfhbab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_S, x1, x2p, x3);
+					real mfhbba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_B, x1, x2, x3p);
+					real mfhaab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_SW, x1p, x2p, x3);
+					real mfhcab = (*this->nonLocalDistributionsH2)(D3Q27System::ET_SE, x1, x2p, x3);
+					real mfhaba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BW, x1p, x2, x3p);
+					real mfhcba = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BE, x1, x2, x3p);
+					real mfhbaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BS, x1, x2p, x3p);
+					real mfhbca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BN, x1, x2, x3p);
+					real mfhaaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+					real mfhcaa = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BSE, x1, x2p, x3p);
+					real mfhaca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BNW, x1p, x2, x3p);
+					real mfhcca = (*this->nonLocalDistributionsH2)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+					real mfhbbb = (*this->zeroDistributionsH2)(x1, x2, x3);
 
 					//LBMReal rhoH = 1.0;
 					//LBMReal rhoL = 1.0 / densityRatio;
 
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0/ densityRatio;
+					real rhoH = 1.0;
+					real rhoL = 1.0/ densityRatio;
 
-					LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+					real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-					LBMReal dX1_phi = gradX1_phi();
-					LBMReal dX2_phi = gradX2_phi();
-					LBMReal dX3_phi = gradX3_phi();
+					real dX1_phi = gradX1_phi();
+					real dX2_phi = gradX2_phi();
+					real dX3_phi = gradX3_phi();
 
-					LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9+1e-3;
+					real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9+1e-3;
 					// 01.09.2022: unclear what value we have to add to the normal: lager values better cut of in gas phase?
-					LBMReal normX1 = dX1_phi / denom;
-					LBMReal normX2 = dX2_phi / denom;
-					LBMReal normX3 = dX3_phi / denom;
+					real normX1 = dX1_phi / denom;
+					real normX2 = dX2_phi / denom;
+					real normX3 = dX3_phi / denom;
 
 
 
 					collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-					LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+					real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
 					//----------- Calculating Macroscopic Values -------------
-					LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH); //Incompressible
+					real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH); //Incompressible
 
 																		///scaled phase field
 					//LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) * (*phaseField)(x1, x2, x3) / ((*phaseField)(x1, x2, x3) * (*phaseField)(x1, x2, x3) + (c1 - (*phaseField)(x1, x2, x3)) * (c1 - (*phaseField)(x1, x2, x3))) - phiH);
@@ -488,23 +491,23 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH)+(one-phi[DIR_000])* (*pressure)(x1, x2, x3)*three; //compressible
 					//LBMReal rho = rhoL + (rhoH - rhoL) * phi[DIR_000] + (one - phi[DIR_000]) * (*pressure)(x1, x2, x3) * three; //compressible
 
-					LBMReal m0, m1, m2;
-					LBMReal rhoRef=c1;
+					real m0, m1, m2;
+					real rhoRef= c1o1;
 
-					LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+					real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 						(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 						(mfcbb - mfabb))/rhoRef;
-					LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+					real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 						(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 						(mfbcb - mfbab))/rhoRef;
-					LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+					real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 						(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 						(mfbbc - mfbba))/rhoRef;
 					////Filter&Gradient merged
-					LBMReal pressureHere = (*pressureOld)(x1, x2, x3);
+					real pressureHere = (*pressureOld)(x1, x2, x3);
 					//LBMReal pressureHere = (*pressure)(x1, x2, x3);
 
-					LBMReal arrayP[3][3][3] = { {{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere}},
+					real arrayP[3][3][3] = { {{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere}},
 												{{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere}},
 												{ {pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere},{pressureHere,pressureHere,pressureHere}} };
 					//LBMReal LaplaceP = 0.0;
@@ -686,7 +689,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					///////////////////////////////////////////////
 
-					LBMReal pStarStart = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real pStarStart = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfaba + mfcbc) + (mfabc + mfcba)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb) * c1o3;
 
@@ -793,7 +796,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 
 					//Abbas
-					LBMReal pStar = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
+					real pStar = ((((((mfaaa + mfccc) + (mfaac + mfcca)) + ((mfcac + mfaca) + (mfcaa + mfacc)))
 						+ (((mfaab + mfccb) + (mfacb + mfcab)) + ((mfaba + mfcbc) + (mfabc + mfcba)) + ((mfbaa + mfbcc) + (mfbac + mfbca))))
 						+ ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb) * c1o3;
 					//22.09.22 not yet in balance, repaire here
@@ -1046,9 +1049,9 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//LBMReal M011 = ((((((mfaaa + mfccc) - (mfaac + mfcca)) + ( (mfcaa + mfacc)- (mfcac + mfaca)))
 					//	+ (((mfbaa + mfbcc) + (-mfbac - mfbca))))
 					//	));
-					LBMReal vvxI = vvx;
-					LBMReal vvyI = vvy;
-					LBMReal vvzI = vvz;
+					real vvxI = vvx;
+					real vvyI = vvy;
+					real vvzI = vvz;
 
 					//LBMReal collFactorStore=collFactorM;
 					//LBMReal stress;
@@ -1164,16 +1167,16 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					//!Abbas
 
-					LBMReal vx2;
-					LBMReal vy2;
-					LBMReal vz2;
+					real vx2;
+					real vy2;
+					real vz2;
 					vx2 = vvx * vvx;
 					vy2 = vvy * vvy;
 					vz2 = vvz * vvz;
 					//pStar =ppStar- (vx2 + vy2 + vz2)*pStar;
 				//	pStar = (pStar + ppStar)*c1o2;
 					///////////////////////////////////////////////////////////////////////////////////////////               
-					LBMReal oMdrho;
+					real oMdrho;
 					///////////////
 						//mfabb -= pStar * c2o9;
 						//mfbab -= pStar * c2o9;
@@ -1231,8 +1234,8 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 															 ////////////////////////////////////////////////////////////////////////////////////
-					LBMReal wadjust;
-					LBMReal qudricLimit = 0.01 / (c1 + 1.0e4 * phi[DIR_000] * (c1 - phi[DIR_000])); //LBMReal qudricLimit = 0.01;
+					real wadjust;
+					real qudricLimit = 0.01 / (c1o1 + 1.0e4 * phi[DIR_000] * (c1o1 - phi[DIR_000])); //real qudricLimit = 0.01;
 					////////////////////////////////////////////////////////////////////////////////////
 					//Hin
 					////////////////////////////////////////////////////////////////////////////////////
@@ -1465,24 +1468,24 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					////////////////////////////////////////////////////////////////////////////////////
 
 					// mfaaa = 0.0;
-					LBMReal OxxPyyPzz = 1.0; //omega2 or bulk viscosity
+					real OxxPyyPzz = 1.0; //omega2 or bulk viscosity
 											//  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 											//  LBMReal OxyyMxzz  = 1.;//2+s9;//
-					LBMReal O4 = 1.;
-					LBMReal O5 = 1.;
-					LBMReal O6 = 1.;
+					real O4 = 1.;
+					real O5 = 1.;
+					real O6 = 1.;
 
 					//collFactorM+= (1.7 - collFactorM) * fabs(mfaaa) / (fabs(mfaaa) + 0.001f);
 
 
 					/////fourth order parameters; here only for test. Move out of loop!
 
-					LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-					LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-				    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-					LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+					real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+				    real Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+					real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 					//FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::B' )
-					LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+					real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 					//LBMReal stress = 1.0;// stress / (stress + 1.0e-10);
 					//stress = 1.0;
 					//OxyyPxzz += stress*(1.0-OxyyPxzz);
@@ -1497,21 +1500,21 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 					//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-					LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-					LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-					LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+					real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+					real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+					real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-					LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+					real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+					real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 					//Cum 5.
-					LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-					LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-					LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+					real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+					real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+					real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 					//Cum 6.
-					LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+					real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 						- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 						- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 						- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -1525,7 +1528,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					//2.
 					// linear combinations
-					LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+					real mxxPyyPzz = mfcaa + mfaca + mfaac;
 					//pStar = (mxxPyyPzz+vx2+vy2+vz2) * c1o3;//does not work
 					//pStar = (mxxPyyPzz) * c1o3;
 					//pStar = pStar + 1.5 * (mxxPyyPzz * c1o3 - pStar);
@@ -1533,25 +1536,25 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 					mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 										//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-					LBMReal mxxMyy = mfcaa - mfaca;
-					LBMReal mxxMzz = mfcaa - mfaac;
+					real mxxMyy = mfcaa - mfaca;
+					real mxxMzz = mfcaa - mfaac;
 
 					///
-					LBMReal mmfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-					LBMReal mmfaca = c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz);
-					LBMReal mmfaac = c1o3 * (mxxMyy - 2. * mxxMzz + mxxPyyPzz);
-					LBMReal mmfabb = mfabb;
-					LBMReal mmfbab = mfbab;
-					LBMReal mmfbba = mfbba;
+					real mmfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+					real mmfaca = c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz);
+					real mmfaac = c1o3 * (mxxMyy - 2. * mxxMzz + mxxPyyPzz);
+					real mmfabb = mfabb;
+					real mmfbab = mfbab;
+					real mmfbba = mfbba;
 					///
 
-					LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz)*0;
+					real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz)*0;
 					//LBMReal dxux = -c1o2 * (mxxMyy + mxxMzz) * collFactorM - mfaaa * c1o3* omegaDRho;
-					LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-					LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
-					LBMReal Dxy = -three * collFactorM * mfbba;
-					LBMReal Dxz = -three * collFactorM * mfbab;
-					LBMReal Dyz = -three * collFactorM * mfabb;
+					real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+					real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+					real Dxy = -c3o1 * collFactorM * mfbba;
+					real Dxz = -c3o1 * collFactorM * mfbab;
+					real Dyz = -c3o1 * collFactorM * mfabb;
 //					// attempt to improve implicit  stress computation by fixed iteration
 //					LBMReal dX2_rho = (rhoToPhi)*dX2_phi;
 //					LBMReal dX1_rho = (rhoToPhi)*dX1_phi;
@@ -1599,14 +1602,14 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 					//3.
 					// linear combinations
-					LBMReal mxxyPyzz = mfcba + mfabc;
-					LBMReal mxxyMyzz = mfcba - mfabc;
+					real mxxyPyzz = mfcba + mfabc;
+					real mxxyMyzz = mfcba - mfabc;
 
-					LBMReal mxxzPyyz = mfcab + mfacb;
-					LBMReal mxxzMyyz = mfcab - mfacb;
+					real mxxzPyyz = mfcab + mfacb;
+					real mxxzMyyz = mfcab - mfacb;
 
-					LBMReal mxyyPxzz = mfbca + mfbac;
-					LBMReal mxyyMxzz = mfbca - mfbac;
+					real mxyyPxzz = mfbca + mfbac;
+					real mxyyMxzz = mfbca - mfbac;
 
 					 mmfcaa += c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz- mfaaa);
 					 mmfaca += c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz- mfaaa);
@@ -1640,12 +1643,12 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
 
 					//4.
-					CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-					CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-					CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-					CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-					CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-					CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+					CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+					CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+					CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+					CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+					CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+					CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 					//5.
 					CUMbcc += O5 * (-CUMbcc);
@@ -1665,9 +1668,9 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 					mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+					mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+					mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 					//5.
 					mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -2123,7 +2126,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 
 
 
-					pStar += pStarStart*(omegaDRho-c1);
+					pStar += pStarStart*(omegaDRho- c1o1);
 
 					mfcbb -= c2o9*pStar;
 					mfbcb -= c2o9*pStar;
@@ -2151,7 +2154,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					mfcaa -= c1o72*pStar;
 					mfaca -= c1o72*pStar;
 					mfcca -= c1o72*pStar;
-					mfbbb -= c4*c2o9*pStar;
+					mfbbb -= c4o1*c2o9*pStar;
 
 					mfhbcb = rho*c2o9 * pStar;
 					mfhbbc = rho*c2o9 * pStar;
@@ -2179,7 +2182,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					mfhcaa = rho*c1o72 * pStar;
 					mfhaca = rho*c1o72 * pStar;
 					mfhcca = rho*c1o72 * pStar;
-					mfhbbb = rho*c4 * c2o9 * pStar;
+					mfhbbb = rho* c4o1 * c2o9 * pStar;
 
 					//mfStartbcb =  c2o9  * pStarStart;
 					//mfStartbbc =  c2o9  * pStarStart;
@@ -2442,7 +2445,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 					//proof correctness
 					//////////////////////////////////////////////////////////////////////////
 					//#ifdef  PROOF_CORRECTNESS
-					LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+					real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 						+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 						+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 					//			   //LBMReal dif = fabs(drho - rho_post);
@@ -2532,7 +2535,7 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 /////////////////////  P H A S E - F I E L D   S O L V E R
 ////////////////////////////////////////////
 /////CUMULANT PHASE-FIELD
-					LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+					real omegaD =1.0/( 3.0 * mob + 0.5);
 					{
 						mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 						mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -2569,31 +2572,31 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// second component
-						LBMReal concentration =
+						real concentration =
 							((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 								(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 								((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal oneMinusRho = c1- concentration;
+						real oneMinusRho = c1o1 - concentration;
 
-						LBMReal cx =
+						real cx =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 								(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 								(mfcbb - mfabb));
-						LBMReal cy =
+						real cy =
 							((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 								(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 								(mfbcb - mfbab));
-						LBMReal cz =
+						real cz =
 							((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 								(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 								(mfbbc - mfbba));
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// calculate the square of velocities for this lattice node
-						LBMReal cx2 = cx * cx;
-						LBMReal cy2 = cy * cy;
-						LBMReal cz2 = cz * cz;
+						real cx2 = cx * cx;
+						real cy2 = cy * cy;
+						real cz2 = cz * cz;
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 						//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2602,88 +2605,88 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+						forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho); // Kinverse (9) must be the reciprocal of K (1/9), as in the matching backward transform
 
 						////////////////////////////////////////////////////////////////////////////////////
 						//! - experimental Cumulant ... to be published ... hopefully
 						//!
 
 						// linearized orthogonalization of 3rd order central moments
-						LBMReal Mabc = mfabc - mfaba * c1o3;
-						LBMReal Mbca = mfbca - mfbaa * c1o3;
-						LBMReal Macb = mfacb - mfaab * c1o3;
-						LBMReal Mcba = mfcba - mfaba * c1o3;
-						LBMReal Mcab = mfcab - mfaab * c1o3;
-						LBMReal Mbac = mfbac - mfbaa * c1o3;
+						real Mabc = mfabc - mfaba * c1o3;
+						real Mbca = mfbca - mfbaa * c1o3;
+						real Macb = mfacb - mfaab * c1o3;
+						real Mcba = mfcba - mfaba * c1o3;
+						real Mcab = mfcab - mfaab * c1o3;
+						real Mbac = mfbac - mfbaa * c1o3;
 						// linearized orthogonalization of 5th order central moments
-						LBMReal Mcbc = mfcbc - mfaba * c1o9;
-						LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-						LBMReal Mccb = mfccb - mfaab * c1o9;
+						real Mcbc = mfcbc - mfaba * c1o9;
+						real Mbcc = mfbcc - mfbaa * c1o9;
+						real Mccb = mfccb - mfaab * c1o9;
 
 						//31.05.2022 addaptive mobility
 						//omegaD = c1 + (sqrt((cx - vvx * concentration) * (cx - vvx * concentration) + (cy - vvy * concentration) * (cy - vvy * concentration) + (cz - vvz * concentration) * (cz - vvz * concentration))) / (sqrt((cx - vvx * concentration) * (cx - vvx * concentration) + (cy - vvy * concentration) * (cy - vvy * concentration) + (cz - vvz * concentration) * (cz - vvz * concentration)) + fabs((1.0 - concentration) * (concentration)) * c1o6 * oneOverInterfaceScale+1.0e-200);
 						//omegaD = c2 * (concentration * (concentration - c1)) / (-c6 * (sqrt((cx - vvx * concentration) * (cx - vvx * concentration) + (cy - vvy * concentration) * (cy - vvy * concentration) + (cz - vvz * concentration) * (cz - vvz * concentration))) + (concentration * (concentration - c1))+1.0e-200);
 						// collision of 1st order moments
-						cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-							normX1 * (c1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
-						cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-							normX2 * (c1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
-						cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-							normX3 * (c1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
+						cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+							normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
+						cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+							normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
+						cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+							normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - concentration) * (concentration) * c1o3 * oneOverInterfaceScale;
 
 						cx2 = cx * cx;
 						cy2 = cy * cy;
 						cz2 = cz * cz;
 
 						// equilibration of 2nd order moments
-						mfbba = zeroReal;
-						mfbab = zeroReal;
-						mfabb = zeroReal;
+						mfbba = c0o1;
+						mfbab = c0o1;
+						mfabb = c0o1;
 
 						mfcaa = c1o3 * concentration;
 						mfaca = c1o3 * concentration;
 						mfaac = c1o3 * concentration;
 
 						// equilibration of 3rd order moments
-						Mabc = zeroReal;
-						Mbca = zeroReal;
-						Macb = zeroReal;
-						Mcba = zeroReal;
-						Mcab = zeroReal;
-						Mbac = zeroReal;
-						mfbbb = zeroReal;
+						Mabc = c0o1;
+						Mbca = c0o1;
+						Macb = c0o1;
+						Mcba = c0o1;
+						Mcab = c0o1;
+						Mbac = c0o1;
+						mfbbb = c0o1;
 
 						// from linearized orthogonalization 3rd order central moments to central moments
 						mfabc = Mabc + mfaba * c1o3;
@@ -2698,14 +2701,14 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 						mfcac = c1o9 * concentration;
 						mfcca = c1o9 * concentration;
 
-						mfcbb = zeroReal;
-						mfbcb = zeroReal;
-						mfbbc = zeroReal;
+						mfcbb = c0o1;
+						mfbcb = c0o1;
+						mfbbc = c0o1;
 
 						// equilibration of 5th order moments
-						Mcbc = zeroReal;
-						Mbcc = zeroReal;
-						Mccb = zeroReal;
+						Mcbc = c0o1;
+						Mbcc = c0o1;
+						Mccb = c0o1;
 
 						// from linearized orthogonalization 5th order central moments to central moments
 						mfcbc = Mcbc + mfaba * c1o9;
@@ -2723,39 +2726,39 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 						//!
 						////////////////////////////////////////////////////////////////////////////////////
 						// X - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 						backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 						backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 						backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 						backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Y - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 						backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 						backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 						backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 						////////////////////////////////////////////////////////////////////////////////////
 						// Z - Dir
-						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 						backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+						backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2800,82 +2803,102 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_phi()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_phi()
 {
+	using namespace vf::lbm::dir;
 	using namespace D3Q27System;
+	// Returns 3.0 * weighted sum of phi differences over P/M-mirrored direction pairs: 4 corner pairs (WEIGTH[DIR_PPP]), 4 edge pairs (WEIGTH[DIR_PP0]) and the DIR_P00/DIR_M00 pair (WEIGTH[DIR_0P0]); mirrored names are assumed to be opposite lattice directions - TODO confirm.
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_phi()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * weighted, signed combination of phi differences over P/M-mirrored direction pairs: 4 corner pairs (WEIGTH[DIR_PPP]), 4 edge pairs (WEIGTH[DIR_PP0]) and the DIR_0P0/DIR_0M0 pair (WEIGTH[DIR_0P0]); mirrored names are assumed to be opposite lattice directions - TODO confirm.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_phi()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_phi()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * weighted, signed combination of phi differences over P/M-mirrored direction pairs: 4 corner pairs (WEIGTH[DIR_PPP]), 4 edge pairs (WEIGTH[DIR_PP0]) and the DIR_00P/DIR_00M pair (WEIGTH[DIR_0P0]); mirrored names are assumed to be opposite lattice directions - TODO confirm.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_rhoInv(LBMReal rhoL,LBMReal rhoDIV)
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_rhoInv(real rhoL,real rhoDIV)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * weighted sum of differences of 1.0 / (rhoL + rhoDIV * phi[dir]) over P/M-mirrored direction pairs (corner pairs: WEIGTH[DIR_PPP], edge pairs: WEIGTH[DIR_PP0], DIR_P00/DIR_M00 pair: WEIGTH[DIR_0P0]); there is no check here that rhoL + rhoDIV * phi[dir] is non-zero.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((1.0/(rhoL+rhoDIV*phi[DIR_PPP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMM])) + (1.0 / (rhoL + rhoDIV * phi[DIR_PMM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPP]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PMP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPM])) + (1.0 / (rhoL + rhoDIV * phi[DIR_PPM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMP]))))
 		+ WEIGTH[DIR_PP0] * (((1.0 / (rhoL + rhoDIV * phi[DIR_P0P]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M0M])) + (1.0 / (rhoL + rhoDIV * phi[DIR_P0M]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M0P]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PM0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MP0])) + (1.0 / (rhoL + rhoDIV * phi[DIR_PP0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MM0]))))) +
 		+WEIGTH[DIR_0P0] * (1.0 / (rhoL + rhoDIV * phi[DIR_P00]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M00])));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_rhoInv(LBMReal rhoL,LBMReal rhoDIV)
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_rhoInv(real rhoL,real rhoDIV)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * weighted, signed combination of differences of 1.0 / (rhoL + rhoDIV * phi[dir]) over P/M-mirrored direction pairs (corner pairs: WEIGTH[DIR_PPP], edge pairs: WEIGTH[DIR_PP0], DIR_0P0/DIR_0M0 pair: WEIGTH[DIR_0P0]); there is no check here that rhoL + rhoDIV * phi[dir] is non-zero.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((1.0 / (rhoL + rhoDIV * phi[DIR_PPP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMM])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PMM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPP]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PPM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMP])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PMP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPM]))))
 		+ WEIGTH[DIR_PP0] * (((1.0 / (rhoL + rhoDIV * phi[DIR_0PP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0MM])) + (1.0 / (rhoL + rhoDIV * phi[DIR_0PM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0MP]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PP0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MM0])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PM0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MP0]))))) +
 		+WEIGTH[DIR_0P0] * (1.0 / (rhoL + rhoDIV * phi[DIR_0P0]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0M0])));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_rhoInv(LBMReal rhoL, LBMReal rhoDIV)
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_rhoInv(real rhoL, real rhoDIV)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * weighted, signed combination of differences of 1.0 / (rhoL + rhoDIV * phi[dir]) over P/M-mirrored direction pairs (corner pairs: WEIGTH[DIR_PPP], edge pairs: WEIGTH[DIR_PP0], DIR_00P/DIR_00M pair: WEIGTH[DIR_0P0]); there is no check here that rhoL + rhoDIV * phi[dir] is non-zero.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((1.0 / (rhoL + rhoDIV * phi[DIR_PPP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMM])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PMM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPP]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_PMP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MPM])) - (1.0 / (rhoL + rhoDIV * phi[DIR_PPM]) - 1.0 / (rhoL + rhoDIV * phi[DIR_MMP]))))
 		+ WEIGTH[DIR_PP0] * (((1.0 / (rhoL + rhoDIV * phi[DIR_P0P]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M0M])) - (1.0 / (rhoL + rhoDIV * phi[DIR_P0M]) - 1.0 / (rhoL + rhoDIV * phi[DIR_M0P]))) + ((1.0 / (rhoL + rhoDIV * phi[DIR_0MP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0PM])) + (1.0 / (rhoL + rhoDIV * phi[DIR_0PP]) - 1.0 / (rhoL + rhoDIV * phi[DIR_0MM]))))) +
 		+WEIGTH[DIR_0P0] * (1.0 / (rhoL + rhoDIV * phi[DIR_00P]) - 1.0 / (rhoL + rhoDIV * phi[DIR_00M])));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_phi2()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * weighted sum of phi2 differences over P/M-mirrored direction pairs: 4 corner pairs (WEIGTH[DIR_PPP]), 4 edge pairs (WEIGTH[DIR_PP0]) and the DIR_P00/DIR_M00 pair (WEIGTH[DIR_0P0]); mirrored names are assumed to be opposite lattice directions - TODO confirm.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_phi2()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * weighted, signed combination of phi2 differences over P/M-mirrored direction pairs: 4 corner pairs (WEIGTH[DIR_PPP]), 4 edge pairs (WEIGTH[DIR_PP0]) and the DIR_0P0/DIR_0M0 pair (WEIGTH[DIR_0P0]); mirrored names are assumed to be opposite lattice directions - TODO confirm.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_phi2()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	// Returns 3.0 * weighted, signed combination of phi2 differences over P/M-mirrored direction pairs: 4 corner pairs (WEIGTH[DIR_PPP]), 4 edge pairs (WEIGTH[DIR_PP0]) and the DIR_00P/DIR_00M pair (WEIGTH[DIR_0P0]); mirrored names are assumed to be opposite lattice directions - TODO confirm.
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
 }
 
-LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::nabla2_phi()
+real MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::nabla2_phi()
 {
 	using namespace D3Q27System;
-	LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+	real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -2896,6 +2919,8 @@ LBMReal MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::nabla2_phi()
 void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::computePhasefield()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
 	int minX1 = ghostLayerWidth;
@@ -2950,14 +2975,19 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::computePhasefield()
 	}
 }
 
-void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+	using namespace vf::basics::constant;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
 	phi[DIR_000] = (*ph)(x1, x2, x3);
+    if (phi[DIR_000] < 0) {
+        phi[DIR_000] = c0o1;
+    }
 
 
 	for (int k = FSTARTDIR; k <= FENDDIR; k++) {
@@ -2971,10 +3001,11 @@ void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors(CbArra
 	}
 }
 
-void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseSimpleVelocityBaseExternalPressureLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.h
index c5bc3560408698d4e83a2f45fcbeaf1b5b37317d..e2dc6d86abf7525231510df4f052cfc0768df457 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseSimpleVelocityBaseExternalPressureLBMKernel.h
@@ -57,69 +57,69 @@ public:
     //CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure;
 
 
-    double getCalculationTime() override { return .0; }
+    real getCalculationTime() override { return .0; } // always returns zero; no timing value is computed in this override
 protected:
     virtual void initDataSet();
     void swapDistributions() override;
 
     void initForcing();
 
-    void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-    void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-    void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-
-    LBMReal f1[D3Q27System::ENDF+1];
-
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
-
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-    CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
-
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH2;
-
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr p1Old;
-
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseFieldOld;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU; 
-
-    LBMReal h  [D3Q27System::ENDF+1];
-    LBMReal h2[D3Q27System::ENDF + 1];
-    LBMReal g  [D3Q27System::ENDF+1];
-    LBMReal phi[D3Q27System::ENDF+1];
-    LBMReal phi2[D3Q27System::ENDF + 1];
-    LBMReal pr1[D3Q27System::ENDF+1];
-    LBMReal phi_cutoff[D3Q27System::ENDF+1];
-
-    LBMReal gradX1_phi();
-    LBMReal gradX2_phi();
-    LBMReal gradX3_phi();
-	LBMReal gradX1_rhoInv(LBMReal rhoL, LBMReal rhoDIV);
-	LBMReal gradX2_rhoInv(LBMReal rhoL, LBMReal rhoDIV);
-	LBMReal gradX3_rhoInv(LBMReal rhoL, LBMReal rhoDIV);
-    LBMReal gradX1_phi2();
-    LBMReal gradX2_phi2();
-    LBMReal gradX3_phi2();
+    void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+    void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+    void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+
+    real f1[D3Q27System::ENDF+1];
+
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+    CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+    CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
+    CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH2;
+
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr p1Old;
+
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseFieldOld;
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU; 
+
+    real h  [D3Q27System::ENDF+1];
+    real h2[D3Q27System::ENDF + 1];
+    real g  [D3Q27System::ENDF+1];
+    real phi[D3Q27System::ENDF+1];
+    real phi2[D3Q27System::ENDF + 1];
+    real pr1[D3Q27System::ENDF+1];
+    real phi_cutoff[D3Q27System::ENDF+1];
+
+    real gradX1_phi();
+    real gradX2_phi();
+    real gradX3_phi();
+	real gradX1_rhoInv(real rhoL, real rhoDIV);
+	real gradX2_rhoInv(real rhoL, real rhoDIV);
+	real gradX3_rhoInv(real rhoL, real rhoDIV);
+    real gradX1_phi2();
+    real gradX2_phi2();
+    real gradX3_phi2();
     void computePhasefield();
-    void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-    void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+    void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+    void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
 
-    LBMReal nabla2_phi();
+    real nabla2_phi();
 
     mu::value_type muX1,muX2,muX3;
     mu::value_type muDeltaT;
     mu::value_type muNu;
     mu::value_type muRho;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.cpp
index db1397374771efd414bdeccbefe605b810cf449b..7ca468f57b58610619b4d641290bae360fc92891 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.cpp
@@ -82,23 +82,27 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsCumulantLBMKernel::clone()
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+ void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	// Overwrites mfa, mfb, mfc in place: mfa <- mfa + mfb + mfc; with m0 = that sum * Kinverse + oneMinusRho, mfb <- ((mfc - mfa) * Kinverse - m0 * vv) * K and mfc <- (((mfa + mfc) - c2o1 * (mfc - mfa) * vv) * Kinverse + v2 * m0) * K (right-hand sides use the incoming values).
+	real m2 = mfa + mfc;
+	real m1 = mfc - mfa;
+	real m0 = m2 + mfb;
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+ void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	// Overwrites mfa, mfb, mfc in place; m0 and m1 buffer the new mfa and mfb so that all three results are computed from the incoming values before anything is overwritten.
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -106,20 +110,24 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsCumulantLBMKernel::clone()
 
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+ void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	// Overwrites mfa, mfb, mfc in place from the incoming values: mfa <- mfa + mfb + mfc; mfb <- (mfc - mfa) - vv * (mfa + mfb + mfc); mfc <- (mfc + mfa) + v2 * (mfa + mfb + mfc) - c2o1 * vv * (mfc - mfa).
+	real m1 = (mfa + mfc) + mfb;
+	real m2 = mfc - mfa;
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
- void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+ void  MultiphaseTwoPhaseFieldsCumulantLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	// Overwrites mfa, mfb, mfc in place; ma and mb buffer the new mfa and mfb so that mfc is still computed from the incoming values before they are overwritten.
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -129,13 +137,15 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsCumulantLBMKernel::clone()
 void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+//    using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::basics::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
     forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = 1.0;
+	real oneOverInterfaceScale = 1.0;
     /////////////////////////////////////
 
     localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -165,12 +175,12 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 
     //TODO
 	//very expensive !!!!!
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField2(
+        new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
 
 
         for (int x3 = 0; x3 <= maxX3; x3++) {
@@ -181,34 +191,34 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
                         (*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
                                                     (((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
                                                     ((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -256,7 +266,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
+        real collFactorM;
         //LBMReal forcingTerm[D3Q27System::ENDF + 1];
 
         for (int x3 = minX3; x3 < maxX3; x3++) {
@@ -291,53 +301,53 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
                         findNeighbors(phaseField, x1, x2, x3);
 						findNeighbors2(phaseField2, x1, x2, x3);
 
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
-
-						LBMReal dX1_phi2 = gradX1_phi2();
-						LBMReal dX2_phi2 = gradX2_phi2();
-						LBMReal dX3_phi2 = gradX3_phi2();
-
-
-                        LBMReal denom2 = sqrt(dX1_phi * dX1_phi+ dX1_phi2 * dX1_phi2 + dX2_phi * dX2_phi + dX2_phi2 * dX2_phi2 + dX3_phi * dX3_phi+ dX3_phi2 * dX3_phi2) + 1e-9;
-                        LBMReal normX1 = (dX1_phi-dX1_phi2)/denom2;
-						LBMReal normX2 = (dX2_phi-dX2_phi2)/denom2;
-						LBMReal normX3 = (dX3_phi-dX3_phi2)/denom2;
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
+
+						real dX1_phi2 = gradX1_phi2();
+						real dX2_phi2 = gradX2_phi2();
+						real dX3_phi2 = gradX3_phi2();
+
+
+                        real denom2 = sqrt(dX1_phi * dX1_phi+ dX1_phi2 * dX1_phi2 + dX2_phi * dX2_phi + dX2_phi2 * dX2_phi2 + dX3_phi * dX3_phi+ dX3_phi2 * dX3_phi2) + 1e-9;
+                        real normX1 = (dX1_phi-dX1_phi2)/denom2;
+						real normX2 = (dX2_phi-dX2_phi2)/denom2;
+						real normX3 = (dX3_phi-dX3_phi2)/denom2;
 
 						//LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
 						//LBMReal normX1 = dX1_phi / denom;
@@ -349,59 +359,59 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 						collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
                             			   ////Incompressible Kernal
 
-						mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3) / rho * c3;
-						mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3;
-						mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3;
-						mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3;
-						mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3;
-						mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3;
-						mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3;
-						mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3;
-						mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3;
-						mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3;
-						mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3;
-						mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3;
-						mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3;
-
-						mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3;
-						mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3;
-						mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3;
-						mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3;
-						mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3;
-						mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3;
-						mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3;
-						mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3;
-						mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3;
-						mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3;
-						mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3;
-						mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3;
-						mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3;
-
-						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3;
-
-
-			   LBMReal m0, m1, m2;
-			   LBMReal rhoRef=c1;
+						mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3) / rho * c3o1;
+						mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3) / rho * c3o1;
+						mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3) / rho * c3o1;
+						mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3) / rho * c3o1;
+						mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3) / rho * c3o1;
+						mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3) / rho * c3o1;
+						mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3) / rho * c3o1;
+						mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3) / rho * c3o1;
+						mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3) / rho * c3o1;
+						mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3) / rho * c3o1;
+						mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3) / rho * c3o1;
+						mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3) / rho * c3o1;
+						mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3) / rho * c3o1;
+
+						mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3) / rho * c3o1;
+						mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3) / rho * c3o1;
+						mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p) / rho * c3o1;
+						mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3) / rho * c3o1;
+						mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3) / rho * c3o1;
+						mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p) / rho * c3o1;
+						mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p) / rho * c3o1;
+						mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p) / rho * c3o1;
+						mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p) / rho * c3o1;
+						mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p) / rho * c3o1;
+						mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p) / rho * c3o1;
+						mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p) / rho * c3o1;
+						mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p) / rho * c3o1;
+
+						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3) / rho * c3o1;
+
+
+			   real m0, m1, m2;
+			   real rhoRef=c1o1;
 
 			  //LBMReal 
 			//    LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 			// 	   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 			// 	   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-			   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+			   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 				   (mfcbb - mfabb))/rhoRef;
-			   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+			   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 				   (mfbcb - mfbab))/rhoRef;
-			   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+			   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 				   (mfbbc - mfbba))/rhoRef;
 
@@ -434,7 +444,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //////classic source term
 			   ///----Classic source term 8.4.2021
 
-			   LBMReal vvxF, vvyF, vvzF;
+			   real vvxF, vvyF, vvzF;
 			   vvxF = vvx;//-2*c1o24 * lap_vx;// 
 			   vvyF = vvy;//-2*c1o24 * lap_vy;// 
 			   vvzF = vvz;//-2*c1o24 * lap_vz;// 
@@ -445,10 +455,10 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //if (vvxF != vvx) {
 				  // vvxF = vvxF;
 			   //}
-			   LBMReal weightGrad = 1.0;// -denom * denom / (denom * denom + 0.0001 * 0.001);
-			   LBMReal dX1_phiF = dX1_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX1;
-			   LBMReal dX2_phiF = dX2_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX2;
-			   LBMReal dX3_phiF = dX3_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX3;
+			   real weightGrad = 1.0;// -denom * denom / (denom * denom + 0.0001 * 0.001);
+			   real dX1_phiF = dX1_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX1;
+			   real dX2_phiF = dX2_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX2;
+			   real dX3_phiF = dX3_phi * weightGrad + (1.0 - weightGrad) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * normX3;
 
 			   //dX1_phiF *= 1.2;
 			   //dX2_phiF *= 1.2;
@@ -461,17 +471,17 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 				  // dX2_phiF = gradPhi * normX2;
 				  // dX3_phiF = gradPhi * normX3;
 
-			   LBMReal ux2;
-			   LBMReal uy2;
-			   LBMReal uz2;
+			   real ux2;
+			   real uy2;
+			   real uz2;
 			   ux2 = vvxF * vvxF;
 			   uy2 = vvyF * vvyF;
 			   uz2 = vvzF * vvzF;
-			   LBMReal forcingTerm[D3Q27System::ENDF + 1];
+			   real forcingTerm[D3Q27System::ENDF + 1];
 			   for (int dir = FSTARTDIR; dir <= FENDDIR; dir++) {
-				   LBMReal velProd = DX1[dir] * vvxF + DX2[dir] * vvyF + DX3[dir] * vvzF;
-				   LBMReal velSq1 = velProd * velProd;
-				   LBMReal gamma = WEIGTH[dir] * (1.0 + 3 * velProd + (4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)));
+				   real velProd = DX1[dir] * vvxF + DX2[dir] * vvyF + DX3[dir] * vvzF;
+				   real velSq1 = velProd * velProd;
+				   real gamma = WEIGTH[dir] * (1.0 + 3 * velProd + (4.5 * velSq1 - 1.5 * (ux2 + uy2 + uz2)));
 
 				   //LBMReal fac1 = (gamma - WEIGTH[dir]) * c1o3 * rhoToPhi;
 
@@ -484,7 +494,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 					  // (DX3[dir]) * (fac1 * dX3_phiF);
 
 
-				   LBMReal fac1 = (gamma - WEIGTH[dir]) * c1o3 ;
+				   real fac1 = (gamma - WEIGTH[dir]) * c1o3 ;
 
 				   forcingTerm[dir] =
 					   (-vvxF) * (fac1 * (dX1_phiF * rhoH + dX2_phi2 * rhoL)) +
@@ -498,8 +508,8 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 
 			   }
 
-			   LBMReal gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
-			   LBMReal fac1 = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
+			   real gamma = WEIGTH[DIR_000] * (1.0 - 1.5 * (ux2 + uy2 + uz2));
+			   real fac1 = (gamma - WEIGTH[DIR_000]) * c1o3 * rhoToPhi;
 			   forcingTerm[DIR_000] =	 (-vvxF) * (fac1 * (dX1_phiF * rhoH + dX2_phi2 * rhoL)) +
 				   (-vvyF) * (fac1 * (dX2_phiF * rhoH + dX2_phi2 * rhoL)) +
 				   (-vvzF) * (fac1 * (dX3_phiF * rhoH + dX3_phi2 * rhoL));
@@ -643,9 +653,9 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   ///////////////////////////////////////////////////////////////////////////////////////////
 			   if (withForcing)
 			   {
-				   muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-				   muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-				   muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+				   muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+				   muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+				   muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 				   //forcingX1 = muForcingX1.Eval();
 				   //forcingX2 = muForcingX2.Eval();
@@ -656,14 +666,14 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 				   //vvz += forcingX3 * deltaT * 0.5; // Z
 			   }
 
-			   LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+			   real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx * vvx;
                vy2 = vvy * vvy;
                vz2 = vvz * vvz;
 			   ///////////////////////////////////////////////////////////////////////////////////////////               
-			   LBMReal oMdrho;
+			   real oMdrho;
 
 
 			   oMdrho = mfccc + mfaaa;
@@ -693,8 +703,8 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal wadjust;
-			   LBMReal qudricLimit = 0.01;
+			   real wadjust;
+			   real qudricLimit = 0.01;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //Hin
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -925,23 +935,23 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Cumulants
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+			   real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 			 //  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 			 //  LBMReal OxyyMxzz  = 1.;//2+s9;//
-			   LBMReal O4 = 1.;
-			   LBMReal O5 = 1.;
-			   LBMReal O6 = 1.;
+			   real O4 = 1.;
+			   real O5 = 1.;
+			   real O6 = 1.;
 
 
 
 			   /////fourth order parameters; here only for test. Move out of loop!
 
-			   LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-			   LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+			   real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+			   real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
 			//    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-			   LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 			   //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-			   LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 			   //Cum 4.
@@ -949,21 +959,21 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 			   //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-			   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-			   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-			   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+			   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+			   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+			   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+			   real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 			   //Cum 5.
-			   LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-			   LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-			   LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+			   real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+			   real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+			   real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 			   //Cum 6.
-			   LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+			   real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 				   - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 				   - 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 				   - 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -977,10 +987,10 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 
 			   //2.
 			   // linear combinations
-			   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+			   real mxxPyyPzz = mfcaa + mfaca + mfaac;
 				mxxPyyPzz-=mfaaa;//12.03.21 shifted by mfaaa
-			   LBMReal mxxMyy = mfcaa - mfaca;
-			   LBMReal mxxMzz = mfcaa - mfaac;
+			   real mxxMyy = mfcaa - mfaca;
+			   real mxxMzz = mfcaa - mfaac;
 
 			   //applying phase field gradients first part:
 			  // mxxPyyPzz += c2o3 * rhoToPhi * (dX1_phi * vvx + dX2_phi * vvy + dX3_phi * vvz);
@@ -995,13 +1005,13 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
                //mfbab += c1o6 * (dX1_phi * vvz + dX3_phi * vvx) * correctionScaling;
                //mfbba += c1o6 * (dX1_phi * vvy + dX2_phi * vvx) * correctionScaling;
 
-			   LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-			   LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-			   LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+			   real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+			   real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+			   real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-			   LBMReal Dxy = -three * collFactorM * mfbba;
-			   LBMReal Dxz = -three * collFactorM * mfbab;
-			   LBMReal Dyz = -three * collFactorM * mfabb;
+			   real Dxy = -c3o1 * collFactorM * mfbba;
+			   real Dxz = -c3o1 * collFactorM * mfbab;
+			   real Dyz = -c3o1 * collFactorM * mfabb;
 
 
 			   //relax
@@ -1034,14 +1044,14 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 
 			   //3.
 			   // linear combinations
-			   LBMReal mxxyPyzz = mfcba + mfabc;
-			   LBMReal mxxyMyzz = mfcba - mfabc;
+			   real mxxyPyzz = mfcba + mfabc;
+			   real mxxyMyzz = mfcba - mfabc;
 
-			   LBMReal mxxzPyyz = mfcab + mfacb;
-			   LBMReal mxxzMyyz = mfcab - mfacb;
+			   real mxxzPyyz = mfcab + mfacb;
+			   real mxxzMyyz = mfcab - mfacb;
 
-			   LBMReal mxyyPxzz = mfbca + mfbac;
-			   LBMReal mxyyMxzz = mfbca - mfbac;
+			   real mxyyPxzz = mfbca + mfbac;
+			   real mxyyMxzz = mfbca - mfbac;
 
 			   //relax
 			   wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1075,12 +1085,12 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //CUMbbc += O4 * (-CUMbbc);
 			   //CUMbcb += O4 * (-CUMbcb);
 			   //CUMcbb += O4 * (-CUMcbb);
-			   CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-			   CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-			   CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-			   CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-			   CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-			   CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+			   CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+			   CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+			   CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+			   CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+			   CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+			   CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 			   //5.
 			   CUMbcc += O5 * (-CUMbcc);
@@ -1100,9 +1110,9 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 			   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 			   //5.
 			   mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -2260,7 +2270,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
                         /////////////////////  P H A S E - F I E L D   S O L V E R
                         ////////////////////////////////////////////
 		/////CUMULANT PHASE-FIELD
-				LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+				real omegaD =1.0/( 3.0 * mob + 0.5);
 				{
 			   mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 			   mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -2324,7 +2334,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			 //  LBMReal vvz = uz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // second component
-			   LBMReal concentration =
+			   real concentration =
 				   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 					   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2339,26 +2349,26 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			  // vvy += fy * c1o2;
 			  // vvz += fz * c1o2;
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal oneMinusRho = c1- concentration;
+			   real oneMinusRho = c1o1 - concentration;
 
-			   LBMReal cx =
+			   real cx =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 					   (mfcbb - mfabb));
-			   LBMReal cy =
+			   real cy =
 				   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 					   (mfbcb - mfbab));
-			   LBMReal cz =
+			   real cz =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 					   (mfbbc - mfbba));
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // calculate the square of velocities for this lattice node
-			   LBMReal cx2 = cx * cx;
-			   LBMReal cy2 = cy * cy;
-			   LBMReal cz2 = cz * cz;
+			   real cx2 = cx * cx;
+			   real cy2 = cy * cy;
+			   real cz2 = cz * cz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 			   //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2367,66 +2377,66 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - experimental Cumulant ... to be published ... hopefully
 			   //!
 
 			   // linearized orthogonalization of 3rd order central moments
-			   LBMReal Mabc = mfabc - mfaba * c1o3;
-			   LBMReal Mbca = mfbca - mfbaa * c1o3;
-			   LBMReal Macb = mfacb - mfaab * c1o3;
-			   LBMReal Mcba = mfcba - mfaba * c1o3;
-			   LBMReal Mcab = mfcab - mfaab * c1o3;
-			   LBMReal Mbac = mfbac - mfbaa * c1o3;
+			   real Mabc = mfabc - mfaba * c1o3;
+			   real Mbca = mfbca - mfbaa * c1o3;
+			   real Macb = mfacb - mfaab * c1o3;
+			   real Mcba = mfcba - mfaba * c1o3;
+			   real Mcab = mfcab - mfaab * c1o3;
+			   real Mbac = mfbac - mfbaa * c1o3;
 			   // linearized orthogonalization of 5th order central moments
-			   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-			   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-			   LBMReal Mccb = mfccb - mfaab * c1o9;
+			   real Mcbc = mfcbc - mfaba * c1o9;
+			   real Mbcc = mfbcc - mfbaa * c1o9;
+			   real Mccb = mfccb - mfaab * c1o9;
 
 			   // collision of 1st order moments
 			  // LBMReal ccx, ccy, ccz;
 			   
 
-               cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-                    normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-                    normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-                    normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+                    normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+                    normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+                    normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 			   //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
 			   //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -2438,9 +2448,9 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   cz2 = cz * cz;
 
 			   // equilibration of 2nd order moments
-			   mfbba = zeroReal;
-			   mfbab = zeroReal;
-			   mfabb = zeroReal;
+			   mfbba = c0o1;
+			   mfbab = c0o1;
+			   mfabb = c0o1;
 
 			   mfcaa = c1o3 * concentration;
 			   mfaca = c1o3 * concentration;
@@ -2457,13 +2467,13 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
 			   // equilibration of 3rd order moments
-			   Mabc = zeroReal;
-			   Mbca = zeroReal;
-			   Macb = zeroReal;
-			   Mcba = zeroReal;
-			   Mcab = zeroReal;
-			   Mbac = zeroReal;
-			   mfbbb = zeroReal;
+			   Mabc = c0o1;
+			   Mbca = c0o1;
+			   Macb = c0o1;
+			   Mcba = c0o1;
+			   Mcab = c0o1;
+			   Mbac = c0o1;
+			   mfbbb = c0o1;
 
 			   // from linearized orthogonalization 3rd order central moments to central moments
 			   mfabc = Mabc + mfaba * c1o3;
@@ -2478,14 +2488,14 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   mfcac = c1o9 * concentration;
 			   mfcca = c1o9 * concentration;
 
-			   mfcbb = zeroReal;
-			   mfbcb = zeroReal;
-			   mfbbc = zeroReal;
+			   mfcbb = c0o1;
+			   mfbcb = c0o1;
+			   mfbbc = c0o1;
 
 			   // equilibration of 5th order moments
-			   Mcbc = zeroReal;
-			   Mbcc = zeroReal;
-			   Mccb = zeroReal;
+			   Mcbc = c0o1;
+			   Mbcc = c0o1;
+			   Mccb = c0o1;
 
 			   // from linearized orthogonalization 5th order central moments to central moments
 			   mfcbc = Mcbc + mfaba * c1o9;
@@ -2503,39 +2513,39 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2640,7 +2650,7 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 	 //  LBMReal vvz = uz;
 	   ////////////////////////////////////////////////////////////////////////////////////
 	   // second component
-   LBMReal concentration =
+   real concentration =
 	   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 	   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 		   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2655,26 +2665,26 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
   // vvy += fy * c1o2;
   // vvz += fz * c1o2;
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal oneMinusRho = c1 - concentration;
+   real oneMinusRho = c1o1 - concentration;
 
-   LBMReal cx =
+   real cx =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 	   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 		   (mfcbb - mfabb));
-   LBMReal cy =
+   real cy =
 	   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 	   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 		   (mfbcb - mfbab));
-   LBMReal cz =
+   real cz =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 	   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 		   (mfbbc - mfbba));
 
    ////////////////////////////////////////////////////////////////////////////////////
    // calculate the square of velocities for this lattice node
-   LBMReal cx2 = cx * cx;
-   LBMReal cy2 = cy * cy;
-   LBMReal cz2 = cz * cz;
+   real cx2 = cx * cx;
+   real cy2 = cy * cy;
+   real cz2 = cz * cz;
    ////////////////////////////////////////////////////////////////////////////////////
    //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2683,63 +2693,63 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    //! - experimental Cumulant ... to be published ... hopefully
    //!
 
    // linearized orthogonalization of 3rd order central moments
-   LBMReal Mabc = mfabc - mfaba * c1o3;
-   LBMReal Mbca = mfbca - mfbaa * c1o3;
-   LBMReal Macb = mfacb - mfaab * c1o3;
-   LBMReal Mcba = mfcba - mfaba * c1o3;
-   LBMReal Mcab = mfcab - mfaab * c1o3;
-   LBMReal Mbac = mfbac - mfbaa * c1o3;
+   real Mabc = mfabc - mfaba * c1o3;
+   real Mbca = mfbca - mfbaa * c1o3;
+   real Macb = mfacb - mfaab * c1o3;
+   real Mcba = mfcba - mfaba * c1o3;
+   real Mcab = mfcab - mfaab * c1o3;
+   real Mbac = mfbac - mfbaa * c1o3;
    // linearized orthogonalization of 5th order central moments
-   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-   LBMReal Mccb = mfccb - mfaab * c1o9;
+   real Mcbc = mfcbc - mfaba * c1o9;
+   real Mbcc = mfbcc - mfbaa * c1o9;
+   real Mccb = mfccb - mfaab * c1o9;
 
    // collision of 1st order moments
-   cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-	   normX1 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-	   normX2 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-	   normX3 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+	   normX1 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+	   normX2 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+	   normX3 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
    //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
    //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -2751,9 +2761,9 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    cz2 = cz * cz;
 
    // equilibration of 2nd order moments
-   mfbba = zeroReal;
-   mfbab = zeroReal;
-   mfabb = zeroReal;
+   mfbba = c0o1;
+   mfbab = c0o1;
+   mfabb = c0o1;
 
    mfcaa = c1o3 * concentration;
    mfaca = c1o3 * concentration;
@@ -2770,13 +2780,13 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
    // equilibration of 3rd order moments
-   Mabc = zeroReal;
-   Mbca = zeroReal;
-   Macb = zeroReal;
-   Mcba = zeroReal;
-   Mcab = zeroReal;
-   Mbac = zeroReal;
-   mfbbb = zeroReal;
+   Mabc = c0o1;
+   Mbca = c0o1;
+   Macb = c0o1;
+   Mcba = c0o1;
+   Mcab = c0o1;
+   Mbac = c0o1;
+   mfbbb = c0o1;
 
    // from linearized orthogonalization 3rd order central moments to central moments
    mfabc = Mabc + mfaba * c1o3;
@@ -2791,14 +2801,14 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    mfcac = c1o9 * concentration;
    mfcca = c1o9 * concentration;
 
-   mfcbb = zeroReal;
-   mfbcb = zeroReal;
-   mfbbc = zeroReal;
+   mfcbb = c0o1;
+   mfbcb = c0o1;
+   mfbbc = c0o1;
 
    // equilibration of 5th order moments
-   Mcbc = zeroReal;
-   Mbcc = zeroReal;
-   Mccb = zeroReal;
+   Mcbc = c0o1;
+   Mbcc = c0o1;
+   Mccb = c0o1;
 
    // from linearized orthogonalization 5th order central moments to central moments
    mfcbc = Mcbc + mfaba * c1o9;
@@ -2816,39 +2826,39 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2981,9 +2991,11 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
@@ -2994,9 +3006,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
@@ -3007,9 +3021,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
@@ -3020,9 +3036,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi2()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
@@ -3033,9 +3051,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX1_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi2()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
@@ -3046,9 +3066,11 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX2_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi2()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
@@ -3063,10 +3085,12 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::gradX3_phi2()
 
 
 
-LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::nabla2_phi()
+real MultiphaseTwoPhaseFieldsCumulantLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+    real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -3088,6 +3112,8 @@ LBMReal MultiphaseTwoPhaseFieldsCumulantLBMKernel::nabla2_phi()
 void MultiphaseTwoPhaseFieldsCumulantLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -3142,12 +3168,13 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
-    using namespace D3Q27System;
+	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
-    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
+	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
     phi[DIR_000] = (*ph)(x1, x2, x3);
 
@@ -3162,10 +3189,11 @@ void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal,
     }
 }
 
-void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsCumulantLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.h
index a65fe073fc18258f518f72df97e6e8751adc4479..dbc94d613c4683fb19cb92a7ab7d075da41ab231 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsCumulantLBMKernel.h
@@ -51,64 +51,64 @@ public:
    virtual ~MultiphaseTwoPhaseFieldsCumulantLBMKernel(void) = default;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-   void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+   void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+   void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; }
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
 
-   //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
+   //CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal phi2[D3Q27System::ENDF + 1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
+   real h  [D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real phi2[D3Q27System::ENDF + 1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
 
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
-   LBMReal gradX1_phi2();
-   LBMReal gradX2_phi2();
-   LBMReal gradX3_phi2();
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
+   real gradX1_phi2();
+   real gradX2_phi2();
+   real gradX3_phi2();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-   void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.cpp
index 3baddc4fef5447c83b242727276fd0ec7b64c206..090abea7ab2158faf9bdf807be64e35ff49d25d1 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.cpp
@@ -39,6 +39,7 @@
 #include "DataSet3D.h"
 #include "LBMKernel.h"
 #include <cmath>
+//#include "UbMath.h"
 
 #define PROOF_CORRECTNESS
 
@@ -52,8 +53,8 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::initDataSet()
     SPtr<DistributionArray3D> h2(new D3Q27EsoTwist3DSplittedVector(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.9)); // For phase-field
     //SPtr<PhaseFieldArray3D> divU(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 	SPtr<PhaseFieldArray3D> divU1(new PhaseFieldArray3D(            nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<LBMReal, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
-	pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure(new  CbArray3D<real, IndexerX3X2X1>(    nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
     dataSet->setFdistributions(f);
     dataSet->setHdistributions(h); // For phase-field
     dataSet->setH2distributions(h2); // For phase-field
@@ -61,9 +62,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::initDataSet()
 	dataSet->setPhaseField(divU1);
 	dataSet->setPressureField(pressure);
 
-	phaseField = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
-	phaseField2 = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
-	divU = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
+	phaseField = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
+	phaseField2 = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, -999.0));
+	divU = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<real, IndexerX3X2X1>(nx[0] + 4, nx[1] + 4, nx[2] + 4, 0.0));
 
 }
 //////////////////////////////////////////////////////////////////////////
@@ -95,23 +96,27 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::clone()
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+ void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	
+	real m2 = mfa + mfc;
+	real m1 = mfc - mfa;
+	real m0 = m2 + mfb;
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+ void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	 
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -119,20 +124,24 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::clone()
 
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+ void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	
+	real m1 = (mfa + mfc) + mfb;
+	real m2 = mfc - mfa;
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
- void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+ void  MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	 
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -142,13 +151,15 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::clone()
 void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::basics::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
     forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = c4 / interfaceWidth; //1.0;//1.5;
+	real oneOverInterfaceScale = c4o1 / interfaceWidth; //1.0;//1.5;
     /////////////////////////////////////
 
     localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -163,7 +174,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
     nonLocalDistributionsH2 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getH2distributions())->getNonLocalDistributions();
     zeroDistributionsH2     = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getH2distributions())->getZeroDistributions();
 
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure = dataSet->getPressureField();
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -182,7 +193,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
     //TODO
 	//very expensive !!!!!
-	//CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
+	//CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
  //           new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
  //   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2(
  //       new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
@@ -198,34 +209,34 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
 						(*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
                                                     (((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
                                                     ((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -301,16 +312,16 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
 						 mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-						LBMReal rhoH = 1.0;
-						LBMReal rhoL = 1.0 / densityRatio;
+						real rhoH = 1.0;
+						real rhoL = 1.0 / densityRatio;
 
-						LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+						real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-						LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
+						real rho = rhoH + rhoToPhi * ((*phaseField)(x1, x2, x3) - phiH);
 						//! variable density -> TRANSFER!
 						//LBMReal rho = rhoH * ((*phaseField)(x1, x2, x3)) + rhoL * ((*phaseField2)(x1, x2, x3));
 
@@ -324,7 +335,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
+        real collFactorM;
         //LBMReal forcingTerm[D3Q27System::ENDF + 1];
 
 		////filter
@@ -385,7 +396,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 				for (int x1 = minX1-1; x1 <= maxX1; x1++) {
 					if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) {
 
-						LBMReal sum = 0.;
+						real sum = 0.;
 
 
 
@@ -444,9 +455,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 									int zzz = zz + x3;
 									
 									if (!bcArray->isSolid(xxx, yyy, zzz) && !bcArray->isUndefined(xxx, yyy, zzz)) {
-										sum+= 64.0/(216.0*(c1+c3*abs(xx))* (c1 + c3 * abs(yy))* (c1 + c3 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
+										sum+= 64.0/(216.0*(c1o1+c3o1*abs(xx))* (c1o1 + c3o1 * abs(yy))* (c1o1 + c3o1 * abs(zz)))*(*pressureOld)(xxx, yyy, zzz);
 									}
-									else{ sum+= 64.0 / (216.0 * (c1 + c3 * abs(xx)) * (c1 + c3 * abs(yy)) * (c1 + c3 * abs(zz))) * (*pressureOld)(x1, x2, x3);
+									else{ sum+= 64.0 / (216.0 * (c1o1 + c3o1 * abs(xx)) * (c1o1 + c3o1 * abs(yy)) * (c1o1 + c3o1 * abs(zz))) * (*pressureOld)(x1, x2, x3);
 									}
 
 
@@ -512,43 +523,43 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
                         findNeighbors(phaseField, x1, x2, x3);
 						findNeighbors2(phaseField2, x1, x2, x3);
 
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
 
 						//LBMReal dX1_phi2 = gradX1_phi2();
 						//LBMReal dX2_phi2 = gradX2_phi2();
@@ -560,20 +571,20 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 						//LBMReal normX2 = (dX2_phi-dX2_phi2)/denom2;
 						//LBMReal normX3 = (dX3_phi-dX3_phi2)/denom2;
 
-						LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-						LBMReal normX1 = dX1_phi / denom;
-						LBMReal normX2 = dX2_phi / denom;
-						LBMReal normX3 = dX3_phi / denom;
+						real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+						real normX1 = dX1_phi / denom;
+						real normX2 = dX2_phi / denom;
+						real normX3 = dX3_phi / denom;
 
 
 
 						collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
 						//! variable density -> TRANSFER!
 						//LBMReal rho = rhoH * ((*phaseField)(x1, x2, x3)) + rhoL * ((*phaseField2)(x1, x2, x3));
@@ -611,21 +622,21 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 						//mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);// / rho * c3;
 
 
-			   LBMReal m0, m1, m2;
-			   LBMReal rhoRef=c1;
+			   real m0, m1, m2;
+			   real rhoRef=c1o1;
 
 			  //LBMReal 
 			//    LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 			// 	   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 			// 	   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-			   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+			   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 				   (mfcbb - mfabb))/rhoRef;
-			   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+			   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 				   (mfbcb - mfbab))/rhoRef;
-			   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+			   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 				   (mfbbc - mfbba))/rhoRef;
 
@@ -660,59 +671,59 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 				  // + WEIGTH[DIR_P00] * ((*pressure)(x1, x2, x3+1) - (*pressure)(x1, x2, x3-1)));
 			  
 			   
-			   LBMReal gradPx = 0.0;
-			   LBMReal gradPy = 0.0;
-			   LBMReal gradPz = 0.0;
+			   real gradPx = 0.0;
+			   real gradPy = 0.0;
+			   real gradPz = 0.0;
 			   for (int dir1 = -1; dir1 <= 1; dir1++) {
 				   for (int dir2 = -1; dir2 <= 1; dir2++) {
 					   int yyy = x2 + dir1;
 					   int zzz = x3 + dir2;
 					   if (!bcArray->isSolid(x1-1, yyy, zzz) && !bcArray->isUndefined(x1-1, yyy, zzz)) {
-						   gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(x1 + 1, yyy, zzz) && !bcArray->isUndefined(x1 + 1, yyy, zzz)) {
-						   gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 					   int xxx = x1 + dir1;
 					   if (!bcArray->isSolid(xxx, x2-1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-						   gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(xxx, x2+1, zzz) && !bcArray->isUndefined(xxx, x2+1, zzz)) {
-						   gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 					   yyy = x2 + dir2;
 					   if (!bcArray->isSolid(xxx, yyy, x3-1) && !bcArray->isUndefined(xxx, yyy, x3-1)) {
-						   gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(xxx, yyy, x3+1) && !bcArray->isUndefined(xxx, yyy, x3+1)) {
-						   gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 				   }
 			   }
 
 			   //Viscosity increase by pressure gradient
-			   LBMReal errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
+			   real errPhi = (((1.0 - phi[DIR_000]) * (phi[DIR_000]) * oneOverInterfaceScale)- denom);
 			   //LBMReal limVis = 0.0000001*10;//0.01;
 			  // collFactorM =collFactorM/(c1+limVis*(errPhi*errPhi)*collFactorM);
 			  // collFactorM = (collFactorM < 1.8) ? 1.8 : collFactorM;
@@ -985,14 +996,14 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 				  // //vvz += forcingX3 * deltaT * 0.5; // Z
 			   //}
 
-			   LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+			   real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx * vvx;
                vy2 = vvy * vvy;
                vz2 = vvz * vvz;
 			   ///////////////////////////////////////////////////////////////////////////////////////////               
-			   LBMReal oMdrho;
+			   real oMdrho;
 
 
 			   oMdrho = mfccc + mfaaa;
@@ -1022,8 +1033,8 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal wadjust;
-			   LBMReal qudricLimit = 0.01;
+			   real wadjust;
+			   real qudricLimit = 0.01;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //Hin
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -1257,23 +1268,23 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
 
 			  // mfaaa = 0.0;
-			   LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+			   real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 			 //  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 			 //  LBMReal OxyyMxzz  = 1.;//2+s9;//
-			   LBMReal O4 = 1.;
-			   LBMReal O5 = 1.;
-			   LBMReal O6 = 1.;
+			   real O4 = 1.;
+			   real O5 = 1.;
+			   real O6 = 1.;
 
 
 
 			   /////fourth order parameters; here only for test. Move out of loop!
 
-			   LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-			   LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-			//    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-			   LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+			   real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+			//    real Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+			   real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 			   //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-			   LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 			   //Cum 4.
@@ -1281,21 +1292,21 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 			   //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-			   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-			   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-			   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+			   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+			   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+			   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+			   real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 			   //Cum 5.
-			   LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-			   LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-			   LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+			   real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+			   real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+			   real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 			   //Cum 6.
-			   LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+			   real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 				   - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 				   - 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 				   - 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -1317,13 +1328,13 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
 			   //2.
 			   // linear combinations
-			   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+			   real mxxPyyPzz = mfcaa + mfaca + mfaac;
 
 			//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 			  mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 				//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-			   LBMReal mxxMyy = mfcaa - mfaca;
-			   LBMReal mxxMzz = mfcaa - mfaac;
+			   real mxxMyy = mfcaa - mfaca;
+			   real mxxMzz = mfcaa - mfaac;
 
 			   //applying phase field gradients first part:
 			  // mxxPyyPzz += c2o3 * rhoToPhi * (dX1_phi * vvx + dX2_phi * vvy + dX3_phi * vvz);
@@ -1338,13 +1349,13 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
                //mfbab += c1o6 * (dX1_phi * vvz + dX3_phi * vvx) * correctionScaling;
                //mfbba += c1o6 * (dX1_phi * vvy + dX2_phi * vvx) * correctionScaling;
 
-			   LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-			   LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-			   LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+			   real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+			   real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+			   real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-			   LBMReal Dxy = -three * collFactorM * mfbba;
-			   LBMReal Dxz = -three * collFactorM * mfbab;
-			   LBMReal Dyz = -three * collFactorM * mfabb;
+			   real Dxy = -c3o1 * collFactorM * mfbba;
+			   real Dxz = -c3o1 * collFactorM * mfbab;
+			   real Dyz = -c3o1 * collFactorM * mfabb;
 
 
 			   //relax
@@ -1382,14 +1393,14 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 
 			   //3.
 			   // linear combinations
-			   LBMReal mxxyPyzz = mfcba + mfabc;
-			   LBMReal mxxyMyzz = mfcba - mfabc;
+			   real mxxyPyzz = mfcba + mfabc;
+			   real mxxyMyzz = mfcba - mfabc;
 
-			   LBMReal mxxzPyyz = mfcab + mfacb;
-			   LBMReal mxxzMyyz = mfcab - mfacb;
+			   real mxxzPyyz = mfcab + mfacb;
+			   real mxxzMyyz = mfcab - mfacb;
 
-			   LBMReal mxyyPxzz = mfbca + mfbac;
-			   LBMReal mxyyMxzz = mfbca - mfbac;
+			   real mxyyPxzz = mfbca + mfbac;
+			   real mxyyMxzz = mfbca - mfbac;
 
 			   //relax
 			   wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1423,12 +1434,12 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //CUMbbc += O4 * (-CUMbbc);
 			   //CUMbcb += O4 * (-CUMbcb);
 			   //CUMcbb += O4 * (-CUMcbb);
-			   CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-			   CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-			   CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-			   CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-			   CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-			   CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+			   CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+			   CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+			   CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+			   CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+			   CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+			   CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 			   //5.
 			   CUMbcc += O5 * (-CUMbcc);
@@ -1448,9 +1459,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 			   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 			   //5.
 			   mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -1483,9 +1494,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //mfbaa += -rho * rhoToPhi * c1o2 * ((mbxx + mfcaa) * dX1_phi + (mbxy + mfbba) * dX2_phi + (mbxz + mfbab) * dX3_phi);
 			   //mfaba += -rho * rhoToPhi * c1o2 * ((mbxy + mfbba) * dX1_phi + (mbyy + mfaca) * dX2_phi + (mbyz + mfabb) * dX3_phi);
 			   //mfaab += -rho * rhoToPhi * c1o2 * ((mbxz + mfbab) * dX1_phi + (mbyz + mfabb) * dX2_phi + (mbzz + mfaac) * dX3_phi);
-			   mfbaa += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (2 * dxux * dX1_phi + Dxy * dX2_phi + Dxz * dX3_phi) / (rho);
-			   mfaba += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (Dxy * dX1_phi + 2 * dyuy * dX2_phi + Dyz * dX3_phi) / (rho);
-			   mfaab += c1o3 * (c1 / collFactorM - c1o2) * rhoToPhi * (Dxz * dX1_phi + Dyz * dX2_phi + 2 * dyuy * dX3_phi) / (rho);
+			   mfbaa += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (2 * dxux * dX1_phi + Dxy * dX2_phi + Dxz * dX3_phi) / (rho);
+			   mfaba += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (Dxy * dX1_phi + 2 * dyuy * dX2_phi + Dyz * dX3_phi) / (rho);
+			   mfaab += c1o3 * (c1o1 / collFactorM - c1o2) * rhoToPhi * (Dxz * dX1_phi + Dyz * dX2_phi + 2 * dzuz * dX3_phi) / (rho); // z-row of the stress/gradient contraction uses the zz term (dzuz); it previously repeated dyuy from the y-row
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //back
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -1731,7 +1742,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //proof correctness
 			   //////////////////////////////////////////////////////////////////////////
 //#ifdef  PROOF_CORRECTNESS
-			   LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+			   real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 				   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 				   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 //			   //LBMReal dif = fabs(drho - rho_post);
@@ -2618,7 +2629,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
                         /////////////////////  P H A S E - F I E L D   S O L V E R
                         ////////////////////////////////////////////
 		/////CUMULANT PHASE-FIELD
-				LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+				real omegaD =1.0/( 3.0 * mob + 0.5);
 				{
 			   mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 			   mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -2682,7 +2693,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			 //  LBMReal vvz = uz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // second component
-			   LBMReal concentration =
+			   real concentration =
 				   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 					   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2697,26 +2708,26 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			  // vvy += fy * c1o2;
 			  // vvz += fz * c1o2;
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal oneMinusRho = c1- concentration;
+			   real oneMinusRho = c1o1 - concentration;
 
-			   LBMReal cx =
+			   real cx =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 					   (mfcbb - mfabb));
-			   LBMReal cy =
+			   real cy =
 				   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 					   (mfbcb - mfbab));
-			   LBMReal cz =
+			   real cz =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 					   (mfbbc - mfbba));
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // calculate the square of velocities for this lattice node
-			   LBMReal cx2 = cx * cx;
-			   LBMReal cy2 = cy * cy;
-			   LBMReal cz2 = cz * cz;
+			   real cx2 = cx * cx;
+			   real cy2 = cy * cy;
+			   real cz2 = cz * cz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 			   //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2725,66 +2736,66 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - experimental Cumulant ... to be published ... hopefully
 			   //!
 
 			   // linearized orthogonalization of 3rd order central moments
-			   LBMReal Mabc = mfabc - mfaba * c1o3;
-			   LBMReal Mbca = mfbca - mfbaa * c1o3;
-			   LBMReal Macb = mfacb - mfaab * c1o3;
-			   LBMReal Mcba = mfcba - mfaba * c1o3;
-			   LBMReal Mcab = mfcab - mfaab * c1o3;
-			   LBMReal Mbac = mfbac - mfbaa * c1o3;
+			   real Mabc = mfabc - mfaba * c1o3;
+			   real Mbca = mfbca - mfbaa * c1o3;
+			   real Macb = mfacb - mfaab * c1o3;
+			   real Mcba = mfcba - mfaba * c1o3;
+			   real Mcab = mfcab - mfaab * c1o3;
+			   real Mbac = mfbac - mfbaa * c1o3;
 			   // linearized orthogonalization of 5th order central moments
-			   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-			   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-			   LBMReal Mccb = mfccb - mfaab * c1o9;
+			   real Mcbc = mfcbc - mfaba * c1o9;
+			   real Mbcc = mfbcc - mfbaa * c1o9;
+			   real Mccb = mfccb - mfaab * c1o9;
 
 			   // collision of 1st order moments
 			  // LBMReal ccx, ccy, ccz;
 			   
 
-               cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-                    normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-                    normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-                    normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+                    normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+                    normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+                    normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 			   //cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
 				  // normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[REST]) * (phi[REST])*(phi[REST]+phi2[REST]) * c1o3 * oneOverInterfaceScale;
@@ -2803,9 +2814,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   cz2 = cz * cz;
 
 			   // equilibration of 2nd order moments
-			   mfbba = zeroReal;
-			   mfbab = zeroReal;
-			   mfabb = zeroReal;
+			   mfbba = c0o1;
+			   mfbab = c0o1;
+			   mfabb = c0o1;
 
 			   mfcaa = c1o3 * concentration;
 			   mfaca = c1o3 * concentration;
@@ -2822,13 +2833,13 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
 			   // equilibration of 3rd order moments
-			   Mabc = zeroReal;
-			   Mbca = zeroReal;
-			   Macb = zeroReal;
-			   Mcba = zeroReal;
-			   Mcab = zeroReal;
-			   Mbac = zeroReal;
-			   mfbbb = zeroReal;
+			   Mabc = c0o1;
+			   Mbca = c0o1;
+			   Macb = c0o1;
+			   Mcba = c0o1;
+			   Mcab = c0o1;
+			   Mbac = c0o1;
+			   mfbbb = c0o1;
 
 			   // from linearized orthogonalization 3rd order central moments to central moments
 			   mfabc = Mabc + mfaba * c1o3;
@@ -2843,14 +2854,14 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   mfcac = c1o9 * concentration;
 			   mfcca = c1o9 * concentration;
 
-			   mfcbb = zeroReal;
-			   mfbcb = zeroReal;
-			   mfbbc = zeroReal;
+			   mfcbb = c0o1;
+			   mfbcb = c0o1;
+			   mfbbc = c0o1;
 
 			   // equilibration of 5th order moments
-			   Mcbc = zeroReal;
-			   Mbcc = zeroReal;
-			   Mccb = zeroReal;
+			   Mcbc = c0o1;
+			   Mbcc = c0o1;
+			   Mccb = c0o1;
 
 			   // from linearized orthogonalization 5th order central moments to central moments
 			   mfcbc = Mcbc + mfaba * c1o9;
@@ -2868,39 +2879,39 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -3005,7 +3016,7 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 	 //  LBMReal vvz = uz;
 	   ////////////////////////////////////////////////////////////////////////////////////
 	   // second component
-   LBMReal concentration =
+   real concentration =
 	   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 	   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 		   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -3020,26 +3031,26 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
   // vvy += fy * c1o2;
   // vvz += fz * c1o2;
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal oneMinusRho = c1 - concentration;
+   real oneMinusRho = c1o1 - concentration;
 
-   LBMReal cx =
+   real cx =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 	   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 		   (mfcbb - mfabb));
-   LBMReal cy =
+   real cy =
 	   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 	   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 		   (mfbcb - mfbab));
-   LBMReal cz =
+   real cz =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 	   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 		   (mfbbc - mfbba));
 
    ////////////////////////////////////////////////////////////////////////////////////
    // calculate the square of velocities for this lattice node
-   LBMReal cx2 = cx * cx;
-   LBMReal cy2 = cy * cy;
-   LBMReal cz2 = cz * cz;
+   real cx2 = cx * cx;
+   real cy2 = cy * cy;
+   real cz2 = cz * cz;
    ////////////////////////////////////////////////////////////////////////////////////
    //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -3048,63 +3059,63 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho); // Kinverse must be the reciprocal of K (9 <-> 1/9), matching the backward transform
 
    ////////////////////////////////////////////////////////////////////////////////////
    //! - experimental Cumulant ... to be published ... hopefully
    //!
 
    // linearized orthogonalization of 3rd order central moments
-   LBMReal Mabc = mfabc - mfaba * c1o3;
-   LBMReal Mbca = mfbca - mfbaa * c1o3;
-   LBMReal Macb = mfacb - mfaab * c1o3;
-   LBMReal Mcba = mfcba - mfaba * c1o3;
-   LBMReal Mcab = mfcab - mfaab * c1o3;
-   LBMReal Mbac = mfbac - mfbaa * c1o3;
+   real Mabc = mfabc - mfaba * c1o3;
+   real Mbca = mfbca - mfbaa * c1o3;
+   real Macb = mfacb - mfaab * c1o3;
+   real Mcba = mfcba - mfaba * c1o3;
+   real Mcab = mfcab - mfaab * c1o3;
+   real Mbac = mfbac - mfbaa * c1o3;
    // linearized orthogonalization of 5th order central moments
-   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-   LBMReal Mccb = mfccb - mfaab * c1o9;
+   real Mcbc = mfcbc - mfaba * c1o9;
+   real Mbcc = mfbcc - mfbaa * c1o9;
+   real Mccb = mfccb - mfaab * c1o9;
 
    // collision of 1st order moments
-   cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-	   normX1 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-	   normX2 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-	   normX3 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+	   normX1 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+	   normX2 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+	   normX3 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
    //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
    //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -3116,9 +3127,9 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    cz2 = cz * cz;
 
    // equilibration of 2nd order moments
-   mfbba = zeroReal;
-   mfbab = zeroReal;
-   mfabb = zeroReal;
+   mfbba = c0o1;
+   mfbab = c0o1;
+   mfabb = c0o1;
 
    mfcaa = c1o3 * concentration;
    mfaca = c1o3 * concentration;
@@ -3135,13 +3146,13 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
    // equilibration of 3rd order moments
-   Mabc = zeroReal;
-   Mbca = zeroReal;
-   Macb = zeroReal;
-   Mcba = zeroReal;
-   Mcab = zeroReal;
-   Mbac = zeroReal;
-   mfbbb = zeroReal;
+   Mabc = c0o1;
+   Mbca = c0o1;
+   Macb = c0o1;
+   Mcba = c0o1;
+   Mcab = c0o1;
+   Mbac = c0o1;
+   mfbbb = c0o1;
 
    // from linearized orthogonalization 3rd order central moments to central moments
    mfabc = Mabc + mfaba * c1o3;
@@ -3156,14 +3167,14 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    mfcac = c1o9 * concentration;
    mfcca = c1o9 * concentration;
 
-   mfcbb = zeroReal;
-   mfbcb = zeroReal;
-   mfbbc = zeroReal;
+   mfcbb = c0o1;
+   mfbcb = c0o1;
+   mfbbc = c0o1;
 
    // equilibration of 5th order moments
-   Mcbc = zeroReal;
-   Mbcc = zeroReal;
-   Mccb = zeroReal;
+   Mcbc = c0o1;
+   Mbcc = c0o1;
+   Mccb = c0o1;
 
    // from linearized orthogonalization 5th order central moments to central moments
    mfcbc = Mcbc + mfaba * c1o9;
@@ -3181,39 +3192,39 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -3346,9 +3357,11 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
@@ -3359,9 +3372,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
@@ -3372,9 +3387,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
@@ -3385,9 +3402,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi2()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
@@ -3398,9 +3417,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX1_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi2()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
@@ -3411,9 +3432,11 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX2_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi2()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
@@ -3428,10 +3451,12 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::gradX3_phi2()
 
 
 
-LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::nabla2_phi()
+real MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+    real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -3453,6 +3478,8 @@ LBMReal MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::nabla2_phi()
 void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -3507,10 +3534,11 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -3527,10 +3555,11 @@ void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors(CbArray3D<LB
     }
 }
 
-void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsPressureFilterLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.h
index 7d20f8210474b665da49c88068746a39faacfb2e..138b24410b10b4631b1411fba3e803bde504531a 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsPressureFilterLBMKernel.h
@@ -57,74 +57,74 @@ public:
    //CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure;
    
 
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; }
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
 
    void initForcing();
 
-   void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-   void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+   void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+   void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
 
    //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
-
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2; 
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU; 
-
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal phi2[D3Q27System::ENDF + 1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
-
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
-   LBMReal gradX1_phi2();
-   LBMReal gradX2_phi2();
-   LBMReal gradX3_phi2();
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField2; 
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU; 
+
+   real h  [D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real phi2[D3Q27System::ENDF + 1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
+
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
+   real gradX1_phi2();
+   real gradX2_phi2();
+   real gradX3_phi2();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-   void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
    mu::value_type muRho;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.cpp
index ffed1483ca63e674b26023aca87cb63986644813..eb5bd4201e5aa29c66be74440ac619caf3992f86 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.cpp
@@ -51,8 +51,8 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::initDataSet()
     SPtr<DistributionArray3D> h(new D3Q27EsoTwist3DSplittedVector(nx[0] + 2, nx[1] + 2, nx[2] + 2, -999.9)); // For phase-field
     SPtr<DistributionArray3D> h2(new D3Q27EsoTwist3DSplittedVector(nx[0] + 2, nx[1] + 2, nx[2] + 2, -999.9)); // For phase-field
     SPtr<PhaseFieldArray3D> divU(new PhaseFieldArray3D(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
-	 pressure= CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
-	 pressureOld = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<LBMReal, IndexerX3X2X1>(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
+	 pressure= CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
+	 pressureOld = CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr(new  CbArray3D<real, IndexerX3X2X1>(nx[0] + 2, nx[1] + 2, nx[2] + 2, 0.0));
     dataSet->setFdistributions(f);
     dataSet->setHdistributions(h); // For phase-field
     dataSet->setH2distributions(h2); // For phase-field
@@ -84,23 +84,27 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::clone()
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m2 = mfa + mfc;
-	LBMReal m1 = mfc - mfa;
-	LBMReal m0 = m2 + mfb;
+ void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	 
+	real m2 = mfa + mfc;
+	real m1 = mfc - mfa;
+	real m0 = m2 + mfb;
 	mfa = m0;
 	m0 *= Kinverse;
 	m0 += oneMinusRho;
 	mfb = (m1 * Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2 * m1 * vv) * Kinverse + v2 * m0) * K;
+	mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho) {
-	using namespace UbMath;
-    LBMReal m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
-	LBMReal m1 = (((mfa - mfc) - c2 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
+ void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	 
+	real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 - vv) * c1o2) * K;
+	real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (-v2)) * K;
 	mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + oneMinusRho) * (v2 + vv) * c1o2) * K;
 	mfa = m0;
 	mfb = m1;
@@ -108,20 +112,24 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::clone()
 
 
 ////////////////////////////////////////////////////////////////////////////////
- void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal m1 = (mfa + mfc) + mfb;
-	LBMReal m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2 * m1 - c2 * vv * m2);
+ void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	 
+	real m1 = (mfa + mfc) + mfb;
+	real m2 = mfc - mfa;
+	mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
 	mfb = m2 - vv * m1;
 	mfa = m1;
 }
 
 
- void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2) {
-	using namespace UbMath;
-    LBMReal ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-	LBMReal mb = ((mfa - mfc) - mfa * v2) - c2 * mfb * vv;
+ void  MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2) {
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+	 
+	real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+	real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
 	mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
 	mfb = mb;
 	mfa = ma;
@@ -131,13 +139,15 @@ SPtr<LBMKernel> MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::clone()
 void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
-    using namespace UbMath;
+ //   using namespace UbMath;
+	using namespace vf::lbm::dir;
+	using namespace vf::basics::constant;
 
     forcingX1 = 0.0;
     forcingX2 = 0.0;
     forcingX3 = 0.0;
 
-	LBMReal oneOverInterfaceScale = 1.0;
+	real oneOverInterfaceScale = 1.0;
     /////////////////////////////////////
 
     localDistributionsF    = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
@@ -167,12 +177,12 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
     //TODO
 	//very expensive !!!!!
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr phaseField2(
-        new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr divU(
-            new CbArray3D<LBMReal, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+    CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr phaseField2(
+        new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, -999.0));
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr divU(
+            new CbArray3D<real, IndexerX3X2X1>(bcArrayMaxX1, bcArrayMaxX2, bcArrayMaxX3, 0.0));
 
 #pragma omp parallel for
 	  for (int x3 = 0; x3 <= maxX3; x3++) {
@@ -183,34 +193,34 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
                         int x2p = x2 + 1;
                         int x3p = x3 + 1;
 
-                        LBMReal mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
+                        real mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsH1)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsH1)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsH1)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsH1)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsH1)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsH1)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsH1)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsH1)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsH1)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsH1)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsH1)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsH1)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsH1)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsH1)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsH1)(x1, x2, x3);
                         (*phaseField)(x1, x2, x3) = (((mfaaa + mfccc) + (mfaca + mfcac)) + ((mfaac + mfcca)  + (mfcaa + mfacc))  ) +
                                                     (((mfaab + mfacb) + (mfcab + mfccb)) + ((mfaba + mfabc) + (mfcba + mfcbc)) +
                                                     ((mfbaa + mfbac) + (mfbca + mfbcc))) + ((mfabb + mfcbb) +
@@ -286,16 +296,16 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
 						 mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-						LBMReal rhoH = 1.0;
-						LBMReal rhoL = 1.0 / densityRatio;
+						real rhoH = 1.0;
+						real rhoL = 1.0 / densityRatio;
 
-						LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+						real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
 
-						LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+						real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 						(*pressure)(x1, x2, x3) = (*pressure)(x1, x2, x3) + rho * c1o3 * drho;
 
 						////!!!!!! relplace by pointer swap!
@@ -305,7 +315,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
             }
         }
 
-        LBMReal collFactorM;
+        real collFactorM;
         //LBMReal forcingTerm[D3Q27System::ENDF + 1];
 
 		////filter
@@ -366,7 +376,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 				for (int x1 = 0; x1 <= maxX1; x1++) {
 					if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3)) {
 
-						LBMReal sum = 0.;
+						real sum = 0.;
 
 
 
@@ -422,9 +432,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 									int zzz = (zz+x3 <= maxX3) ? ((zz + x3 > 0) ? zz + x3 : maxX3 ): 0;
 
 									if (!bcArray->isSolid(xxx, yyy, zzz) && !bcArray->isUndefined(xxx, yyy, zzz)) {
-										sum+= 64.0/(216.0*(c1+c3*abs(xx))* (c1 + c3 * abs(yy))* (c1 + c3 * abs(zz)))*(*pressure)(xxx, yyy, zzz);
+										sum+= 64.0/(216.0*(c1o1+c3o1 *abs(xx))* (c1o1 + c3o1 * abs(yy))* (c1o1 + c3o1 * abs(zz)))*(*pressure)(xxx, yyy, zzz);
 									}
-									else{ sum+= 64.0 / (216.0 * (c1 + c3 * abs(xx)) * (c1 + c3 * abs(yy)) * (c1 + c3 * abs(zz))) * (*pressure)(x1, x2, x3);
+									else{ sum+= 64.0 / (216.0 * (c1o1 + c3o1 * abs(xx)) * (c1o1 + c3o1 * abs(yy)) * (c1o1 + c3o1 * abs(zz))) * (*pressure)(x1, x2, x3);
 									}
 
 
@@ -490,43 +500,43 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
                         findNeighbors(phaseField, x1, x2, x3);
 						findNeighbors2(phaseField2, x1, x2, x3);
 
-                        LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-                        LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-                        LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-                        LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-                        LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-                        LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-                        LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-                        LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-                        LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-                        LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-                        LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-                        LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-                        LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-                        LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-                        LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-                        LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-                        LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-                        LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-                        LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-                        LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-                        LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-                        LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-                        LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-                        LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-                        LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-                        LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-                        LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-                        LBMReal rhoH = 1.0;
-                        LBMReal rhoL = 1.0 / densityRatio;
-
-                        LBMReal rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
-
-                        LBMReal dX1_phi = gradX1_phi();
-                        LBMReal dX2_phi = gradX2_phi();
-                        LBMReal dX3_phi = gradX3_phi();
+                        real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+                        real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+                        real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+                        real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+                        real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+                        real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+                        real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+                        real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+                        real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+                        real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+                        real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+                        real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+                        real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+                        real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+                        real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+                        real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+                        real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+                        real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+                        real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+                        real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+                        real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+                        real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+                        real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                        real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                        real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                        real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                        real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+                        real rhoH = 1.0;
+                        real rhoL = 1.0 / densityRatio;
+
+                        real rhoToPhi = (rhoH - rhoL) / (phiH - phiL);
+
+                        real dX1_phi = gradX1_phi();
+                        real dX2_phi = gradX2_phi();
+                        real dX3_phi = gradX3_phi();
 
 						//LBMReal dX1_phi2 = gradX1_phi2();
 						//LBMReal dX2_phi2 = gradX2_phi2();
@@ -538,20 +548,20 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 						//LBMReal normX2 = (dX2_phi-dX2_phi2)/denom2;
 						//LBMReal normX3 = (dX3_phi-dX3_phi2)/denom2;
 
-						LBMReal denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
-						LBMReal normX1 = dX1_phi / denom;
-						LBMReal normX2 = dX2_phi / denom;
-						LBMReal normX3 = dX3_phi / denom;
+						real denom = sqrt(dX1_phi * dX1_phi + dX2_phi * dX2_phi + dX3_phi * dX3_phi) + 1e-9;
+						real normX1 = dX1_phi / denom;
+						real normX2 = dX2_phi / denom;
+						real normX3 = dX3_phi / denom;
 
 
 
 						collFactorM = collFactorL + (collFactorL - collFactorG) * (phi[DIR_000] - phiH) / (phiH - phiL);
 
 
-                        LBMReal mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
+                        real mu = 2 * beta * phi[DIR_000] * (phi[DIR_000] - 1) * (2 * phi[DIR_000] - 1) - kappa * nabla2_phi();
 
                         //----------- Calculating Macroscopic Values -------------
-                        LBMReal rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
+                        real rho = rhoH + rhoToPhi * (phi[DIR_000] - phiH);
 
                             			   ////Incompressible Kernal
 
@@ -585,21 +595,21 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 						//mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);// / rho * c3;
 
 
-			   LBMReal m0, m1, m2;
-			   LBMReal rhoRef=c1;
+			   real m0, m1, m2;
+			   real rhoRef=c1o1;
 
 			  //LBMReal 
 			//    LBMReal drho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 			// 	   + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 			// 	   + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-			   LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+			   real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 				   (mfcbb - mfabb))/rhoRef;
-			   LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+			   real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 				   (mfbcb - mfbab))/rhoRef;
-			   LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+			   real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 				   (mfbbc - mfbba))/rhoRef;
 
@@ -634,52 +644,52 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 				  // + WEIGTH[DIR_P00] * ((*pressure)(x1, x2, x3+1) - (*pressure)(x1, x2, x3-1)));
 			  
 			   
-			   LBMReal gradPx = 0.0;
-			   LBMReal gradPy = 0.0;
-			   LBMReal gradPz = 0.0;
+			   real gradPx = 0.0;
+			   real gradPy = 0.0;
+			   real gradPz = 0.0;
 			   for (int dir1 = -1; dir1 <= 1; dir1++) {
 				   for (int dir2 = -1; dir2 <= 1; dir2++) {
 					   int yyy = x2 + dir1;
 					   int zzz = x3 + dir2;
 					   if (!bcArray->isSolid(x1-1, yyy, zzz) && !bcArray->isUndefined(x1-1, yyy, zzz)) {
-						   gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx -= (*pressure)(x1 - 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(x1 + 1, yyy, zzz) && !bcArray->isUndefined(x1 - 1, yyy, zzz)) {
-						   gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx += (*pressure)(x1 + 1, yyy, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPx += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 					   int xxx = x1 + dir1;
 					   if (!bcArray->isSolid(xxx, x2-1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-						   gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy -= (*pressure)(xxx, x2-1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(xxx, x2+1, zzz) && !bcArray->isUndefined(xxx, x2-1, zzz)) {
-						   gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy += (*pressure)(xxx, x2+1, zzz) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPy += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 					   yyy = x2 + dir2;
 					   if (!bcArray->isSolid(xxx, yyy, x3-1) && !bcArray->isUndefined(xxx, yyy, x3-1)) {
-						   gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz -= (*pressure)(xxx, yyy, x3-1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz -= (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   if (!bcArray->isSolid(xxx, yyy, x3+1) && !bcArray->isUndefined(xxx, yyy, x3+1)) {
-						   gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz += (*pressure)(xxx, yyy, x3+1) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 					   else {
-						   gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1 + c3 * abs(dir1)) * (c1 + c3 * abs(dir2)));
+						   gradPz += (*pressure)(x1, x2, x3) * c2o9 / ((c1o1 + c3o1 * abs(dir1)) * (c1o1 + c3o1 * abs(dir2)));
 					   }
 
 				   }
@@ -930,9 +940,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   ///////////////////////////////////////////////////////////////////////////////////////////
 			   if (withForcing)
 			   {
-				   muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-				   muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-				   muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+				   muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+				   muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+				   muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 				   //forcingX1 = muForcingX1.Eval();
 				   //forcingX2 = muForcingX2.Eval();
@@ -943,14 +953,14 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 				   //vvz += forcingX3 * deltaT * 0.5; // Z
 			   }
 
-			   LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+			   real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx * vvx;
                vy2 = vvy * vvy;
                vz2 = vvz * vvz;
 			   ///////////////////////////////////////////////////////////////////////////////////////////               
-			   LBMReal oMdrho;
+			   real oMdrho;
 
 
 			   oMdrho = mfccc + mfaaa;
@@ -980,8 +990,8 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   oMdrho = (rhoRef - (oMdrho + m0))/rhoRef;// 12.03.21 check derivation!!!!
 
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal wadjust;
-			   LBMReal qudricLimit = 0.01;
+			   real wadjust;
+			   real qudricLimit = 0.01;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //Hin
 			   ////////////////////////////////////////////////////////////////////////////////////
@@ -1215,23 +1225,23 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
 
 			  // mfaaa = 0.0;
-			   LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
+			   real OxxPyyPzz = 1.; //omega2 or bulk viscosity
 			 //  LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
 			 //  LBMReal OxyyMxzz  = 1.;//2+s9;//
-			   LBMReal O4 = 1.;
-			   LBMReal O5 = 1.;
-			   LBMReal O6 = 1.;
+			   real O4 = 1.;
+			   real O5 = 1.;
+			   real O6 = 1.;
 
 
 
 			   /////fourth order parameters; here only for test. Move out of loop!
 
-			   LBMReal OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
-			   LBMReal OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
-			//    LBMReal Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
-			   LBMReal A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real OxyyPxzz = 8.0 * (collFactorM - 2.0) * (OxxPyyPzz * (3.0 * collFactorM - 1.0) - 5.0 * collFactorM) / (8.0 * (5.0 - 2.0 * collFactorM) * collFactorM + OxxPyyPzz * (8.0 + collFactorM * (9.0 * collFactorM - 26.0)));
+			   real OxyyMxzz = 8.0 * (collFactorM - 2.0) * (collFactorM + OxxPyyPzz * (3.0 * collFactorM - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * collFactorM + 9.0 * collFactorM * collFactorM) - 8.0 * collFactorM);
+			//    real Oxyz = 24.0 * (collFactorM - 2.0) * (4.0 * collFactorM * collFactorM + collFactorM * OxxPyyPzz * (18.0 - 13.0 * collFactorM) + OxxPyyPzz * OxxPyyPzz * (2.0 + collFactorM * (6.0 * collFactorM - 11.0))) / (16.0 * collFactorM * collFactorM * (collFactorM - 6.0) - 2.0 * collFactorM * OxxPyyPzz * (216.0 + 5.0 * collFactorM * (9.0 * collFactorM - 46.0)) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (3.0 * collFactorM - 10.0) * (15.0 * collFactorM - 28.0) - 48.0));
+			   real A = (4.0 * collFactorM * collFactorM + 2.0 * collFactorM * OxxPyyPzz * (collFactorM - 6.0) + OxxPyyPzz * OxxPyyPzz * (collFactorM * (10.0 - 3.0 * collFactorM) - 4.0)) / ((collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 			   //FIXME:  warning C4459: declaration of 'B' hides global declaration (message : see declaration of 'D3Q27System::DIR_00M' )
-			   LBMReal BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
+			   real BB = (4.0 * collFactorM * OxxPyyPzz * (9.0 * collFactorM - 16.0) - 4.0 * collFactorM * collFactorM - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * collFactorM * (collFactorM - 2.0))) / (3.0 * (collFactorM - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * collFactorM) - 8.0 * collFactorM));
 
 
 			   //Cum 4.
@@ -1239,21 +1249,21 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 			   //LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-			   LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-			   LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-			   LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+			   real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+			   real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+			   real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
-			   LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho);
+			   real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
+			   real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho);
 
 			   //Cum 5.
-			   LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-			   LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-			   LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+			   real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+			   real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+			   real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 			   //Cum 6.
-			   LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+			   real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 				   - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 				   - 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 				   - 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -1267,13 +1277,13 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
 			   //2.
 			   // linear combinations
-			   LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
+			   real mxxPyyPzz = mfcaa + mfaca + mfaac;
 
 			//  LBMReal mfaaaS = (mfaaa * (-4 - 3 * OxxPyyPzz * (-1 + rho)) + 6 * mxxPyyPzz * OxxPyyPzz * (-1 + rho)) / (-4 + 3 * OxxPyyPzz * (-1 + rho));
 			  mxxPyyPzz -= mfaaa ;//12.03.21 shifted by mfaaa
 				//mxxPyyPzz-=(mfaaa+mfaaaS)*c1o2;//12.03.21 shifted by mfaaa
-			   LBMReal mxxMyy = mfcaa - mfaca;
-			   LBMReal mxxMzz = mfcaa - mfaac;
+			   real mxxMyy = mfcaa - mfaca;
+			   real mxxMzz = mfcaa - mfaac;
 
 			   //applying phase field gradients first part:
 			  // mxxPyyPzz += c2o3 * rhoToPhi * (dX1_phi * vvx + dX2_phi * vvy + dX3_phi * vvz);
@@ -1288,13 +1298,13 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
                //mfbab += c1o6 * (dX1_phi * vvz + dX3_phi * vvx) * correctionScaling;
                //mfbba += c1o6 * (dX1_phi * vvy + dX2_phi * vvx) * correctionScaling;
 
-			   LBMReal dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
-			   LBMReal dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
-			   LBMReal dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
+			   real dxux =  -c1o2 * collFactorM * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (/*mfaaa*/ -mxxPyyPzz);
+			   real dyuy =  dxux + collFactorM * c3o2 * mxxMyy;
+			   real dzuz =  dxux + collFactorM * c3o2 * mxxMzz;
 
-			   LBMReal Dxy = -three * collFactorM * mfbba;
-			   LBMReal Dxz = -three * collFactorM * mfbab;
-			   LBMReal Dyz = -three * collFactorM * mfabb;
+			   real Dxy = -c3o1 * collFactorM * mfbba;
+			   real Dxz = -c3o1 * collFactorM * mfbab;
+			   real Dyz = -c3o1 * collFactorM * mfabb;
 
 
 			   //relax
@@ -1332,14 +1342,14 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 
 			   //3.
 			   // linear combinations
-			   LBMReal mxxyPyzz = mfcba + mfabc;
-			   LBMReal mxxyMyzz = mfcba - mfabc;
+			   real mxxyPyzz = mfcba + mfabc;
+			   real mxxyMyzz = mfcba - mfabc;
 
-			   LBMReal mxxzPyyz = mfcab + mfacb;
-			   LBMReal mxxzMyyz = mfcab - mfacb;
+			   real mxxzPyyz = mfcab + mfacb;
+			   real mxxzMyyz = mfcab - mfacb;
 
-			   LBMReal mxyyPxzz = mfbca + mfbac;
-			   LBMReal mxyyMxzz = mfbca - mfbac;
+			   real mxyyPxzz = mfbca + mfbac;
+			   real mxyyMxzz = mfbca - mfbac;
 
 			   //relax
 			   wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -1373,12 +1383,12 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //CUMbbc += O4 * (-CUMbbc);
 			   //CUMbcb += O4 * (-CUMbcb);
 			   //CUMcbb += O4 * (-CUMcbb);
-			   CUMacc = -O4 * (one / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
-			   CUMcac = -O4 * (one / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
-			   CUMcca = -O4 * (one / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
-			   CUMbbc = -O4 * (one / collFactorM - c1o2) * Dxy * c1o3 * BB + (one - O4) * (CUMbbc);
-			   CUMbcb = -O4 * (one / collFactorM - c1o2) * Dxz * c1o3 * BB + (one - O4) * (CUMbcb);
-			   CUMcbb = -O4 * (one / collFactorM - c1o2) * Dyz * c1o3 * BB + (one - O4) * (CUMcbb);
+			   CUMacc = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+			   CUMcac = -O4 * (c1o1 / collFactorM - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+			   CUMcca = -O4 * (c1o1 / collFactorM - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+			   CUMbbc = -O4 * (c1o1 / collFactorM - c1o2) * Dxy * c1o3 * BB + (c1o1 - O4) * (CUMbbc);
+			   CUMbcb = -O4 * (c1o1 / collFactorM - c1o2) * Dxz * c1o3 * BB + (c1o1 - O4) * (CUMbcb);
+			   CUMcbb = -O4 * (c1o1 / collFactorM - c1o2) * Dyz * c1o3 * BB + (c1o1 - O4) * (CUMcbb);
 
 			   //5.
 			   CUMbcc += O5 * (-CUMbcc);
@@ -1398,9 +1408,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
 			   mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
-			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1) * oMdrho;
+			   mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
+			   mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - c1o1) * oMdrho;
 
 			   //5.
 			   mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
@@ -2557,7 +2567,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
                         /////////////////////  P H A S E - F I E L D   S O L V E R
                         ////////////////////////////////////////////
 		/////CUMULANT PHASE-FIELD
-				LBMReal omegaD =1.0/( 3.0 * mob + 0.5);
+				real omegaD =1.0/( 3.0 * mob + 0.5);
 				{
 			   mfcbb = (*this->localDistributionsH1)(D3Q27System::ET_E, x1, x2, x3);
 			   mfbcb = (*this->localDistributionsH1)(D3Q27System::ET_N, x1, x2, x3);
@@ -2621,7 +2631,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			 //  LBMReal vvz = uz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // second component
-			   LBMReal concentration =
+			   real concentration =
 				   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 					   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2636,26 +2646,26 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			  // vvy += fy * c1o2;
 			  // vvz += fz * c1o2;
 			   ////////////////////////////////////////////////////////////////////////////////////
-			   LBMReal oneMinusRho = c1- concentration;
+			   real oneMinusRho = c1o1 - concentration;
 
-			   LBMReal cx =
+			   real cx =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 				   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 					   (mfcbb - mfabb));
-			   LBMReal cy =
+			   real cy =
 				   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 				   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 					   (mfbcb - mfbab));
-			   LBMReal cz =
+			   real cz =
 				   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 				   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 					   (mfbbc - mfbba));
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // calculate the square of velocities for this lattice node
-			   LBMReal cx2 = cx * cx;
-			   LBMReal cy2 = cy * cy;
-			   LBMReal cz2 = cz * cz;
+			   real cx2 = cx * cx;
+			   real cy2 = cy * cy;
+			   real cz2 = cz * cz;
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
 			   //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2664,66 +2674,66 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+			   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   //! - experimental Cumulant ... to be published ... hopefully
 			   //!
 
 			   // linearized orthogonalization of 3rd order central moments
-			   LBMReal Mabc = mfabc - mfaba * c1o3;
-			   LBMReal Mbca = mfbca - mfbaa * c1o3;
-			   LBMReal Macb = mfacb - mfaab * c1o3;
-			   LBMReal Mcba = mfcba - mfaba * c1o3;
-			   LBMReal Mcab = mfcab - mfaab * c1o3;
-			   LBMReal Mbac = mfbac - mfbaa * c1o3;
+			   real Mabc = mfabc - mfaba * c1o3;
+			   real Mbca = mfbca - mfbaa * c1o3;
+			   real Macb = mfacb - mfaab * c1o3;
+			   real Mcba = mfcba - mfaba * c1o3;
+			   real Mcab = mfcab - mfaab * c1o3;
+			   real Mbac = mfbac - mfbaa * c1o3;
 			   // linearized orthogonalization of 5th order central moments
-			   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-			   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-			   LBMReal Mccb = mfccb - mfaab * c1o9;
+			   real Mcbc = mfcbc - mfaba * c1o9;
+			   real Mbcc = mfbcc - mfbaa * c1o9;
+			   real Mccb = mfccb - mfaab * c1o9;
 
 			   // collision of 1st order moments
 			  // LBMReal ccx, ccy, ccz;
 			   
 
-               cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-                    normX1 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-                    normX2 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
-               cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-                    normX3 * (c1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+                    normX1 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+                    normX2 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
+               cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+                    normX3 * (c1o1 - 0.5 * omegaD) * (1.0 - phi[DIR_000]) * (phi[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
 			   //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
 			   //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -2735,9 +2745,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   cz2 = cz * cz;
 
 			   // equilibration of 2nd order moments
-			   mfbba = zeroReal;
-			   mfbab = zeroReal;
-			   mfabb = zeroReal;
+			   mfbba = c0o1;
+			   mfbab = c0o1;
+			   mfabb = c0o1;
 
 			   mfcaa = c1o3 * concentration;
 			   mfaca = c1o3 * concentration;
@@ -2754,13 +2764,13 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
 			   // equilibration of 3rd order moments
-			   Mabc = zeroReal;
-			   Mbca = zeroReal;
-			   Macb = zeroReal;
-			   Mcba = zeroReal;
-			   Mcab = zeroReal;
-			   Mbac = zeroReal;
-			   mfbbb = zeroReal;
+			   Mabc = c0o1;
+			   Mbca = c0o1;
+			   Macb = c0o1;
+			   Mcba = c0o1;
+			   Mcab = c0o1;
+			   Mbac = c0o1;
+			   mfbbb = c0o1;
 
 			   // from linearized orthogonalization 3rd order central moments to central moments
 			   mfabc = Mabc + mfaba * c1o3;
@@ -2775,14 +2785,14 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   mfcac = c1o9 * concentration;
 			   mfcca = c1o9 * concentration;
 
-			   mfcbb = zeroReal;
-			   mfbcb = zeroReal;
-			   mfbbc = zeroReal;
+			   mfcbb = c0o1;
+			   mfbcb = c0o1;
+			   mfbbc = c0o1;
 
 			   // equilibration of 5th order moments
-			   Mcbc = zeroReal;
-			   Mbcc = zeroReal;
-			   Mccb = zeroReal;
+			   Mcbc = c0o1;
+			   Mbcc = c0o1;
+			   Mccb = c0o1;
 
 			   // from linearized orthogonalization 5th order central moments to central moments
 			   mfcbc = Mcbc + mfaba * c1o9;
@@ -2800,39 +2810,39 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 			   //!
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // X - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
 			   backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
 			   backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
 			   backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
 			   backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Y - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
 			   backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
 			   backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
 			   backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
 			   ////////////////////////////////////////////////////////////////////////////////////
 			   // Z - Dir
-			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
 			   backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+			   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -2937,7 +2947,7 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 	 //  LBMReal vvz = uz;
 	   ////////////////////////////////////////////////////////////////////////////////////
 	   // second component
-   LBMReal concentration =
+   real concentration =
 	   ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 	   (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 		   ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
@@ -2952,26 +2962,26 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
   // vvy += fy * c1o2;
   // vvz += fz * c1o2;
    ////////////////////////////////////////////////////////////////////////////////////
-   LBMReal oneMinusRho = c1 - concentration;
+   real oneMinusRho = c1o1 - concentration;
 
-   LBMReal cx =
+   real cx =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 	   (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 		   (mfcbb - mfabb));
-   LBMReal cy =
+   real cy =
 	   ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 	   (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 		   (mfbcb - mfbab));
-   LBMReal cz =
+   real cz =
 	   ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 	   (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 		   (mfbbc - mfbba));
 
    ////////////////////////////////////////////////////////////////////////////////////
    // calculate the square of velocities for this lattice node
-   LBMReal cx2 = cx * cx;
-   LBMReal cy2 = cy * cy;
-   LBMReal cz2 = cz * cz;
+   real cx2 = cx * cx;
+   real cy2 = cy * cy;
+   real cz2 = cz * cz;
    ////////////////////////////////////////////////////////////////////////////////////
    //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -2980,63 +2990,63 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    forwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    forwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    forwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    forwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    forwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    forwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    forwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    forwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3, c1o9, oneMinusRho);
+   forwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c3o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    //! - experimental Cumulant ... to be published ... hopefully
    //!
 
    // linearized orthogonalization of 3rd order central moments
-   LBMReal Mabc = mfabc - mfaba * c1o3;
-   LBMReal Mbca = mfbca - mfbaa * c1o3;
-   LBMReal Macb = mfacb - mfaab * c1o3;
-   LBMReal Mcba = mfcba - mfaba * c1o3;
-   LBMReal Mcab = mfcab - mfaab * c1o3;
-   LBMReal Mbac = mfbac - mfbaa * c1o3;
+   real Mabc = mfabc - mfaba * c1o3;
+   real Mbca = mfbca - mfbaa * c1o3;
+   real Macb = mfacb - mfaab * c1o3;
+   real Mcba = mfcba - mfaba * c1o3;
+   real Mcab = mfcab - mfaab * c1o3;
+   real Mbac = mfbac - mfbaa * c1o3;
    // linearized orthogonalization of 5th order central moments
-   LBMReal Mcbc = mfcbc - mfaba * c1o9;
-   LBMReal Mbcc = mfbcc - mfbaa * c1o9;
-   LBMReal Mccb = mfccb - mfaab * c1o9;
+   real Mcbc = mfcbc - mfaba * c1o9;
+   real Mbcc = mfbcc - mfbaa * c1o9;
+   real Mccb = mfccb - mfaab * c1o9;
 
    // collision of 1st order moments
-   cx = cx * (c1 - omegaD) + omegaD * vvx * concentration +
-	   normX1 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cy = cy * (c1 - omegaD) + omegaD * vvy * concentration +
-	   normX2 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
-   cz = cz * (c1 - omegaD) + omegaD * vvz * concentration +
-	   normX3 * (c1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cx = cx * (c1o1 - omegaD) + omegaD * vvx * concentration +
+	   normX1 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cy = cy * (c1o1 - omegaD) + omegaD * vvy * concentration +
+	   normX2 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
+   cz = cz * (c1o1 - omegaD) + omegaD * vvz * concentration +
+	   normX3 * (c1o1 - 0.5 * omegaD) * ( phi[DIR_000]) * (phi2[DIR_000]) * c1o3 * oneOverInterfaceScale;
 
    //mhx = (ux * phi[REST] + normX1 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhx;
    //mhy = (uy * phi[REST] + normX2 * (tauH - 0.5) * (1.0 - phi[REST]) * (phi[REST])) / tauH + (1.0 - 1.0 / tauH) * mhy;
@@ -3048,9 +3058,9 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    cz2 = cz * cz;
 
    // equilibration of 2nd order moments
-   mfbba = zeroReal;
-   mfbab = zeroReal;
-   mfabb = zeroReal;
+   mfbba = c0o1;
+   mfbab = c0o1;
+   mfabb = c0o1;
 
    mfcaa = c1o3 * concentration;
    mfaca = c1o3 * concentration;
@@ -3067,13 +3077,13 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    //mfaac = mfaac*(c1 - omega2) + omega2*c1o3 * concentration;
 
    // equilibration of 3rd order moments
-   Mabc = zeroReal;
-   Mbca = zeroReal;
-   Macb = zeroReal;
-   Mcba = zeroReal;
-   Mcab = zeroReal;
-   Mbac = zeroReal;
-   mfbbb = zeroReal;
+   Mabc = c0o1;
+   Mbca = c0o1;
+   Macb = c0o1;
+   Mcba = c0o1;
+   Mcab = c0o1;
+   Mbac = c0o1;
+   mfbbb = c0o1;
 
    // from linearized orthogonalization 3rd order central moments to central moments
    mfabc = Mabc + mfaba * c1o3;
@@ -3088,14 +3098,14 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    mfcac = c1o9 * concentration;
    mfcca = c1o9 * concentration;
 
-   mfcbb = zeroReal;
-   mfbcb = zeroReal;
-   mfbbc = zeroReal;
+   mfcbb = c0o1;
+   mfbcb = c0o1;
+   mfbbc = c0o1;
 
    // equilibration of 5th order moments
-   Mcbc = zeroReal;
-   Mbcc = zeroReal;
-   Mccb = zeroReal;
+   Mcbc = c0o1;
+   Mbcc = c0o1;
+   Mccb = c0o1;
 
    // from linearized orthogonalization 5th order central moments to central moments
    mfcbc = Mcbc + mfaba * c1o9;
@@ -3113,39 +3123,39 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
    //!
    ////////////////////////////////////////////////////////////////////////////////////
    // X - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1, c1, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfbaa, mfcaa, cx, cx2, c1o1, c1o1, oneMinusRho);
    backwardChimera(mfaba, mfbba, mfcba, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfbca, mfcca, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfaab, mfbab, mfcab, cx, cx2);
    backwardChimera(mfabb, mfbbb, mfcbb, cx, cx2);
    backwardChimera(mfacb, mfbcb, mfccb, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3, c1o3, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfbac, mfcac, cx, cx2, c3o1, c1o3, oneMinusRho);
    backwardChimera(mfabc, mfbbc, mfcbc, cx, cx2);
-   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfacc, mfbcc, mfccc, cx, cx2, c9o1, c1o9, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Y - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaba, mfaca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfaab, mfabb, mfacb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaac, mfabc, mfacc, cy, cy2, c18o1, c1o18, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbaa, mfbba, mfbca, cy, cy2, c3o2, c2o3, oneMinusRho);
    backwardChimera(mfbab, mfbbb, mfbcb, cy, cy2);
    backwardInverseChimeraWithKincompressible(mfbac, mfbbc, mfbcc, cy, cy2, c9o2, c2o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6, c1o6, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcba, mfcca, cy, cy2, c6o1, c1o6, oneMinusRho);
    backwardChimera(mfcab, mfcbb, mfccb, cy, cy2);
-   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18, c1o18, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcac, mfcbc, mfccc, cy, cy2, c18o1, c1o18, oneMinusRho);
 
    ////////////////////////////////////////////////////////////////////////////////////
    // Z - Dir
-   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaaa, mfaab, mfaac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaba, mfabb, mfabc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfaca, mfacb, mfacc, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbaa, mfbab, mfbac, cz, cz2, c9o1, c1o9, oneMinusRho);
    backwardInverseChimeraWithKincompressible(mfbba, mfbbb, mfbbc, cz, cz2, c9o4, c4o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36, c1o36, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9, c1o9, oneMinusRho);
-   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfbca, mfbcb, mfbcc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcaa, mfcab, mfcac, cz, cz2, c36o1, c1o36, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcba, mfcbb, mfcbc, cz, cz2, c9o1, c1o9, oneMinusRho);
+   backwardInverseChimeraWithKincompressible(mfcca, mfccb, mfccc, cz, cz2, c36o1, c1o36, oneMinusRho);
 
 
 
@@ -3278,9 +3288,11 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::calculate(int step)
 }
 //////////////////////////////////////////////////////////////////////////
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0* ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) + (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) + (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) + (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_PM0] - phi[DIR_MP0]) + (phi[DIR_PP0] - phi[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_P00] - phi[DIR_M00]));
@@ -3291,9 +3303,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PPM] - phi[DIR_MMP])- (phi[DIR_PMP] - phi[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_0PP] - phi[DIR_0MM]) + (phi[DIR_0PM] - phi[DIR_0MP])) + ((phi[DIR_PP0] - phi[DIR_MM0])- (phi[DIR_PM0] - phi[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_0P0] - phi[DIR_0M0]));
@@ -3304,9 +3318,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi[DIR_PPP] - phi[DIR_MMM]) - (phi[DIR_PMM] - phi[DIR_MPP])) + ((phi[DIR_PMP] - phi[DIR_MPM]) - (phi[DIR_PPM] - phi[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi[DIR_P0P] - phi[DIR_M0M]) - (phi[DIR_P0M] - phi[DIR_M0P])) + ((phi[DIR_0MP] - phi[DIR_0PM]) + (phi[DIR_0PP] - phi[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi[DIR_00P] - phi[DIR_00M]));
@@ -3317,9 +3333,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi()
     //return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi2()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) + (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) + (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) + (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_PM0] - phi2[DIR_MP0]) + (phi2[DIR_PP0] - phi2[DIR_MM0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_P00] - phi2[DIR_M00]));
@@ -3330,9 +3348,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX1_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi2()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PPM] - phi2[DIR_MMP]) - (phi2[DIR_PMP] - phi2[DIR_MPM])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_0PP] - phi2[DIR_0MM]) + (phi2[DIR_0PM] - phi2[DIR_0MP])) + ((phi2[DIR_PP0] - phi2[DIR_MM0]) - (phi2[DIR_PM0] - phi2[DIR_MP0])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_0P0] - phi2[DIR_0M0]));
@@ -3343,9 +3363,11 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX2_phi2()
 	//return 3.0 * sum;
 }
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi2()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi2()
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
 	return 3.0 * ((WEIGTH[DIR_PPP] * (((phi2[DIR_PPP] - phi2[DIR_MMM]) - (phi2[DIR_PMM] - phi2[DIR_MPP])) + ((phi2[DIR_PMP] - phi2[DIR_MPM]) - (phi2[DIR_PPM] - phi2[DIR_MMP])))
 		+ WEIGTH[DIR_PP0] * (((phi2[DIR_P0P] - phi2[DIR_M0M]) - (phi2[DIR_P0M] - phi2[DIR_M0P])) + ((phi2[DIR_0MP] - phi2[DIR_0PM]) + (phi2[DIR_0PP] - phi2[DIR_0MM])))) +
 		+WEIGTH[DIR_0P0] * (phi2[DIR_00P] - phi2[DIR_00M]));
@@ -3360,10 +3382,12 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::gradX3_phi2()
 
 
 
-LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::nabla2_phi()
+real MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::nabla2_phi()
 {
     using namespace D3Q27System;
-    LBMReal sum = 0.0;
+	using namespace vf::lbm::dir;
+
+    real sum = 0.0;
 	sum += WEIGTH[DIR_PPP] * ((((phi[DIR_PPP] - phi[DIR_000]) + (phi[DIR_MMM] - phi[DIR_000])) + ((phi[DIR_MMP] - phi[DIR_000]) + (phi[DIR_PPM] - phi[DIR_000])))
 		+ (((phi[DIR_MPP] - phi[DIR_000]) + (phi[DIR_PMM] - phi[DIR_000])) + ((phi[DIR_PMP] - phi[DIR_000]) + (phi[DIR_MPM] - phi[DIR_000]))));
 	sum += WEIGTH[DIR_0PP] * (
@@ -3385,6 +3409,8 @@ LBMReal MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::nabla2_phi()
 void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::computePhasefield()
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
+
     SPtr<DistributionArray3D> distributionsH = dataSet->getHdistributions();
 
     int minX1 = ghostLayerWidth;
@@ -3439,10 +3465,11 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::computePhasefield()
     }
 }
 
-void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
                                                 int x3)
 {
     using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
     SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
@@ -3459,10 +3486,11 @@ void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors(CbArray3D<
     }
 }
 
-void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
+void MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel::findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2,
 	int x3)
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.h
index 070aff23ff78d079d12806b529a750b007ae7137..a34858ae47c0cb5b10755b21df14290fa242115a 100644
--- a/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel.h
@@ -51,69 +51,69 @@ public:
    virtual ~MultiphaseTwoPhaseFieldsVelocityCumulantLBMKernel(void) = default;
    void calculate(int step) override;
    SPtr<LBMKernel> clone() override;
-   void forwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void backwardInverseChimeraWithKincompressible(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2, LBMReal Kinverse, LBMReal K, LBMReal oneMinusRho);
-   void forwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
-   void backwardChimera(LBMReal& mfa, LBMReal& mfb, LBMReal& mfc, LBMReal vv, LBMReal v2);
+   void forwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void backwardInverseChimeraWithKincompressible(real& mfa, real& mfb, real& mfc, real vv, real v2, real Kinverse, real K, real oneMinusRho);
+   void forwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
+   void backwardChimera(real& mfa, real& mfb, real& mfc, real vv, real v2);
 
    ///refactor
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressure;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr pressureOld;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressure;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr pressureOld;
 
-   double getCalculationTime() override { return .0; }
+   real getCalculationTime() override { return .0; }
 protected:
    virtual void initDataSet();
    void swapDistributions() override;
-   LBMReal f1[D3Q27System::ENDF+1];
+   real f1[D3Q27System::ENDF+1];
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH1;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH1;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH1;
 
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
-   CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
-   CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH2;
+   CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH2;
+   CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsH2;
 
    //CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   phaseField;
 
 
-   LBMReal h  [D3Q27System::ENDF+1];
-   LBMReal h2[D3Q27System::ENDF + 1];
-   LBMReal g  [D3Q27System::ENDF+1];
-   LBMReal phi[D3Q27System::ENDF+1];
-   LBMReal phi2[D3Q27System::ENDF + 1];
-   LBMReal pr1[D3Q27System::ENDF+1];
-   LBMReal phi_cutoff[D3Q27System::ENDF+1];
-
-   LBMReal gradX1_phi();
-   LBMReal gradX2_phi();
-   LBMReal gradX3_phi();
-   LBMReal gradX1_phi2();
-   LBMReal gradX2_phi2();
-   LBMReal gradX3_phi2();
+   real h  [D3Q27System::ENDF+1];
+   real h2[D3Q27System::ENDF + 1];
+   real g  [D3Q27System::ENDF+1];
+   real phi[D3Q27System::ENDF+1];
+   real phi2[D3Q27System::ENDF + 1];
+   real pr1[D3Q27System::ENDF+1];
+   real phi_cutoff[D3Q27System::ENDF+1];
+
+   real gradX1_phi();
+   real gradX2_phi();
+   real gradX3_phi();
+   real gradX1_phi2();
+   real gradX2_phi2();
+   real gradX3_phi2();
    //LBMReal gradX1_pr1();
    //LBMReal gradX2_pr1();
    //LBMReal gradX3_pr1();
    //LBMReal dirgradC_phi(int n, int k);
    void computePhasefield();
-   void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
-   void findNeighbors2(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
+   void findNeighbors(CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, int x1, int x2, int x3);
+   void findNeighbors2(CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr ph, int x1, int x2, int x3);
    //void findNeighbors(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr ph /*Phase-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, int x1, int x2, int x3);
    //void pressureFiltering(CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf /*Pressure-Field*/, CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr pf_filtered /*Pressure-Field*/);
 
-   LBMReal nabla2_phi();
+   real nabla2_phi();
 
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/Rheology.cpp b/src/cpu/VirtualFluidsCore/LBM/Rheology.cpp
index 55f8bba509d53392c804b44e43e54970f3cf1157..79d2c26cc865ac08549a5b85bc996c0c4e9df51d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/Rheology.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/Rheology.cpp
@@ -33,13 +33,13 @@
 #include "Rheology.h"
 
 SPtr<Rheology> Rheology::instance = SPtr<Rheology>();
-LBMReal Rheology::tau0 = 0;
-LBMReal Rheology::k = 0;
-LBMReal Rheology::n = 1;
-LBMReal Rheology::omegaMin = 0;
-LBMReal Rheology::beta = 0;
-LBMReal Rheology::c = 0;
-LBMReal Rheology::mu0 = 0;
+real Rheology::tau0 = 0;
+real Rheology::k = 0;
+real Rheology::n = 1;
+real Rheology::omegaMin = 0;
+real Rheology::beta = 0;
+real Rheology::c = 0;
+real Rheology::mu0 = 0;
 
 //////////////////////////////////////////////////////////////////////////
 SPtr<Rheology> Rheology::getInstance()
@@ -49,66 +49,66 @@ SPtr<Rheology> Rheology::getInstance()
    return instance;
 }
 
-void Rheology::setYieldStress(LBMReal yieldStress)
+void Rheology::setYieldStress(real yieldStress)
 {
 	tau0 = yieldStress;
 }
-LBMReal Rheology::getYieldStress() const
+real Rheology::getYieldStress() const
 {
 	return tau0;
 }
-void Rheology::setViscosityParameter(LBMReal kParameter)
+void Rheology::setViscosityParameter(real kParameter)
 {
 	k = kParameter;
 }
-LBMReal Rheology::getViscosityParameter() const
+real Rheology::getViscosityParameter() const
 {
 	return k;
 }
-void Rheology::setPowerIndex(LBMReal index)
+void Rheology::setPowerIndex(real index)
 {
 	n = index;
 }
-LBMReal Rheology::getPowerIndex() const
+real Rheology::getPowerIndex() const
 {
 	return n;
 }
 
-void Rheology::setOmegaMin(LBMReal omega)
+void Rheology::setOmegaMin(real omega)
 {
 	omegaMin = omega;
 }
-LBMReal Rheology::getOmegaMin() const
+real Rheology::getOmegaMin() const
 {
 	return omegaMin;
 }
 
-void Rheology::setBeta(LBMReal PowellEyringBeta)
+void Rheology::setBeta(real PowellEyringBeta)
 {
 	beta = PowellEyringBeta;
 }
 
-LBMReal Rheology::getBeta() const
+real Rheology::getBeta() const
 {
 	return beta;
 }
 
-void Rheology::setC(LBMReal PowellEyringC)
+void Rheology::setC(real PowellEyringC)
 {
 	c = PowellEyringC;
 }
 
-LBMReal Rheology::getC() const
+real Rheology::getC() const
 {
 	return c;
 }
 
-void Rheology::setMu0(LBMReal mu)
+void Rheology::setMu0(real mu)
 {
 	mu0 = mu;
 }
 
-LBMReal Rheology::getMu0() const
+real Rheology::getMu0() const
 {
 	return mu0;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/Rheology.h b/src/cpu/VirtualFluidsCore/LBM/Rheology.h
index b1aa22478a69a77be2aa0275ddb7a44b6e626305..782810ca3f68290310af3e70d440488c27bf96de 100644
--- a/src/cpu/VirtualFluidsCore/LBM/Rheology.h
+++ b/src/cpu/VirtualFluidsCore/LBM/Rheology.h
@@ -38,6 +38,7 @@
 #include <LBMSystem.h>
 #include <UbMath.h>
 #include <math.h> 
+#include "basics/constants/NumericConstants.h"
 
 class Rheology
 {
@@ -45,53 +46,53 @@ public:
 	Rheology(Rheology const&) = delete;
 	Rheology& operator=(Rheology const&) = delete;
 	static SPtr<Rheology> getInstance();
-	void setYieldStress(LBMReal tau0);
-	LBMReal getYieldStress() const;
+	void setYieldStress(real tau0);
+	real getYieldStress() const;
 	
-	void setViscosityParameter(LBMReal k);
-	LBMReal getViscosityParameter() const;
+	void setViscosityParameter(real k);
+	real getViscosityParameter() const;
 
-	void setPowerIndex(LBMReal n);
-	LBMReal getPowerIndex() const;
+	void setPowerIndex(real n);
+	real getPowerIndex() const;
 
-	void setOmegaMin(LBMReal omegaMin);
-	LBMReal getOmegaMin() const;
+	void setOmegaMin(real omegaMin);
+	real getOmegaMin() const;
 
-	void setBeta(LBMReal PowellEyringBeta);
-	LBMReal getBeta() const;
+	void setBeta(real PowellEyringBeta);
+	real getBeta() const;
 
-	void setC(LBMReal PowellEyringC);
-	LBMReal getC() const;
+	void setC(real PowellEyringC);
+	real getC() const;
 
-	void setMu0(LBMReal mu);
-	LBMReal getMu0() const;
+	void setMu0(real mu);
+	real getMu0() const;
 
-	static LBMReal getBinghamCollFactorOld(LBMReal omegaInf, LBMReal shearRate, LBMReal drho);
-	static LBMReal getBinghamCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho);
-	static LBMReal getHerschelBulkleyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho);
-	static LBMReal getHerschelBulkleyCollFactorBackward(LBMReal shearRate, LBMReal drho);
-	static LBMReal getPowellEyringCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho);
+	static real getBinghamCollFactorOld(real omegaInf, real shearRate, real drho);
+	static real getBinghamCollFactor(real omegaInf, real shearRate, real drho);
+	static real getHerschelBulkleyCollFactor(real omegaInf, real shearRate, real drho);
+	static real getHerschelBulkleyCollFactorBackward(real shearRate, real drho);
+	static real getPowellEyringCollFactor(real omegaInf, real shearRate, real drho);
 private:
 	Rheology();
 	
 	static SPtr<Rheology> instance;
 
-	static LBMReal tau0;
-	static LBMReal k;
-	static LBMReal n;
-	static LBMReal omegaMin;
-	static LBMReal beta;
-	static LBMReal c;
-	static LBMReal mu0;
+	static real tau0;
+	static real k;
+	static real n;
+	static real omegaMin;
+	static real beta;
+	static real c;
+	static real mu0;
 };
 
 //////////////////////////////////////////////////////////////////////////
-inline LBMReal Rheology::getBinghamCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho)
+inline real Rheology::getBinghamCollFactor(real omegaInf, real shearRate, real drho)
 {
-	LBMReal cs2 = UbMath::one_over_sqrt3 * UbMath::one_over_sqrt3;
-	LBMReal rho = UbMath::one + drho;
+	real cs2 = vf::basics::constant::one_over_sqrt3 * vf::basics::constant::one_over_sqrt3;
+	real rho = vf::basics::constant::c1o1 + drho;
 	//analytical solution
-	LBMReal omega = omegaInf * (UbMath::one - (omegaInf * tau0) / (shearRate * cs2 * rho + UbMath::Epsilon<LBMReal>::val()));
+	real omega = omegaInf * (vf::basics::constant::c1o1 - (omegaInf * tau0) / (shearRate * cs2 * rho + UbMath::Epsilon<real>::val()));
 	
 	//LBMReal omega = cs2 * cs2 * shearRate * shearRate * omegaInf * rho * rho / (cs2 * cs2 * shearRate * shearRate * rho * rho + cs2 * shearRate * omegaInf * rho * tau0+omegaInf*omegaInf*tau0*tau0);
 	
@@ -117,30 +118,30 @@ inline LBMReal Rheology::getBinghamCollFactor(LBMReal omegaInf, LBMReal shearRat
 	return omega;
 }
 
-inline LBMReal Rheology::getBinghamCollFactorOld(LBMReal omegaInf, LBMReal shearRate, LBMReal drho)
+inline real Rheology::getBinghamCollFactorOld(real omegaInf, real shearRate, real drho)
 {
-	const LBMReal cs2 = UbMath::c1o3; // UbMath::one_over_sqrt3* UbMath::one_over_sqrt3;
-	LBMReal rho = UbMath::one + drho;
+	const real cs2 = vf::basics::constant::c1o3; // UbMath::one_over_sqrt3* UbMath::one_over_sqrt3;
+	real rho = vf::basics::constant::c1o1 + drho;
 
-	if (rho * cs2 * (UbMath::c1 / omegaInf - UbMath::c1o2) * shearRate < tau0)
+	if (rho * cs2 * (vf::basics::constant::c1o1 / omegaInf - vf::basics::constant::c1o2) * shearRate < tau0)
 		return 0.0;
 	else
 		return omegaInf;
 }
 //////////////////////////////////////////////////////////////////////////
-inline LBMReal Rheology::getHerschelBulkleyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho)
+inline real Rheology::getHerschelBulkleyCollFactor(real omegaInf, real shearRate, real drho)
 {
-	LBMReal cs2 = UbMath::one_over_sqrt3 * UbMath::one_over_sqrt3;
-	LBMReal rho = UbMath::one + drho;
-	LBMReal gammaDot = shearRate;
-	LBMReal omega = omegaInf;
-	LBMReal epsilon = 1;
-	LBMReal gammaDotPowN = std::pow(gammaDot, n);
+	real cs2 = vf::basics::constant::one_over_sqrt3 * vf::basics::constant::one_over_sqrt3;
+	real rho = vf::basics::constant::c1o1 + drho;
+	real gammaDot = shearRate;
+	real omega = omegaInf;
+	real epsilon = 1;
+	real gammaDotPowN = std::pow(gammaDot, n);
 
 	while (epsilon > 1e-10)
 	{
-		LBMReal omegaOld = omega;
-		LBMReal omegaByOmegaInfPowN = std::pow(omega / omegaInf, n);/*
+		real omegaOld = omega;
+		real omegaByOmegaInfPowN = std::pow(omega / omegaInf, n);/*
 		LBMReal gammaDotPowOneMinusN = std::pow(gammaDot,1- n);
 		LBMReal omegaByOmegaInfPowOneMinusN = std::pow(omega / omegaInf, 1-n);
 		LBMReal numeratorA = (2.0* k *  omegaInf + cs2 * gammaDotPowOneMinusN * omegaByOmegaInfPowOneMinusN *omegaInf* rho );
@@ -148,10 +149,10 @@ inline LBMReal Rheology::getHerschelBulkleyCollFactor(LBMReal omegaInf, LBMReal
 		LBMReal denominatorA = (2.0 * k * n * omegaInf + cs2 * gammaDot * rho * omegaInf* gammaDotPowOneMinusN * omegaByOmegaInfPowOneMinusN) + UbMath::Epsilon<LBMReal>::val();
 		LBMReal denominatorB = (2.0 * k * n * gammaDotPowN * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * rho * omega) + UbMath::Epsilon<LBMReal>::val();
 		omega = omega - omega *( numeratorA / denominatorA+ numeratorB / denominatorB);*/
-		LBMReal numerator = (2.0 * gammaDotPowN * k * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * (omega - 2.0) * rho + 2.0 * omegaInf * tau0);
-		LBMReal denominator = (2.0 * k * n * gammaDotPowN * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * rho * omega) + UbMath::Epsilon<LBMReal>::val();
+		real numerator = (2.0 * gammaDotPowN * k * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * (omega - 2.0) * rho + 2.0 * omegaInf * tau0);
+		real denominator = (2.0 * k * n * gammaDotPowN * omegaByOmegaInfPowN * omegaInf + cs2 * gammaDot * rho * omega) + UbMath::Epsilon<real>::val();
 		omega = omega - omega * numerator / denominator;
-		omega = (omega < UbMath::zeroReal) ? UbMath::c1o2 * omegaOld : omega;
+		omega = (omega < vf::basics::constant::c0o1) ? vf::basics::constant::c1o2 * omegaOld : omega;
         //omega = (omega < omegaMin) ? UbMath::c1o2 * (omegaOld-omegaMin)+omegaMin : omega;
 		epsilon = std::abs(omega - omegaOld);
 	}
@@ -159,36 +160,38 @@ inline LBMReal Rheology::getHerschelBulkleyCollFactor(LBMReal omegaInf, LBMReal
 	return omega;
 }
 //////////////////////////////////////////////////////////////////////////
-inline LBMReal Rheology::getHerschelBulkleyCollFactorBackward(LBMReal shearRate, LBMReal drho)
+inline real Rheology::getHerschelBulkleyCollFactorBackward(real shearRate, real drho)
 {
-	LBMReal rho = UbMath::one + drho;
-	LBMReal gamma = shearRate + UbMath::Epsilon<LBMReal>::val();
-	LBMReal cs2 = UbMath::one_over_sqrt3 * UbMath::one_over_sqrt3;
+	real rho = vf::basics::constant::c1o1 + drho;
+	real gamma = shearRate + UbMath::Epsilon<real>::val();
+	real cs2 = vf::basics::constant::one_over_sqrt3 * vf::basics::constant::one_over_sqrt3;
 
-	return 1.0 / ((tau0 + k * std::pow(gamma, n)) / (cs2 * rho * gamma) + UbMath::c1o2);
+	return 1.0 / ((tau0 + k * std::pow(gamma, n)) / (cs2 * rho * gamma) + vf::basics::constant::c1o2);
 }
 //////////////////////////////////////////////////////////////////////////
-inline LBMReal Rheology::getPowellEyringCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho)
+inline real Rheology::getPowellEyringCollFactor(real omegaInf, real shearRate, real drho)
 {
-	using namespace UbMath;
-	LBMReal cs2 = c1o3; // UbMath::one_over_sqrt3* UbMath::one_over_sqrt3;
-	LBMReal rho = c1 + drho;
-	LBMReal gammaDot = shearRate;
-	LBMReal omega = omegaInf;
-	LBMReal epsilon = 1;
+//	using namespace UbMath;
+	using namespace vf::basics::constant;
+
+	real cs2 = c1o3; // UbMath::one_over_sqrt3* UbMath::one_over_sqrt3;
+	real rho = c1o1 + drho;
+	real gammaDot = shearRate;
+	real omega = omegaInf;
+	real epsilon = 1;
 
 	while (epsilon > 1e-10)
 	{
-		LBMReal omegaOld = omega;
+		real omegaOld = omega;
-		epsilon = std::abs(omega - omegaOld);
 
-		LBMReal numerator = c*sqrt(c1+(gammaDot*gammaDot*omega*omega)/(c*c*omegaInf*omegaInf))*(beta*(c2*gammaDot*mu0*omega+cs2*gammaDot*(omega-c2)*rho+c2*omegaInf*tau0)+c2*omegaInf*(asinh((gammaDot*omega)/(c*omegaInf))));
+		real numerator = c*sqrt(c1o1+(gammaDot*gammaDot*omega*omega)/(c*c*omegaInf*omegaInf))*(beta*(c2o1*gammaDot*mu0*omega+cs2*gammaDot*(omega-c2o1)*rho+c2o1*omegaInf*tau0)+c2o1*omegaInf*(asinh((gammaDot*omega)/(c*omegaInf))));
 
-		LBMReal denominator = gammaDot*(c2+beta*c*sqrt(c1+(gammaDot*gammaDot*omega*omega)/(c*c*omegaInf*omegaInf))*(c2*mu0+cs2*rho)) + UbMath::Epsilon<LBMReal>::val();
+		real denominator = gammaDot*(c2o1+beta*c*sqrt(c1o1+(gammaDot*gammaDot*omega*omega)/(c*c*omegaInf*omegaInf))*(c2o1*mu0+cs2*rho)) + UbMath::Epsilon<real>::val();
 
 		omega = omega - numerator / denominator;
 
-		omega = (omega < UbMath::zeroReal) ? UbMath::c1o2 * omegaOld : omega;
+		omega = (omega < c0o1) ? c1o2 * omegaOld : omega;
+		epsilon = std::abs(omega - omegaOld); // convergence check must follow the update; evaluated right after omegaOld = omega it was always 0 and ended the iteration after one step
 	}
 
 	return omega;
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyBinghamModelLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyBinghamModelLBMKernel.h
index 5bd2601dad811be6433eaea1d6acafc0a3e54f4c..da51e6c4e11f67ff48efe7a34c7eeaf900e9e730 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyBinghamModelLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyBinghamModelLBMKernel.h
@@ -61,7 +61,7 @@ public:
 		return kernel;
 	}
 protected:	
-	LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+	real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override
 	{
 		return Rheology::getBinghamCollFactor(omegaInf, shearRate, drho);
 	}
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyHerschelBulkleyModelLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyHerschelBulkleyModelLBMKernel.h
index 57478041e04e6a07579be53d58b688866e964e75..2422efefd52cdbfac183a9fdd19b9b2f5a5fee70 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyHerschelBulkleyModelLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyHerschelBulkleyModelLBMKernel.h
@@ -60,7 +60,7 @@ public:
 		return kernel;
 	}
 protected:
-	LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+	real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override
 	{
 		return Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, drho);
 	}
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.cpp b/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.cpp
index 09cd40c8eceb10fa57ba136ea5f1439211f928ab..0a9c380dece3ba90f7d2d3d5d2a84ceadfcf4850 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.cpp
@@ -42,7 +42,7 @@ RheologyInterpolationProcessor::RheologyInterpolationProcessor()
 
 }
 //////////////////////////////////////////////////////////////////////////
-RheologyInterpolationProcessor::RheologyInterpolationProcessor(LBMReal omegaC, LBMReal omegaF, LBMReal omegaMin)
+RheologyInterpolationProcessor::RheologyInterpolationProcessor(real omegaC, real omegaF, real omegaMin)
    : omegaC(omegaC), omegaF(omegaF), omegaMin(omegaMin)
 {
 
@@ -59,18 +59,18 @@ InterpolationProcessorPtr RheologyInterpolationProcessor::clone()
    return iproc;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::setOmegas( LBMReal omegaC, LBMReal omegaF )
+void RheologyInterpolationProcessor::setOmegas( real omegaC, real omegaF )
 {
    this->omegaC = omegaC;
    this->omegaF = omegaF;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::setOmegaMin( LBMReal omegaMin )
+void RheologyInterpolationProcessor::setOmegaMin( real omegaMin )
 {
    this->omegaMin = omegaMin;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void RheologyInterpolationProcessor::setOffsets(real xoff, real yoff, real zoff)
 {
    this->xoff = xoff;
    this->yoff = yoff;
@@ -80,7 +80,7 @@ void RheologyInterpolationProcessor::setOffsets(LBMReal xoff, LBMReal yoff, LBMR
    this->zoff_sq = zoff * zoff;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void RheologyInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff)
 {
     setOffsets(xoff, yoff, zoff);
     calcInterpolatedCoefficiets_intern(icellC, omegaC, 0.5, 0.25, -0.25, -0.25, -1, -1, -1);
@@ -101,60 +101,61 @@ void RheologyInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell& icellC,
     calcInterpolatedNode(icellF.TNE, /*omegaF,*/  0.25,  0.25,  0.25, calcPressTNE(),  1,  1,  1);
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff)
+void RheologyInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff)
 {
    setOffsets(xoff, yoff, zoff);
     calcInterpolatedCoefficiets_intern(icellF, omegaF, 2.0, 0, 0, 0, 0, 0, 0);
    calcInterpolatedNodeFC(icellC, omegaC);
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::calcMoments(const LBMReal* const f, LBMReal omegaInf, LBMReal& press, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3, LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz)
+void RheologyInterpolationProcessor::calcMoments(const real* const f, real omegaInf, real& press, real& vx1, real& vx2, real& vx3, real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    rho = 0.0;
    D3Q27System::calcIncompMacroscopicValues(f,rho,vx1,vx2,vx3);
 
    shearRate = D3Q27System::getShearRate(f, omegaInf);
 
-   LBMReal omega = Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, rho);
+   real omega = Rheology::getHerschelBulkleyCollFactor(omegaInf, shearRate, rho);
 
    press = rho; //interpolate rho!
 
    kxy   = -3.*omega*((((f[DIR_MMP]+f[DIR_PPM])-(f[DIR_MPP]+f[DIR_PMM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_MPM]+f[DIR_PMP])))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_MP0]+f[DIR_PM0]))-(vx1*vx2));// might not be optimal MG 25.2.13
    kyz   = -3.*omega*((((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMP]+f[DIR_MPM]))+((f[DIR_PMM]+f[DIR_MPP])-(f[DIR_MMP]+f[DIR_PPM])))+((f[DIR_0MM]+f[DIR_0PP])-(f[DIR_0MP]+f[DIR_0PM]))-(vx2*vx3));
    kxz   = -3.*omega*((((f[DIR_MPM]+f[DIR_PMP])-(f[DIR_MMP]+f[DIR_PPM]))+((f[DIR_MMM]+f[DIR_PPP])-(f[DIR_PMM]+f[DIR_MPP])))+((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_M0P]+f[DIR_P0M]))-(vx1*vx3));
-   kxxMyy = -3./2.*omega*((((f[D3Q27System::DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))-(vx1*vx1-vx2*vx2));
+   kxxMyy = -3./2.*omega*((((f[DIR_M0M]+f[DIR_P0P])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_M0P]+f[DIR_P0M])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_0M0]+f[DIR_0P0]))-(vx1*vx1-vx2*vx2));
    kxxMzz = -3./2.*omega*((((f[DIR_MP0]+f[DIR_PM0])-(f[DIR_0MM]+f[DIR_0PP]))+((f[DIR_MM0]+f[DIR_PP0])-(f[DIR_0MP]+f[DIR_0PM])))+((f[DIR_M00]+f[DIR_P00])-(f[DIR_00M]+f[DIR_00P]))-(vx1*vx1-vx3*vx3));
 }
 //////////////////////////////////////////////////////////////////////////
 void RheologyInterpolationProcessor::calcInterpolatedCoefficiets_intern(const D3Q27ICell& icell,
-                                                                          LBMReal omega,
-                                                                          LBMReal eps_new,
-                                                                          LBMReal x,
-                                                                          LBMReal y,
-                                                                          LBMReal z,
-                                                                          LBMReal xs,
-                                                                          LBMReal ys,
-                                                                          LBMReal zs)
+                                                                          real omega,
+                                                                          real eps_new,
+                                                                          real x,
+                                                                          real y,
+                                                                          real z,
+                                                                          real xs,
+                                                                          real ys,
+                                                                          real zs)
 {
-   LBMReal        vx1_SWT,vx2_SWT,vx3_SWT;
-   LBMReal        vx1_NWT,vx2_NWT,vx3_NWT;
-   LBMReal        vx1_NET,vx2_NET,vx3_NET;
-   LBMReal        vx1_SET,vx2_SET,vx3_SET;
-   LBMReal        vx1_SWB,vx2_SWB,vx3_SWB;
-   LBMReal        vx1_NWB,vx2_NWB,vx3_NWB;
-   LBMReal        vx1_NEB,vx2_NEB,vx3_NEB;
-   LBMReal        vx1_SEB,vx2_SEB,vx3_SEB;
-
-   LBMReal        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
-   LBMReal        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
-   LBMReal        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
-   LBMReal        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
-   LBMReal        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
-   LBMReal        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
-   LBMReal        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
-   LBMReal        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
+   real        vx1_SWT,vx2_SWT,vx3_SWT;
+   real        vx1_NWT,vx2_NWT,vx3_NWT;
+   real        vx1_NET,vx2_NET,vx3_NET;
+   real        vx1_SET,vx2_SET,vx3_SET;
+   real        vx1_SWB,vx2_SWB,vx3_SWB;
+   real        vx1_NWB,vx2_NWB,vx3_NWB;
+   real        vx1_NEB,vx2_NEB,vx3_NEB;
+   real        vx1_SEB,vx2_SEB,vx3_SEB;
+
+   real        kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT;
+   real        kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT;
+   real        kxyFromfcNEQ_NET, kyzFromfcNEQ_NET, kxzFromfcNEQ_NET, kxxMyyFromfcNEQ_NET, kxxMzzFromfcNEQ_NET;
+   real        kxyFromfcNEQ_SET, kyzFromfcNEQ_SET, kxzFromfcNEQ_SET, kxxMyyFromfcNEQ_SET, kxxMzzFromfcNEQ_SET;
+   real        kxyFromfcNEQ_SWB, kyzFromfcNEQ_SWB, kxzFromfcNEQ_SWB, kxxMyyFromfcNEQ_SWB, kxxMzzFromfcNEQ_SWB;
+   real        kxyFromfcNEQ_NWB, kyzFromfcNEQ_NWB, kxzFromfcNEQ_NWB, kxxMyyFromfcNEQ_NWB, kxxMzzFromfcNEQ_NWB;
+   real        kxyFromfcNEQ_NEB, kyzFromfcNEQ_NEB, kxzFromfcNEQ_NEB, kxxMyyFromfcNEQ_NEB, kxxMzzFromfcNEQ_NEB;
+   real        kxyFromfcNEQ_SEB, kyzFromfcNEQ_SEB, kxzFromfcNEQ_SEB, kxxMyyFromfcNEQ_SEB, kxxMzzFromfcNEQ_SEB;
 
    calcMoments(icell.TSW,omega,press_SWT,vx1_SWT,vx2_SWT,vx3_SWT, kxyFromfcNEQ_SWT, kyzFromfcNEQ_SWT, kxzFromfcNEQ_SWT, kxxMyyFromfcNEQ_SWT, kxxMzzFromfcNEQ_SWT);
    calcMoments(icell.TNW,omega,press_NWT,vx1_NWT,vx2_NWT,vx3_NWT, kxyFromfcNEQ_NWT, kyzFromfcNEQ_NWT, kxzFromfcNEQ_NWT, kxxMyyFromfcNEQ_NWT, kxxMzzFromfcNEQ_NWT);
@@ -309,18 +310,18 @@ void RheologyInterpolationProcessor::calcInterpolatedCoefficiets_intern(const D3
    cyz= cyz + xoff*cxyz;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   LBMReal dxux = ax + 0.5*axx*xs+ 0.25*(axy*ys+axz*zs)+0.0625*axyz*ys*zs;
-   LBMReal dyuy = by + 0.5 * byy * ys + 0.25 * (bxy * xs + byz * zs) + 0.0625 * bxyz * xs * zs;
-   LBMReal dzuz = cz + 0.5 * czz * zs + 0.25 * (cxz * xs + cyz * ys) + 0.0625 * cxyz * xs * ys;
+   real dxux = ax + 0.5*axx*xs+ 0.25*(axy*ys+axz*zs)+0.0625*axyz*ys*zs;
+   real dyuy = by + 0.5 * byy * ys + 0.25 * (bxy * xs + byz * zs) + 0.0625 * bxyz * xs * zs;
+   real dzuz = cz + 0.5 * czz * zs + 0.25 * (cxz * xs + cyz * ys) + 0.0625 * cxyz * xs * ys;
 
-   LBMReal Dxy = bx + 0.5 * bxx * xs + 0.25 * (bxy * ys + bxz * zs) + 0.0625 * bxyz * ys * zs + ay + 0.5 * ayy * ys + 0.25 * (axy * xs + ayz * zs) + 0.0625 * axyz * xs * zs;
-   LBMReal Dxz = cx + 0.5 * cxx * xs + 0.25 * (cxy * ys + cxz * zs) + 0.0625 * cxyz * ys * zs + az + 0.5 * azz * zs + 0.25 * (axz * xs + ayz * ys) + 0.0625 * axyz * xs * ys;
-   LBMReal Dyz = cy + 0.5 * cyy * ys + 0.25 * (cxy * xs + cyz * zs) + 0.0625 * cxyz * xs * zs + bz + 0.5 * bzz * zs + 0.25 * (bxz * xs + byz * ys) + 0.0625 * bxyz * xs * ys;
+   real Dxy = bx + 0.5 * bxx * xs + 0.25 * (bxy * ys + bxz * zs) + 0.0625 * bxyz * ys * zs + ay + 0.5 * ayy * ys + 0.25 * (axy * xs + ayz * zs) + 0.0625 * axyz * xs * zs;
+   real Dxz = cx + 0.5 * cxx * xs + 0.25 * (cxy * ys + cxz * zs) + 0.0625 * cxyz * ys * zs + az + 0.5 * azz * zs + 0.25 * (axz * xs + ayz * ys) + 0.0625 * axyz * xs * ys;
+   real Dyz = cy + 0.5 * cyy * ys + 0.25 * (cxy * xs + cyz * zs) + 0.0625 * cxyz * xs * zs + bz + 0.5 * bzz * zs + 0.25 * (bxz * xs + byz * ys) + 0.0625 * bxyz * xs * ys;
 
    shearRate = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz);
 
 
-   LBMReal o = Rheology::getHerschelBulkleyCollFactorBackward(shearRate, rho); //omega;
+   real o = Rheology::getHerschelBulkleyCollFactorBackward(shearRate, rho); //omega;
 
    if (o < omegaMin)
       o = omegaMin;
@@ -431,16 +432,17 @@ void RheologyInterpolationProcessor::calcInterpolatedCoefficiets_intern(const D3
    yz_TNW =   0.0625*eps_new *((                bxyz +     cxyz)/(72.*o));
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::calcInterpolatedNode(LBMReal* f, /*LBMReal omega,*/ LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs)
+void RheologyInterpolationProcessor::calcInterpolatedNode(real* f, /*real omega,*/ real x, real y, real z, real press, real xs, real ys, real zs)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal rho  = press ;
-   LBMReal vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
-   LBMReal vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
-   LBMReal vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
+   real rho  = press ;
+   real vx1  = a0 + 0.25*( xs*ax + ys*ay + zs*az) + 0.0625*(axx + xs*ys*axy + xs*zs*axz + ayy + ys*zs*ayz + azz) + 0.015625*(xs*ys*zs*axyz);
+   real vx2  = b0 + 0.25*( xs*bx + ys*by + zs*bz) + 0.0625*(bxx + xs*ys*bxy + xs*zs*bxz + byy + ys*zs*byz + bzz) + 0.015625*(xs*ys*zs*bxyz);
+   real vx3  = c0 + 0.25*( xs*cx + ys*cy + zs*cz) + 0.0625*(cxx + xs*ys*cxy + xs*zs*cxz + cyy + ys*zs*cyz + czz) + 0.015625*(xs*ys*zs*cxyz);
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcIncompFeq(feq,rho,vx1,vx2,vx3);
 
    f[DIR_P00]    = f_E    + xs*x_E    + ys*y_E    + zs*z_E    + xs*ys*xy_E    + xs*zs*xz_E    + ys*zs*yz_E    + feq[DIR_P00];
@@ -473,7 +475,7 @@ void RheologyInterpolationProcessor::calcInterpolatedNode(LBMReal* f, /*LBMReal
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWB -0.25, -0.25, -0.25
-LBMReal RheologyInterpolationProcessor::calcPressBSW()
+real RheologyInterpolationProcessor::calcPressBSW()
 {
    return   press_SWT * (0.140625 + 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -486,7 +488,7 @@ LBMReal RheologyInterpolationProcessor::calcPressBSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SWT -0.25, -0.25, 0.25
-LBMReal RheologyInterpolationProcessor::calcPressTSW()
+real RheologyInterpolationProcessor::calcPressTSW()
 {
    return   press_SWT * (0.421875 + 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -499,7 +501,7 @@ LBMReal RheologyInterpolationProcessor::calcPressTSW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SET 0.25, -0.25, 0.25
-LBMReal RheologyInterpolationProcessor::calcPressTSE()
+real RheologyInterpolationProcessor::calcPressTSE()
 {
    return   press_SET * (0.421875 - 0.5625 * xoff + 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.1875 * xoff - 0.5625 * yoff - 0.1875 * zoff) +
@@ -512,7 +514,7 @@ LBMReal RheologyInterpolationProcessor::calcPressTSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position SEB 0.25, -0.25, -0.25
-LBMReal RheologyInterpolationProcessor::calcPressBSE()
+real RheologyInterpolationProcessor::calcPressBSE()
 {
    return   press_SET * (0.140625 - 0.1875 * xoff + 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.0625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -525,7 +527,7 @@ LBMReal RheologyInterpolationProcessor::calcPressBSE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWB -0.25, 0.25, -0.25
-LBMReal RheologyInterpolationProcessor::calcPressBNW()
+real RheologyInterpolationProcessor::calcPressBNW()
 {
    return   press_NWT * (0.140625 + 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NET * (0.046875 - 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -538,7 +540,7 @@ LBMReal RheologyInterpolationProcessor::calcPressBNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NWT -0.25, 0.25, 0.25
-LBMReal RheologyInterpolationProcessor::calcPressTNW()
+real RheologyInterpolationProcessor::calcPressTNW()
 {
    return   press_NWT * (0.421875 + 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NET * (0.140625 - 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -551,7 +553,7 @@ LBMReal RheologyInterpolationProcessor::calcPressTNW()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NET 0.25, 0.25, 0.25
-LBMReal RheologyInterpolationProcessor::calcPressTNE()
+real RheologyInterpolationProcessor::calcPressTNE()
 {
    return   press_NET * (0.421875 - 0.5625 * xoff - 0.5625 * yoff - 0.5625 * zoff) +
       press_NWT * (0.140625 + 0.5625 * xoff - 0.1875 * yoff - 0.1875 * zoff) +
@@ -564,7 +566,7 @@ LBMReal RheologyInterpolationProcessor::calcPressTNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position NEB 0.25, 0.25, -0.25
-LBMReal RheologyInterpolationProcessor::calcPressBNE()
+real RheologyInterpolationProcessor::calcPressBNE()
 {
    return   press_NET * (0.140625 - 0.1875 * xoff - 0.1875 * yoff - 0.5625 * zoff) +
       press_NWT * (0.046875 + 0.1875 * xoff - 0.0625 * yoff - 0.1875 * zoff) +
@@ -577,11 +579,12 @@ LBMReal RheologyInterpolationProcessor::calcPressBNE()
 }
 //////////////////////////////////////////////////////////////////////////
 //Position C 0.0, 0.0, 0.0
-void RheologyInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal omega)
+void RheologyInterpolationProcessor::calcInterpolatedNodeFC(real* f, real omega)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
-   LBMReal press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
+   real press  =  press_NET * (0.125 - 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_NWT * (0.125 + 0.25 * xoff - 0.25 * yoff - 0.25 * zoff) +
       press_SET * (0.125 - 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
       press_SWT * (0.125 + 0.25 * xoff + 0.25 * yoff - 0.25 * zoff) +
@@ -589,30 +592,30 @@ void RheologyInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal
       press_NWB * (0.125 + 0.25 * xoff - 0.25 * yoff + 0.25 * zoff) +
       press_SEB * (0.125 - 0.25 * xoff + 0.25 * yoff + 0.25 * zoff) +
       press_SWB * (0.125 + 0.25 * xoff + 0.25 * yoff + 0.25 * zoff);
-   LBMReal vx1  = a0;
-   LBMReal vx2  = b0;
-   LBMReal vx3  = c0;
+   real vx1  = a0;
+   real vx2  = b0;
+   real vx3  = c0;
 
-   LBMReal rho = press ;
+   real rho = press ;
 
-   LBMReal feq[ENDF+1];
+   real feq[ENDF+1];
    D3Q27System::calcIncompFeq(feq,rho,vx1,vx2,vx3);
 
-   LBMReal eps_new = 2.;
+   real eps_new = 2.;
    
 
-   LBMReal dxux = ax;
-   LBMReal dyuy = by;
-   LBMReal dzuz = cz;
+   real dxux = ax;
+   real dyuy = by;
+   real dzuz = cz;
 
-   LBMReal Dxy = bx + ay;
-   LBMReal Dxz = cx + az;
-   LBMReal Dyz = cy + bz;
+   real Dxy = bx + ay;
+   real Dxz = cx + az;
+   real Dyz = cy + bz;
 
    shearRate = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz);
 
 
-   LBMReal o = Rheology::getHerschelBulkleyCollFactorBackward(shearRate, rho); //omega;
+   real o = Rheology::getHerschelBulkleyCollFactorBackward(shearRate, rho); //omega;
 
    if (o < omegaMin)
       o = omegaMin;
@@ -661,14 +664,14 @@ void RheologyInterpolationProcessor::calcInterpolatedNodeFC(LBMReal* f, LBMReal
    f[DIR_000] = f_ZERO + feq[DIR_000];
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3)
+void RheologyInterpolationProcessor::calcInterpolatedVelocity(real x, real y, real z, real& vx1, real& vx2, real& vx3)
 {
 	vx1  = a0 + ax*x + ay*y + az*z + axx*x*x + ayy*y*y + azz*z*z + axy*x*y + axz*x*z + ayz*y*z+axyz*x*y*z;
 	vx2  = b0 + bx*x + by*y + bz*z + bxx*x*x + byy*y*y + bzz*z*z + bxy*x*y + bxz*x*z + byz*y*z+bxyz*x*y*z;
 	vx3  = c0 + cx*x + cy*y + cz*z + cxx*x*x + cyy*y*y + czz*z*z + cxy*x*y + cxz*x*z + cyz*y*z+cxyz*x*y*z;
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyInterpolationProcessor::calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz)
+void RheologyInterpolationProcessor::calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz)
 {
 	tauxx=ax+2*axx*x+axy*y+axz*z+axyz*y*z;
 	tauyy=by+2*byy*y+bxy*x+byz*z+bxyz*x*z;
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.h b/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.h
index bce0c3d89e137738d9169fab5dbe15cd3c91f8ad..178932204307606b9fc48d2745ebf1353547e3e8 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyInterpolationProcessor.h
@@ -42,55 +42,55 @@ class RheologyInterpolationProcessor : public InterpolationProcessor
 {
 public:
    RheologyInterpolationProcessor();
-   RheologyInterpolationProcessor(LBMReal omegaC, LBMReal omegaF, LBMReal omegaMin);
+   RheologyInterpolationProcessor(real omegaC, real omegaF, real omegaMin);
    virtual ~RheologyInterpolationProcessor();
    InterpolationProcessorPtr clone();
-   void setOmegas(LBMReal omegaC, LBMReal omegaF);
-   void setOmegaMin(LBMReal omegaMin);
+   void setOmegas(real omegaC, real omegaF);
+   void setOmegaMin(real omegaMin);
    void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF);
-   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, LBMReal xoff, LBMReal yoff, LBMReal zoff);
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC); 
-   void interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC, LBMReal xoff, LBMReal yoff, LBMReal zoff); 
-   //LBMReal forcingC, forcingF;
+   void interpolateCoarseToFine(D3Q27ICell& icellC, D3Q27ICell& icellF, real xoff, real yoff, real zoff);
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC); 
+   void interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC, real xoff, real yoff, real zoff); 
+   //real forcingC, forcingF;
 protected:   
 private:
-   LBMReal omegaC, omegaF;
-   LBMReal a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
-   LBMReal xoff,    yoff,    zoff;
-   LBMReal xoff_sq, yoff_sq, zoff_sq;
-   LBMReal press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
+   real omegaC, omegaF;
+   real a0, ax, ay, az, axx, ayy, azz, axy, axz, ayz, b0, bx, by, bz, bxx, byy, bzz, bxy, bxz, byz, c0, cx, cy, cz, cxx, cyy, czz, cxy, cxz, cyz, axyz, bxyz, cxyz;
+   real xoff,    yoff,    zoff;
+   real xoff_sq, yoff_sq, zoff_sq;
+   real press_SWT, press_NWT, press_NET, press_SET, press_SWB, press_NWB, press_NEB, press_SEB;
 
-   LBMReal  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
-   LBMReal  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
-   LBMReal  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
-   LBMReal  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
-   LBMReal xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
-   LBMReal xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
-   LBMReal yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
+   real  f_E,  f_N,  f_T,  f_NE,  f_SE,  f_BE,  f_TE,  f_TN,  f_BN,  f_TNE,  f_TNW,  f_TSE,  f_TSW,  f_ZERO;
+   real  x_E,  x_N,  x_T,  x_NE,  x_SE,  x_BE,  x_TE,  x_TN,  x_BN,  x_TNE,  x_TNW,  x_TSE,  x_TSW,  x_ZERO;
+   real  y_E,  y_N,  y_T,  y_NE,  y_SE,  y_BE,  y_TE,  y_TN,  y_BN,  y_TNE,  y_TNW,  y_TSE,  y_TSW,  y_ZERO;
+   real  z_E,  z_N,  z_T,  z_NE,  z_SE,  z_BE,  z_TE,  z_TN,  z_BN,  z_TNE,  z_TNW,  z_TSE,  z_TSW,  z_ZERO;
+   real xy_E, xy_N, xy_T, xy_NE, xy_SE, xy_BE, xy_TE, xy_TN, xy_BN, xy_TNE, xy_TNW, xy_TSE, xy_TSW/*, xy_ZERO*/;
+   real xz_E, xz_N, xz_T, xz_NE, xz_SE, xz_BE, xz_TE, xz_TN, xz_BN, xz_TNE, xz_TNW, xz_TSE, xz_TSW/*, xz_ZERO*/;
+   real yz_E, yz_N, yz_T, yz_NE, yz_SE, yz_BE, yz_TE, yz_TN, yz_BN, yz_TNE, yz_TNW, yz_TSE, yz_TSW/*, yz_ZERO*/;
 
-   LBMReal kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
+   real kxyAverage, kyzAverage, kxzAverage, kxxMyyAverage, kxxMzzAverage; 
 
-   LBMReal rho;
-   LBMReal shearRate;
+   real rho;
+   real shearRate;
 
-   LBMReal omegaMin;
+   real omegaMin;
 
-   void setOffsets(LBMReal xoff, LBMReal yoff, LBMReal zoff);
-   void calcMoments(const LBMReal* const f, LBMReal omegaInf, LBMReal& rho, LBMReal& vx1, LBMReal& vx2, LBMReal& vx3,
-      LBMReal& kxy, LBMReal& kyz, LBMReal& kxz, LBMReal& kxxMyy, LBMReal& kxxMzz);
-   void calcInterpolatedCoefficiets_intern(const D3Q27ICell& icell, LBMReal omega, LBMReal eps_new, LBMReal x, LBMReal y, LBMReal z, LBMReal xs, LBMReal ys, LBMReal zs);
-   void calcInterpolatedNode(LBMReal* f, /*LBMReal omega,*/ LBMReal x, LBMReal y, LBMReal z, LBMReal press, LBMReal xs, LBMReal ys, LBMReal zs);
-   LBMReal calcPressBSW();
-   LBMReal calcPressTSW();
-   LBMReal calcPressTSE();
-   LBMReal calcPressBSE();
-   LBMReal calcPressBNW();
-   LBMReal calcPressTNW();
-   LBMReal calcPressTNE();
-   LBMReal calcPressBNE();
-   void calcInterpolatedNodeFC(LBMReal* f, LBMReal omega);
-   void calcInterpolatedVelocity(LBMReal x, LBMReal y, LBMReal z,LBMReal& vx1, LBMReal& vx2, LBMReal& vx3);
-   void calcInterpolatedShearStress(LBMReal x, LBMReal y, LBMReal z,LBMReal& tauxx, LBMReal& tauyy, LBMReal& tauzz,LBMReal& tauxy, LBMReal& tauxz, LBMReal& tauyz);
+   void setOffsets(real xoff, real yoff, real zoff);
+   void calcMoments(const real* const f, real omegaInf, real& rho, real& vx1, real& vx2, real& vx3,
+      real& kxy, real& kyz, real& kxz, real& kxxMyy, real& kxxMzz);
+   void calcInterpolatedCoefficiets_intern(const D3Q27ICell& icell, real omega, real eps_new, real x, real y, real z, real xs, real ys, real zs);
+   void calcInterpolatedNode(real* f, /*real omega,*/ real x, real y, real z, real press, real xs, real ys, real zs);
+   real calcPressBSW();
+   real calcPressTSW();
+   real calcPressTSE();
+   real calcPressBSE();
+   real calcPressBNW();
+   real calcPressTNW();
+   real calcPressTNE();
+   real calcPressBNE();
+   void calcInterpolatedNodeFC(real* f, real omega);
+   void calcInterpolatedVelocity(real x, real y, real z,real& vx1, real& vx2, real& vx3);
+   void calcInterpolatedShearStress(real x, real y, real z,real& tauxx, real& tauyy, real& tauzz,real& tauxy, real& tauxz, real& tauyz);
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -99,7 +99,7 @@ inline void RheologyInterpolationProcessor::interpolateCoarseToFine(D3Q27ICell&
    this->interpolateCoarseToFine(icellC, icellF, 0.0, 0.0, 0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-inline void RheologyInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, LBMReal* icellC)
+inline void RheologyInterpolationProcessor::interpolateFineToCoarse(D3Q27ICell& icellF, real* icellC)
 {
    this->interpolateFineToCoarse(icellF, icellC, 0.0, 0.0, 0.0);
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.cpp
index 94fbad358b16d923ddc5425e6476ff3892bbbf3c..9cb096cd1df43332e41b09c5229b2cdeb0b2936b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.cpp
@@ -43,7 +43,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 RheologyK17LBMKernel::RheologyK17LBMKernel()
@@ -85,7 +86,7 @@ SPtr<LBMKernel> RheologyK17LBMKernel::clone()
    } 
    else
    {
-      OxxPyyPzz = one;
+      OxxPyyPzz = c1o1;
    }
 
    dynamicPointerCast<RheologyK17LBMKernel>(kernel)->OxxPyyPzz = this->OxxPyyPzz;
@@ -190,63 +191,63 @@ void RheologyK17LBMKernel::calculate(int step)
                // a b c
                //-1 0 1
 
-               LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
-               LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
-               LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
-               LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
-               LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
-               LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
-               LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
-               LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
-               LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
-               LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
-               LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
-               LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
-               LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-               LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
-               LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
-               LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
-               LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
-               LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
-               LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
-               LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
-               LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
-               LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
-               LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-               LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
-               LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
-               LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-               LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
-
-               ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
+               real mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+               real mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+               real mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+               real mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+               real mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+               real mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+               real mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+               real mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+               real mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+               real mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+               real mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+               real mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+               real mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+               real mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+               real mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+               real mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+               real mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+               real mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+               real mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+               real mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+               real mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+               real mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+               real mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+               real mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+               real mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+               real mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+               real mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+               ////////////////////////////////////////////////////////////////////////////////////
+               real drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
                   (((mfbac+mfbca)+(mfbaa+mfbcc))+((mfabc+mfcba)+(mfaba+mfcbc))+((mfacb+mfcab)+(mfaab+mfccb)))+
                   ((mfabb+mfcbb)+(mfbab+mfbcb))+(mfbba+mfbbc))+mfbbb;
 
-               LBMReal rho = one+drho;
+               real rho = c1o1+drho;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
+               real vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
                   (((mfcba-mfabc)+(mfcbc-mfaba))+((mfcab-mfacb)+(mfccb-mfaab)))+
                   (mfcbb-mfabb))/rho;
-               LBMReal vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
+               real vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
                   (((mfbca-mfbac)+(mfbcc-mfbaa))+((mfacb-mfcab)+(mfccb-mfaab)))+
                   (mfbcb-mfbab))/rho;
-               LBMReal vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
+               real vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
                   (((mfbac-mfbca)+(mfbcc-mfbaa))+((mfabc-mfcba)+(mfcbc-mfaba)))+
                   (mfbbc-mfbba))/rho;
                ////////////////////////////////////////////////////////////////////////////////////
 
-               LBMReal omega = collFactor;
+               real omega = collFactor;
 
                //forcing 
                ///////////////////////////////////////////////////////////////////////////////////////////
                if (withForcing)
                {
-                  muX1 = static_cast<double>(x1-1+ix1*maxX1);
-                  muX2 = static_cast<double>(x2-1+ix2*maxX2);
-                  muX3 = static_cast<double>(x3-1+ix3*maxX3);
+                  muX1 = static_cast<real>(x1-1+ix1*maxX1);
+                  muX2 = static_cast<real>(x2-1+ix2*maxX2);
+                  muX3 = static_cast<real>(x3-1+ix3*maxX3);
 
                   forcingX1 = muForcingX1.Eval();
                   forcingX2 = muForcingX2.Eval();
@@ -258,20 +259,20 @@ void RheologyK17LBMKernel::calculate(int step)
                }
                ///////////////////////////////////////////////////////////////////////////////////////////               
          ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal oMdrho = one; // comp special
+               real oMdrho = c1o1; // comp special
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal m0, m1, m2;
-               LBMReal vx2;
-               LBMReal vy2;
-               LBMReal vz2;
+               real m0, m1, m2;
+               real vx2;
+               real vy2;
+               real vz2;
                vx2 = vvx*vvx;
                vy2 = vvy*vvy;
                vz2 = vvz*vvz;
                ////////////////////////////////////////////////////////////////////////////////////
-               LBMReal wadjust;
-               LBMReal qudricLimitP = 0.01;// * 0.0001f;
-               LBMReal qudricLimitM = 0.01;// * 0.0001f;
-               LBMReal qudricLimitD = 0.01;// * 0.001f;
+               real wadjust;
+               real qudricLimitP = 0.01;// * 0.0001f;
+               real qudricLimitM = 0.01;// * 0.0001f;
+               real qudricLimitD = 0.01;// * 0.001f;
                //LBMReal s9 = minusomega;
                //test
                //s9 = 0.;
@@ -287,7 +288,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaaa = m0;
                m0 += c1o36 * oMdrho;
                mfaab = m1-m0 * vvz;
-               mfaac = m2-two*	m1 * vvz+vz2 * m0;
+               mfaac = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaba+mfabc;
                m1 = mfabc-mfaba;
@@ -295,7 +296,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaba = m0;
                m0 += c1o9 * oMdrho;
                mfabb = m1-m0 * vvz;
-               mfabc = m2-two*	m1 * vvz+vz2 * m0;
+               mfabc = m2-c2o1*	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaca+mfacc;
                m1 = mfacc-mfaca;
@@ -303,7 +304,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaca = m0;
                m0 += c1o36 * oMdrho;
                mfacb = m1-m0 * vvz;
-               mfacc = m2-two*	m1 * vvz+vz2 * m0;
+               mfacc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbaa+mfbac;
@@ -312,7 +313,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbaa = m0;
                m0 += c1o9 * oMdrho;
                mfbab = m1-m0 * vvz;
-               mfbac = m2-two*	m1 * vvz+vz2 * m0;
+               mfbac = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbba+mfbbc;
                m1 = mfbbc-mfbba;
@@ -320,7 +321,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbba = m0;
                m0 += c4o9 * oMdrho;
                mfbbb = m1-m0 * vvz;
-               mfbbc = m2-two*	m1 * vvz+vz2 * m0;
+               mfbbc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbca+mfbcc;
                m1 = mfbcc-mfbca;
@@ -328,7 +329,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbca = m0;
                m0 += c1o9 * oMdrho;
                mfbcb = m1-m0 * vvz;
-               mfbcc = m2-two*	m1 * vvz+vz2 * m0;
+               mfbcc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcaa+mfcac;
@@ -337,7 +338,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcaa = m0;
                m0 += c1o36 * oMdrho;
                mfcab = m1-m0 * vvz;
-               mfcac = m2-two*	m1 * vvz+vz2 * m0;
+               mfcac = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcba+mfcbc;
                m1 = mfcbc-mfcba;
@@ -345,7 +346,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcba = m0;
                m0 += c1o9 * oMdrho;
                mfcbb = m1-m0 * vvz;
-               mfcbc = m2-two*	m1 * vvz+vz2 * m0;
+               mfcbc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcca+mfccc;
                m1 = mfccc-mfcca;
@@ -353,7 +354,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcca = m0;
                m0 += c1o36 * oMdrho;
                mfccb = m1-m0 * vvz;
-               mfccc = m2-two*	m1 * vvz+vz2 * m0;
+               mfccc = m2- c2o1 *	m1 * vvz+vz2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
@@ -365,14 +366,14 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaaa = m0;
                m0 += c1o6 * oMdrho;
                mfaba = m1-m0 * vvy;
-               mfaca = m2-two*	m1 * vvy+vy2 * m0;
+               mfaca = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaab+mfacb;
                m1 = mfacb-mfaab;
                m0 = m2+mfabb;
                mfaab = m0;
                mfabb = m1-m0 * vvy;
-               mfacb = m2-two*	m1 * vvy+vy2 * m0;
+               mfacb = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaac+mfacc;
                m1 = mfacc-mfaac;
@@ -380,7 +381,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaac = m0;
                m0 += c1o18 * oMdrho;
                mfabc = m1-m0 * vvy;
-               mfacc = m2-two*	m1 * vvy+vy2 * m0;
+               mfacc = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbaa+mfbca;
@@ -389,14 +390,14 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbaa = m0;
                m0 += c2o3 * oMdrho;
                mfbba = m1-m0 * vvy;
-               mfbca = m2-two*	m1 * vvy+vy2 * m0;
+               mfbca = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbab+mfbcb;
                m1 = mfbcb-mfbab;
                m0 = m2+mfbbb;
                mfbab = m0;
                mfbbb = m1-m0 * vvy;
-               mfbcb = m2-two*	m1 * vvy+vy2 * m0;
+               mfbcb = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfbac+mfbcc;
                m1 = mfbcc-mfbac;
@@ -404,7 +405,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfbac = m0;
                m0 += c2o9 * oMdrho;
                mfbbc = m1-m0 * vvy;
-               mfbcc = m2-two*	m1 * vvy+vy2 * m0;
+               mfbcc = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcaa+mfcca;
@@ -413,14 +414,14 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcaa = m0;
                m0 += c1o6 * oMdrho;
                mfcba = m1-m0 * vvy;
-               mfcca = m2-two*	m1 * vvy+vy2 * m0;
+               mfcca = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcab+mfccb;
                m1 = mfccb-mfcab;
                m0 = m2+mfcbb;
                mfcab = m0;
                mfcbb = m1-m0 * vvy;
-               mfccb = m2-two*	m1 * vvy+vy2 * m0;
+               mfccb = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfcac+mfccc;
                m1 = mfccc-mfcac;
@@ -428,7 +429,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfcac = m0;
                m0 += c1o18 * oMdrho;
                mfcbc = m1-m0 * vvy;
-               mfccc = m2-two*	m1 * vvy+vy2 * m0;
+               mfccc = m2- c2o1 *	m1 * vvy+vy2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
@@ -438,16 +439,16 @@ void RheologyK17LBMKernel::calculate(int step)
                m1 = mfcaa-mfaaa;
                m0 = m2+mfbaa;
                mfaaa = m0;
-               m0 += one* oMdrho;
+               m0 += c1o1 * oMdrho;
                mfbaa = m1-m0 * vvx;
-               mfcaa = m2-two*	m1 * vvx+vx2 * m0;
+               mfcaa = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaba+mfcba;
                m1 = mfcba-mfaba;
                m0 = m2+mfbba;
                mfaba = m0;
                mfbba = m1-m0 * vvx;
-               mfcba = m2-two*	m1 * vvx+vx2 * m0;
+               mfcba = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaca+mfcca;
                m1 = mfcca-mfaca;
@@ -455,7 +456,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaca = m0;
                m0 += c1o3 * oMdrho;
                mfbca = m1-m0 * vvx;
-               mfcca = m2-two*	m1 * vvx+vx2 * m0;
+               mfcca = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaab+mfcab;
@@ -463,21 +464,21 @@ void RheologyK17LBMKernel::calculate(int step)
                m0 = m2+mfbab;
                mfaab = m0;
                mfbab = m1-m0 * vvx;
-               mfcab = m2-two*	m1 * vvx+vx2 * m0;
+               mfcab = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfabb+mfcbb;
                m1 = mfcbb-mfabb;
                m0 = m2+mfbbb;
                mfabb = m0;
                mfbbb = m1-m0 * vvx;
-               mfcbb = m2-two*	m1 * vvx+vx2 * m0;
+               mfcbb = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfacb+mfccb;
                m1 = mfccb-mfacb;
                m0 = m2+mfbcb;
                mfacb = m0;
                mfbcb = m1-m0 * vvx;
-               mfccb = m2-two*	m1 * vvx+vx2 * m0;
+               mfccb = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfaac+mfcac;
@@ -486,14 +487,14 @@ void RheologyK17LBMKernel::calculate(int step)
                mfaac = m0;
                m0 += c1o3 * oMdrho;
                mfbac = m1-m0 * vvx;
-               mfcac = m2-two*	m1 * vvx+vx2 * m0;
+               mfcac = m2- c2o1 *	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfabc+mfcbc;
                m1 = mfcbc-mfabc;
                m0 = m2+mfbbc;
                mfabc = m0;
                mfbbc = m1-m0 * vvx;
-               mfcbc = m2-two*	m1 * vvx+vx2 * m0;
+               mfcbc = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                m2 = mfacc+mfccc;
                m1 = mfccc-mfacc;
@@ -501,7 +502,7 @@ void RheologyK17LBMKernel::calculate(int step)
                mfacc = m0;
                m0 += c1o9 * oMdrho;
                mfbcc = m1-m0 * vvx;
-               mfccc = m2-two*	m1 * vvx+vx2 * m0;
+               mfccc = m2-c2o1*	m1 * vvx+vx2 * m0;
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
 
@@ -545,47 +546,47 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////
                //4.
                //////////////////////////////
-               LBMReal O4 = one;
+               real O4 = c1o1;
                //////////////////////////////
                //LBMReal O4        = omega;//TRT
                ////////////////////////////////////////////////////////////
                //5.
                //////////////////////////////
-               LBMReal O5 = one;
+               real O5 = c1o1;
                ////////////////////////////////////////////////////////////
                //6.
                //////////////////////////////
-               LBMReal O6 = one;
+               real O6 = c1o1;
                ////////////////////////////////////////////////////////////
 
 
                //central moments to cumulants
                //4.
-               LBMReal CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
-               LBMReal CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
-               LBMReal CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
+               real CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+ c2o1 * mfbba * mfbab)/rho;	//ab 15.05.2015 verwendet
+               real CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+ c2o1 * mfbba * mfabb)/rho; //ab 15.05.2015 verwendet
+               real CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+ c2o1 * mfbab * mfabb)/rho; //ab 15.05.2015 verwendet
 
-               LBMReal CUMcca = mfcca-(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
-               LBMReal CUMcac = mfcac-(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
-               LBMReal CUMacc = mfacc-(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
+               real CUMcca = mfcca-(((mfcaa * mfaca+ c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
+               real CUMcac = mfcac-(((mfcaa * mfaac+ c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
+               real CUMacc = mfacc-(((mfaac * mfaca+ c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
 
                //5.
-               LBMReal CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-               LBMReal CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-               LBMReal CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+               real CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+ c4o1 * mfabb * mfbbb+ c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+               real CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+ c4o1 * mfbab * mfbbb+ c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+               real CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+ c4o1 * mfbba * mfbbb+ c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                //6.
 
-               LBMReal CUMccc = mfccc+((-four *  mfbbb * mfbbb
+               real CUMccc = mfccc+((-c4o1 *  mfbbb * mfbbb
                   -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                  -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                  -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                  +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                     +two * (mfcaa * mfaca * mfaac)
-                     +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                  - c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                  - c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                  +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                     + c2o1 * (mfcaa * mfaca * mfaac)
+                     + c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                   -c1o3 * (mfacc+mfcac+mfcca)/rho
                   -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                  +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                  +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                      +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                   +c1o27*((drho * drho-drho)/(rho*rho)));
                //+ c1o27*(one -three/rho +two/(rho*rho)));
@@ -595,9 +596,9 @@ void RheologyK17LBMKernel::calculate(int step)
 
    //2.
    // linear combinations
-               LBMReal mxxPyyPzz = mfcaa+mfaca+mfaac;
-               LBMReal mxxMyy = mfcaa-mfaca;
-               LBMReal mxxMzz = mfcaa-mfaac;
+               real mxxPyyPzz = mfcaa+mfaca+mfaac;
+               real mxxMyy = mfcaa-mfaca;
+               real mxxMzz = mfcaa-mfaac;
 
                //////////////////////////////////////////////////////////////////////////
       // 			LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
@@ -637,17 +638,17 @@ void RheologyK17LBMKernel::calculate(int step)
                ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
 
-               LBMReal dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz);// +c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-               LBMReal dyuy = dxux+omega * c3o2 * mxxMyy;
-               LBMReal dzuz = dxux+omega * c3o2 * mxxMzz;
+               real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz);// +c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+               real dyuy = dxux+omega * c3o2 * mxxMyy;
+               real dzuz = dxux+omega * c3o2 * mxxMzz;
 
-               LBMReal Dxy =-three*omega*mfbba;
-               LBMReal Dxz =-three*omega*mfbab;
-               LBMReal Dyz =-three*omega*mfabb;
+               real Dxy =-c3o1 *omega*mfbba;
+               real Dxz =-c3o1 *omega*mfbab;
+               real Dyz =-c3o1 *omega*mfabb;
 
                ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                //non Newtonian fluid collision factor
-               LBMReal shearRate = sqrt(c2 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (drho + c1);
+               real shearRate = sqrt(c2o1 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (drho + c1o1);
                omega = getRheologyCollFactor(omega, shearRate, rho);
                //omega = Rheology::getHerschelBulkleyCollFactor(omega, shearRate, drho);
                //omega = Rheology::getBinghamCollFactor(omega, shearRate, drho);
@@ -657,39 +658,39 @@ void RheologyK17LBMKernel::calculate(int step)
                dyuy = dxux + omega * c3o2 * mxxMyy;
                dzuz = dxux + omega * c3o2 * mxxMzz;
 
-               Dxy = -three * omega * mfbba;
-               Dxz = -three * omega * mfbab;
-               Dyz = -three * omega * mfabb;
+               Dxy = -c3o1 * omega * mfbba;
+               Dxz = -c3o1 * omega * mfbab;
+               Dyz = -c3o1 * omega * mfabb;
 
-               mxxMyy += omega * (-mxxMyy) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-               mxxMzz += omega * (-mxxMzz) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+               mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+               mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
                mfabb += omega * (-mfabb);
                mfbab += omega * (-mfbab);
                mfbba += omega * (-mfbba);
 
-               if(omega < c1) { omega = c1; } //arbitrary limit (24.09.2020)
+               if(omega < c1o1) { omega = c1o1; } //arbitrary limit (24.09.2020)
 
-               //omega = collFactor;
+               //omega = collFactor;
 
                //magic parameter for rheology
-               LBMReal a = 10;
-               OxxPyyPzz = c1 / (a * ((c1 / omega) - c1o2) + c1o2);
-               OxxPyyPzz = (OxxPyyPzz > c1) ? c1 : OxxPyyPzz;
+               real a = 10;
+               OxxPyyPzz = c1o1 / (a * ((c1o1 / omega) - c1o2) + c1o2);
+               OxxPyyPzz = (OxxPyyPzz > c1o1) ? c1o1 : OxxPyyPzz;
 
-               LBMReal OxyyPxzz = 8.0 * (omega - 2.0) * (OxxPyyPzz * (3.0 * omega - 1.0) - 5.0 * omega) / (8.0 * (5.0 - 2.0 * omega) * omega + OxxPyyPzz * (8.0 + omega * (9.0 * omega - 26.0)));
-               LBMReal OxyyMxzz = 8.0 * (omega - 2.0) * (omega + OxxPyyPzz * (3.0 * omega - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * omega + 9.0 * omega * omega) - 8.0 * omega);
-               LBMReal Oxyz = 24.0 * (omega - 2.0) * (4.0 * omega * omega + omega * OxxPyyPzz * (18.0 - 13.0 * omega) + OxxPyyPzz * OxxPyyPzz * (2.0 + omega * (6.0 * omega - 11.0))) / (16.0 * omega * omega * (omega - 6.0) - 2.0 * omega * OxxPyyPzz * (216.0 + 5.0 * omega * (9.0 * omega - 46.0)) + OxxPyyPzz * OxxPyyPzz * (omega * (3.0 * omega - 10.0) * (15.0 * omega - 28.0) - 48.0));
+               real OxyyPxzz = 8.0 * (omega - 2.0) * (OxxPyyPzz * (3.0 * omega - 1.0) - 5.0 * omega) / (8.0 * (5.0 - 2.0 * omega) * omega + OxxPyyPzz * (8.0 + omega * (9.0 * omega - 26.0)));
+               real OxyyMxzz = 8.0 * (omega - 2.0) * (omega + OxxPyyPzz * (3.0 * omega - 7.0)) / (OxxPyyPzz * (56.0 - 42.0 * omega + 9.0 * omega * omega) - 8.0 * omega);
+               real Oxyz = 24.0 * (omega - 2.0) * (4.0 * omega * omega + omega * OxxPyyPzz * (18.0 - 13.0 * omega) + OxxPyyPzz * OxxPyyPzz * (2.0 + omega * (6.0 * omega - 11.0))) / (16.0 * omega * omega * (omega - 6.0) - 2.0 * omega * OxxPyyPzz * (216.0 + 5.0 * omega * (9.0 * omega - 46.0)) + OxxPyyPzz * OxxPyyPzz * (omega * (3.0 * omega - 10.0) * (15.0 * omega - 28.0) - 48.0));
 
-               LBMReal A_ = (4.0 * omega * omega + 2.0 * omega * OxxPyyPzz * (omega - 6.0) + OxxPyyPzz * OxxPyyPzz * (omega * (10.0 - 3.0 * omega) - 4.0)) / ((omega - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * omega) - 8.0 * omega));
-               LBMReal B_ = (4.0 * omega * OxxPyyPzz * (9.0 * omega - 16.0) - 4.0 * omega * omega - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * omega * (omega - 2.0))) / (3.0 * (omega - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * omega) - 8.0 * omega));
+               real A_ = (4.0 * omega * omega + 2.0 * omega * OxxPyyPzz * (omega - 6.0) + OxxPyyPzz * OxxPyyPzz * (omega * (10.0 - 3.0 * omega) - 4.0)) / ((omega - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * omega) - 8.0 * omega));
+               real B_ = (4.0 * omega * OxxPyyPzz * (9.0 * omega - 16.0) - 4.0 * omega * omega - 2.0 * OxxPyyPzz * OxxPyyPzz * (2.0 + 9.0 * omega * (omega - 2.0))) / (3.0 * (omega - OxxPyyPzz) * (OxxPyyPzz * (2.0 + 3.0 * omega) - 8.0 * omega));
 
 
                //relax
 
                //wadjust = OxxPyyPzz+(one-OxxPyyPzz)*fabs((mfaaa-mxxPyyPzz))/(fabs((mfaaa-mxxPyyPzz))+qudricLimitD);
                //mxxPyyPzz += wadjust*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);
-               mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - three * (one - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+               mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
 
               // mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
                //mxxMyy += omega * (-mxxMyy)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
@@ -723,37 +724,37 @@ void RheologyK17LBMKernel::calculate(int step)
 
                // linear combinations back
                mfcaa = c1o3 * (mxxMyy+mxxMzz+mxxPyyPzz);
-               mfaca = c1o3 * (-two*  mxxMyy+mxxMzz+mxxPyyPzz);
-               mfaac = c1o3 * (mxxMyy-two* mxxMzz+mxxPyyPzz);
+               mfaca = c1o3 * (-c2o1 *  mxxMyy+mxxMzz+mxxPyyPzz);
+               mfaac = c1o3 * (mxxMyy- c2o1 * mxxMzz+mxxPyyPzz);
 
                //3.
                // linear combinations
 
-               LBMReal mxxyPyzz = mfcba+mfabc;
-               LBMReal mxxyMyzz = mfcba-mfabc;
+               real mxxyPyzz = mfcba+mfabc;
+               real mxxyMyzz = mfcba-mfabc;
 
-               LBMReal mxxzPyyz = mfcab+mfacb;
-               LBMReal mxxzMyyz = mfcab-mfacb;
+               real mxxzPyyz = mfcab+mfacb;
+               real mxxzMyyz = mfcab-mfacb;
 
-               LBMReal mxyyPxzz = mfbca+mfbac;
-               LBMReal mxyyMxzz = mfbca-mfbac;
+               real mxyyPxzz = mfbca+mfbac;
+               real mxyyMxzz = mfbca-mfbac;
 
                //relax
                //////////////////////////////////////////////////////////////////////////
                //das ist der limiter
-               wadjust = Oxyz+(one-Oxyz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimitD);
+               wadjust = Oxyz+(c1o1-Oxyz)*fabs(mfbbb)/(fabs(mfbbb)+qudricLimitD);
                mfbbb += wadjust * (-mfbbb);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxxyPyzz)/(fabs(mxxyPyzz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxxyPyzz)/(fabs(mxxyPyzz)+qudricLimitP);
                mxxyPyzz += wadjust * (-mxxyPyzz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxxyMyzz)/(fabs(mxxyMyzz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxxyMyzz)/(fabs(mxxyMyzz)+qudricLimitM);
                mxxyMyzz += wadjust * (-mxxyMyzz);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxxzPyyz)/(fabs(mxxzPyyz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxxzPyyz)/(fabs(mxxzPyyz)+qudricLimitP);
                mxxzPyyz += wadjust * (-mxxzPyyz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxxzMyyz)/(fabs(mxxzMyyz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxxzMyyz)/(fabs(mxxzMyyz)+qudricLimitM);
                mxxzMyyz += wadjust * (-mxxzMyyz);
-               wadjust = OxyyPxzz+(one-OxyyPxzz)*fabs(mxyyPxzz)/(fabs(mxyyPxzz)+qudricLimitP);
+               wadjust = OxyyPxzz+(c1o1-OxyyPxzz)*fabs(mxyyPxzz)/(fabs(mxyyPxzz)+qudricLimitP);
                mxyyPxzz += wadjust * (-mxyyPxzz);
-               wadjust = OxyyMxzz+(one-OxyyMxzz)*fabs(mxyyMxzz)/(fabs(mxyyMxzz)+qudricLimitM);
+               wadjust = OxyyMxzz+(c1o1-OxyyMxzz)*fabs(mxyyMxzz)/(fabs(mxyyMxzz)+qudricLimitM);
                mxyyMxzz += wadjust * (-mxyyMxzz);
                //////////////////////////////////////////////////////////////////////////
                //ohne limiter
@@ -804,12 +805,12 @@ void RheologyK17LBMKernel::calculate(int step)
                //CUMbbc += O4 * (-CUMbbc);
                //CUMbcb += O4 * (-CUMbcb);
                //CUMcbb += O4 * (-CUMcbb);
-               CUMacc = -O4*(one / omega - c1o2) * (dyuy + dzuz) * c2o3 * A_ + (one - O4) * (CUMacc);
-               CUMcac = -O4*(one / omega - c1o2) * (dxux + dzuz) * c2o3 * A_ + (one - O4) * (CUMcac);
-               CUMcca = -O4*(one / omega - c1o2) * (dyuy + dxux) * c2o3 * A_ + (one - O4) * (CUMcca);
-               CUMbbc = -O4*(one / omega - c1o2) * Dxy           * c1o3 * B_ + (one - O4) * (CUMbbc);
-               CUMbcb = -O4*(one / omega - c1o2) * Dxz           * c1o3 * B_ + (one - O4) * (CUMbcb);
-               CUMcbb = -O4*(one / omega - c1o2) * Dyz           * c1o3 * B_ + (one - O4) * (CUMcbb);
+               CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A_ + (c1o1 - O4) * (CUMacc);
+               CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A_ + (c1o1 - O4) * (CUMcac);
+               CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A_ + (c1o1 - O4) * (CUMcca);
+               CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy           * c1o3 * B_ + (c1o1 - O4) * (CUMbbc);
+               CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz           * c1o3 * B_ + (c1o1 - O4) * (CUMbcb);
+               CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz           * c1o3 * B_ + (c1o1 - O4) * (CUMcbb);
                //////////////////////////////////////////////////////////////////////////
 
 
@@ -825,31 +826,31 @@ void RheologyK17LBMKernel::calculate(int step)
 
                //back cumulants to central moments
                //4.
-               mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;
-               mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho;
-               mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho;
+               mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+ c2o1 * mfbba * mfbab)/rho;
+               mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+ c2o1 * mfbba * mfabb)/rho;
+               mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+ c2o1 * mfbab * mfabb)/rho;
 
-               mfcca = CUMcca+(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
-               mfcac = CUMcac+(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
-               mfacc = CUMacc+(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfcca = CUMcca+(((mfcaa * mfaca+ c2o1 * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfcac = CUMcac+(((mfcaa * mfaac+ c2o1 * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
+               mfacc = CUMacc+(((mfaac * mfaca+ c2o1 * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
 
                //5.
-               mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
-               mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
-               mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+               mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+ c4o1 * mfabb * mfbbb+ c2o1 * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+               mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+ c4o1 * mfbab * mfbbb+ c2o1 * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+               mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+ c4o1 * mfbba * mfbbb+ c2o1 * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
 
                //6.
 
-               mfccc = CUMccc-((-four *  mfbbb * mfbbb
+               mfccc = CUMccc-((-c4o1 *  mfbbb * mfbbb
                   -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
-                  -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
-                  -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
-                  +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
-                     +two * (mfcaa * mfaca * mfaac)
-                     +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                  - c4o1 * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                  - c2o1 * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                  +(c4o1 * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                     + c2o1 * (mfcaa * mfaca * mfaac)
+                     + c16o1 *  mfbba * mfbab * mfabb)/(rho * rho)
                   -c1o3 * (mfacc+mfcac+mfcca)/rho
                   -c1o9 * (mfcaa+mfaca+mfaac)/rho
-                  +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                  +(c2o1 * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
                      +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
                   +c1o27*((drho * drho-drho)/(rho*rho)));
                ////////////////////////////////////////////////////////////////////////////////////
@@ -865,22 +866,22 @@ void RheologyK17LBMKernel::calculate(int step)
          //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
          ////////////////////////////////////////////////////////////////////////////////////
          // Z - Dir
-               m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+one* oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfaac-two* mfaab *  vvz+mfaaa                * (one-vz2)-one* oMdrho * vz2;
-               m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+one* oMdrho) * (vz2+vvz) * c1o2;
+               m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+ c1o1 * oMdrho) * (vz2-vvz) * c1o2;
+               m1 = -mfaac- c2o1 * mfaab *  vvz+mfaaa                * (c1o1 -vz2)- c1o1 * oMdrho * vz2;
+               m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+ c1o1 * oMdrho) * (vz2+vvz) * c1o2;
                mfaaa = m0;
                mfaab = m1;
                mfaac = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfabc * c1o2+mfabb * (vvz-c1o2)+mfaba * (vz2-vvz) * c1o2;
-               m1 = -mfabc-two* mfabb *  vvz+mfaba * (one-vz2);
+               m1 = -mfabc- c2o1 * mfabb *  vvz+mfaba * (c1o1 -vz2);
                m2 = mfabc * c1o2+mfabb * (vvz+c1o2)+mfaba * (vz2+vvz) * c1o2;
                mfaba = m0;
                mfabb = m1;
                mfabc = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacc * c1o2+mfacb * (vvz-c1o2)+(mfaca+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfacc-two* mfacb *  vvz+mfaca                  * (one-vz2)-c1o3 * oMdrho * vz2;
+               m1 = -mfacc- c2o1 * mfacb *  vvz+mfaca                  * (c1o1 -vz2)-c1o3 * oMdrho * vz2;
                m2 = mfacc * c1o2+mfacb * (vvz+c1o2)+(mfaca+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                mfaca = m0;
                mfacb = m1;
@@ -888,21 +889,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfbac * c1o2+mfbab * (vvz-c1o2)+mfbaa * (vz2-vvz) * c1o2;
-               m1 = -mfbac-two* mfbab *  vvz+mfbaa * (one-vz2);
+               m1 = -mfbac- c2o1 * mfbab *  vvz+mfbaa * (c1o1 -vz2);
                m2 = mfbac * c1o2+mfbab * (vvz+c1o2)+mfbaa * (vz2+vvz) * c1o2;
                mfbaa = m0;
                mfbab = m1;
                mfbac = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbbc * c1o2+mfbbb * (vvz-c1o2)+mfbba * (vz2-vvz) * c1o2;
-               m1 = -mfbbc-two* mfbbb *  vvz+mfbba * (one-vz2);
+               m1 = -mfbbc- c2o1 * mfbbb *  vvz+mfbba * (c1o1 -vz2);
                m2 = mfbbc * c1o2+mfbbb * (vvz+c1o2)+mfbba * (vz2+vvz) * c1o2;
                mfbba = m0;
                mfbbb = m1;
                mfbbc = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcc * c1o2+mfbcb * (vvz-c1o2)+mfbca * (vz2-vvz) * c1o2;
-               m1 = -mfbcc-two* mfbcb *  vvz+mfbca * (one-vz2);
+               m1 = -mfbcc- c2o1 * mfbcb *  vvz+mfbca * (c1o1 -vz2);
                m2 = mfbcc * c1o2+mfbcb * (vvz+c1o2)+mfbca * (vz2+vvz) * c1o2;
                mfbca = m0;
                mfbcb = m1;
@@ -910,21 +911,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcac * c1o2+mfcab * (vvz-c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfcac-two* mfcab *  vvz+mfcaa                  * (one-vz2)-c1o3 * oMdrho * vz2;
+               m1 = -mfcac- c2o1 * mfcab *  vvz+mfcaa                  * (c1o1 -vz2)-c1o3 * oMdrho * vz2;
                m2 = mfcac * c1o2+mfcab * (vvz+c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
                mfcaa = m0;
                mfcab = m1;
                mfcac = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfcbc * c1o2+mfcbb * (vvz-c1o2)+mfcba * (vz2-vvz) * c1o2;
-               m1 = -mfcbc-two* mfcbb *  vvz+mfcba * (one-vz2);
+               m1 = -mfcbc- c2o1 * mfcbb *  vvz+mfcba * (c1o1 -vz2);
                m2 = mfcbc * c1o2+mfcbb * (vvz+c1o2)+mfcba * (vz2+vvz) * c1o2;
                mfcba = m0;
                mfcbb = m1;
                mfcbc = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfccb * (vvz-c1o2)+(mfcca+c1o9 * oMdrho) * (vz2-vvz) * c1o2;
-               m1 = -mfccc-two* mfccb *  vvz+mfcca                  * (one-vz2)-c1o9 * oMdrho * vz2;
+               m1 = -mfccc- c2o1 * mfccb *  vvz+mfcca                  * (c1o1 -vz2)-c1o9 * oMdrho * vz2;
                m2 = mfccc * c1o2+mfccb * (vvz+c1o2)+(mfcca+c1o9 * oMdrho) * (vz2+vvz) * c1o2;
                mfcca = m0;
                mfccb = m1;
@@ -935,21 +936,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // Y - Dir
                m0 = mfaca * c1o2+mfaba * (vvy-c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfaca-two* mfaba *  vvy+mfaaa                  * (one-vy2)-c1o6 * oMdrho * vy2;
+               m1 = -mfaca- c2o1 * mfaba *  vvy+mfaaa                  * (c1o1 -vy2)-c1o6 * oMdrho * vy2;
                m2 = mfaca * c1o2+mfaba * (vvy+c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                mfaaa = m0;
                mfaba = m1;
                mfaca = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacb * c1o2+mfabb * (vvy-c1o2)+(mfaab+c2o3 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfacb-two* mfabb *  vvy+mfaab                  * (one-vy2)-c2o3 * oMdrho * vy2;
+               m1 = -mfacb- c2o1 * mfabb *  vvy+mfaab                  * (c1o1 -vy2)-c2o3 * oMdrho * vy2;
                m2 = mfacb * c1o2+mfabb * (vvy+c1o2)+(mfaab+c2o3 * oMdrho) * (vy2+vvy) * c1o2;
                mfaab = m0;
                mfabb = m1;
                mfacb = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfacc * c1o2+mfabc * (vvy-c1o2)+(mfaac+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfacc-two* mfabc *  vvy+mfaac                  * (one-vy2)-c1o6 * oMdrho * vy2;
+               m1 = -mfacc- c2o1 * mfabc *  vvy+mfaac                  * (c1o1 -vy2)-c1o6 * oMdrho * vy2;
                m2 = mfacc * c1o2+mfabc * (vvy+c1o2)+(mfaac+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
                mfaac = m0;
                mfabc = m1;
@@ -957,21 +958,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfbca * c1o2+mfbba * (vvy-c1o2)+mfbaa * (vy2-vvy) * c1o2;
-               m1 = -mfbca-two* mfbba *  vvy+mfbaa * (one-vy2);
+               m1 = -mfbca- c2o1 * mfbba *  vvy+mfbaa * (c1o1 -vy2);
                m2 = mfbca * c1o2+mfbba * (vvy+c1o2)+mfbaa * (vy2+vvy) * c1o2;
                mfbaa = m0;
                mfbba = m1;
                mfbca = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcb * c1o2+mfbbb * (vvy-c1o2)+mfbab * (vy2-vvy) * c1o2;
-               m1 = -mfbcb-two* mfbbb *  vvy+mfbab * (one-vy2);
+               m1 = -mfbcb- c2o1 * mfbbb *  vvy+mfbab * (c1o1 -vy2);
                m2 = mfbcb * c1o2+mfbbb * (vvy+c1o2)+mfbab * (vy2+vvy) * c1o2;
                mfbab = m0;
                mfbbb = m1;
                mfbcb = m2;
                /////////b//////////////////////////////////////////////////////////////////////////
                m0 = mfbcc * c1o2+mfbbc * (vvy-c1o2)+mfbac * (vy2-vvy) * c1o2;
-               m1 = -mfbcc-two* mfbbc *  vvy+mfbac * (one-vy2);
+               m1 = -mfbcc- c2o1 * mfbbc *  vvy+mfbac * (c1o1 -vy2);
                m2 = mfbcc * c1o2+mfbbc * (vvy+c1o2)+mfbac * (vy2+vvy) * c1o2;
                mfbac = m0;
                mfbbc = m1;
@@ -979,21 +980,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcca * c1o2+mfcba * (vvy-c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfcca-two* mfcba *  vvy+mfcaa                   * (one-vy2)-c1o18 * oMdrho * vy2;
+               m1 = -mfcca- c2o1 * mfcba *  vvy+mfcaa                   * (c1o1 -vy2)-c1o18 * oMdrho * vy2;
                m2 = mfcca * c1o2+mfcba * (vvy+c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                mfcaa = m0;
                mfcba = m1;
                mfcca = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccb * c1o2+mfcbb * (vvy-c1o2)+(mfcab+c2o9 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfccb-two* mfcbb *  vvy+mfcab                  * (one-vy2)-c2o9 * oMdrho * vy2;
+               m1 = -mfccb- c2o1 * mfcbb *  vvy+mfcab                  * (c1o1 -vy2)-c2o9 * oMdrho * vy2;
                m2 = mfccb * c1o2+mfcbb * (vvy+c1o2)+(mfcab+c2o9 * oMdrho) * (vy2+vvy) * c1o2;
                mfcab = m0;
                mfcbb = m1;
                mfccb = m2;
                /////////c//////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfcbc * (vvy-c1o2)+(mfcac+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
-               m1 = -mfccc-two* mfcbc *  vvy+mfcac                   * (one-vy2)-c1o18 * oMdrho * vy2;
+               m1 = -mfccc- c2o1 * mfcbc *  vvy+mfcac                   * (c1o1 -vy2)-c1o18 * oMdrho * vy2;
                m2 = mfccc * c1o2+mfcbc * (vvy+c1o2)+(mfcac+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
                mfcac = m0;
                mfcbc = m1;
@@ -1004,21 +1005,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                // X - Dir
                m0 = mfcaa * c1o2+mfbaa * (vvx-c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcaa-two* mfbaa *  vvx+mfaaa                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcaa- c2o1 * mfbaa *  vvx+mfaaa                   * (c1o1 -vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcaa * c1o2+mfbaa * (vvx+c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaaa = m0;
                mfbaa = m1;
                mfcaa = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcba * c1o2+mfbba * (vvx-c1o2)+(mfaba+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcba-two* mfbba *  vvx+mfaba                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcba- c2o1 * mfbba *  vvx+mfaba                  * (c1o1 -vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcba * c1o2+mfbba * (vvx+c1o2)+(mfaba+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfaba = m0;
                mfbba = m1;
                mfcba = m2;
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcca * c1o2+mfbca * (vvx-c1o2)+(mfaca+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcca-two* mfbca *  vvx+mfaca                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcca- c2o1 * mfbca *  vvx+mfaca                   * (c1o1 -vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcca * c1o2+mfbca * (vvx+c1o2)+(mfaca+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaca = m0;
                mfbca = m1;
@@ -1026,21 +1027,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcab * c1o2+mfbab * (vvx-c1o2)+(mfaab+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcab-two* mfbab *  vvx+mfaab                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcab- c2o1 * mfbab *  vvx+mfaab                  * (c1o1 -vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcab * c1o2+mfbab * (vvx+c1o2)+(mfaab+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfaab = m0;
                mfbab = m1;
                mfcab = m2;
                ///////////b////////////////////////////////////////////////////////////////////////
                m0 = mfcbb * c1o2+mfbbb * (vvx-c1o2)+(mfabb+c4o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcbb-two* mfbbb *  vvx+mfabb                  * (one-vx2)-c4o9 * oMdrho * vx2;
+               m1 = -mfcbb- c2o1 * mfbbb *  vvx+mfabb                  * (c1o1 -vx2)-c4o9 * oMdrho * vx2;
                m2 = mfcbb * c1o2+mfbbb * (vvx+c1o2)+(mfabb+c4o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfabb = m0;
                mfbbb = m1;
                mfcbb = m2;
                ///////////b////////////////////////////////////////////////////////////////////////
                m0 = mfccb * c1o2+mfbcb * (vvx-c1o2)+(mfacb+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfccb-two* mfbcb *  vvx+mfacb                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfccb- c2o1 * mfbcb *  vvx+mfacb                  * (c1o1 -vx2)-c1o9 * oMdrho * vx2;
                m2 = mfccb * c1o2+mfbcb * (vvx+c1o2)+(mfacb+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfacb = m0;
                mfbcb = m1;
@@ -1048,21 +1049,21 @@ void RheologyK17LBMKernel::calculate(int step)
                ////////////////////////////////////////////////////////////////////////////////////
                ////////////////////////////////////////////////////////////////////////////////////
                m0 = mfcac * c1o2+mfbac * (vvx-c1o2)+(mfaac+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcac-two* mfbac *  vvx+mfaac                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfcac- c2o1 * mfbac *  vvx+mfaac                   * (c1o1 -vx2)-c1o36 * oMdrho * vx2;
                m2 = mfcac * c1o2+mfbac * (vvx+c1o2)+(mfaac+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfaac = m0;
                mfbac = m1;
                mfcac = m2;
                ///////////c////////////////////////////////////////////////////////////////////////
                m0 = mfcbc * c1o2+mfbbc * (vvx-c1o2)+(mfabc+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfcbc-two* mfbbc *  vvx+mfabc                  * (one-vx2)-c1o9 * oMdrho * vx2;
+               m1 = -mfcbc- c2o1 * mfbbc *  vvx+mfabc                  * (c1o1 -vx2)-c1o9 * oMdrho * vx2;
                m2 = mfcbc * c1o2+mfbbc * (vvx+c1o2)+(mfabc+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
                mfabc = m0;
                mfbbc = m1;
                mfcbc = m2;
                ///////////c////////////////////////////////////////////////////////////////////////
                m0 = mfccc * c1o2+mfbcc * (vvx-c1o2)+(mfacc+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
-               m1 = -mfccc-two* mfbcc *  vvx+mfacc                   * (one-vx2)-c1o36 * oMdrho * vx2;
+               m1 = -mfccc- c2o1 * mfbcc *  vvx+mfacc                   * (c1o1 -vx2)-c1o36 * oMdrho * vx2;
                m2 = mfccc * c1o2+mfbcc * (vvx+c1o2)+(mfacc+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
                mfacc = m0;
                mfbcc = m1;
@@ -1073,11 +1074,11 @@ void RheologyK17LBMKernel::calculate(int step)
                //proof correctness
                //////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-               LBMReal drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+               real drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
                   +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
                   +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
                //LBMReal dif = fabs(rho - rho_post);
-               LBMReal dif = drho - drho_post;
+               real dif = drho - drho_post;
 #ifdef SINGLEPRECISION
                if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -1131,13 +1132,13 @@ void RheologyK17LBMKernel::calculate(int step)
    //timer.stop();
 }
 //////////////////////////////////////////////////////////////////////////
-double RheologyK17LBMKernel::getCalculationTime()
+real RheologyK17LBMKernel::getCalculationTime()
 {
    //return timer.getDuration();
    return timer.getTotalTime();
 }
 //////////////////////////////////////////////////////////////////////////
-void RheologyK17LBMKernel::setBulkViscosity(LBMReal value)
+void RheologyK17LBMKernel::setBulkViscosity(real value)
 {
    bulkViscosity = value;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.h
index 77bb5ce84557c87210932f8fd76b6ae846414545..80bf7dcb346f500d75a31eca68fefdd092f2e9e8 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyK17LBMKernel.h
@@ -54,35 +54,35 @@ public:
    virtual ~RheologyK17LBMKernel(void);
    virtual void calculate(int step) override;
    virtual SPtr<LBMKernel> clone() override;
-   double getCalculationTime() override;
+   real getCalculationTime() override;
    //! The value should not be equal to a shear viscosity
-   void setBulkViscosity(LBMReal value);
+   void setBulkViscosity(real value);
 protected:
    virtual void initDataSet();
 
-   virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const
+   virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const
    {
-       UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class"));
+       UB_THROW(UbException("real getRheologyCollFactor() - belongs in the derived class"));
    }
 
-   LBMReal f[D3Q27System::ENDF+1];
+   real f[D3Q27System::ENDF+1];
 
    UbTimer timer;
 
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
-   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
-   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<real,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<real,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
 
    mu::value_type muX1,muX2,muX3;
    mu::value_type muDeltaT;
    mu::value_type muNu;
-   LBMReal forcingX1;
-   LBMReal forcingX2;
-   LBMReal forcingX3;
+   real forcingX1;
+   real forcingX2;
+   real forcingX3;
    
    // bulk viscosity
-   LBMReal OxxPyyPzz; //omega2 (bulk viscosity)
-   LBMReal bulkViscosity;
+   real OxxPyyPzz; //omega2 (bulk viscosity)
+   real bulkViscosity;
 
 };
 #endif // RheologyK17LBMKernel_h__
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.cpp
index f399a00065d3cfd2be13f4184057f8223de38573..8fe8a78a41609e4acb4bcd6fcd09a7285912b6a7 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.cpp
@@ -41,7 +41,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 RheologyModelLBMKernel::RheologyModelLBMKernel() : forcingX1(0), forcingX2(0), forcingX3(0)
 {
@@ -130,61 +131,61 @@ void RheologyModelLBMKernel::calculate(int step)
 						// a b c
 						//-1 0 1
 
-						LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-						LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-						LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-						LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-						LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-						LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-						LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-						LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-						LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-						LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-						LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-						LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-						LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-						LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-						LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-						LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-						LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-						LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-						LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-						LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-						LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-						LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-						LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-						LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-						LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-						LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-						LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-						LBMReal m0, m1, m2;
-
-						LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+						real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+						real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+						real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+						real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+						real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+						real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+						real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+						real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+						real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+						real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+						real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+						real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+						real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+						real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+						real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+						real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+						real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+						real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+						real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+						real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+						real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+						real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+						real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+						real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+						real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+						real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+						real m0, m1, m2;
+
+						real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+						real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 							(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 							(mfcbb - mfabb));
-						LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+						real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 							(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 							(mfbcb - mfbab));
-						LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+						real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 							(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 							(mfbbc - mfbba));
 
-						LBMReal collFactorF = collFactor;
+						real collFactorF = collFactor;
 
 						//forcing 
 						///////////////////////////////////////////////////////////////////////////////////////////
 						if (withForcing)
 						{
-							muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-							muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-							muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+							muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+							muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+							muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 							forcingX1 = muForcingX1.Eval();
 							forcingX2 = muForcingX2.Eval();
@@ -195,7 +196,7 @@ void RheologyModelLBMKernel::calculate(int step)
 							vvz += forcingX3 * deltaT * 0.5; // Z
 						}
 						///////////////////////////////////////////////////////////////////////////////////////////               
-						LBMReal oMdrho;
+						real oMdrho;
 
 						oMdrho = mfccc + mfaaa;
 						m0 = mfaca + mfcac;
@@ -223,15 +224,15 @@ void RheologyModelLBMKernel::calculate(int step)
 						m0 += mfbbb; //hat gefehlt
 						oMdrho = 1. - (oMdrho + m0);
 
-						LBMReal vx2;
-						LBMReal vy2;
-						LBMReal vz2;
+						real vx2;
+						real vy2;
+						real vz2;
 						vx2 = vvx * vvx;
 						vy2 = vvy * vvy;
 						vz2 = vvz * vvz;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal wadjust;
-						LBMReal qudricLimit = 0.01;
+						real wadjust;
+						real qudricLimit = 0.01;
 						////////////////////////////////////////////////////////////////////////////////////
 						//Hin
 						////////////////////////////////////////////////////////////////////////////////////
@@ -462,33 +463,33 @@ void RheologyModelLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////////////
 						// Cumulants
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-						LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
+						real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+						real OxyyPxzz = 1.;//-s9;//2+s9;//
 											  //LBMReal OxyyMxzz  = 1.;//2+s9;//
-						LBMReal O4 = 1.;
-						LBMReal O5 = 1.;
-						LBMReal O6 = 1.;
+						real O4 = 1.;
+						real O5 = 1.;
+						real O6 = 1.;
 
 						//Cum 4.
 						//LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
 						//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+						real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+						real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-						LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-						LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
 
 						//Cum 5.
-						LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-						LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-						LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+						real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+						real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+						real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//Cum 6.
-						LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+						real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 							- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 							- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 							- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -502,20 +503,20 @@ void RheologyModelLBMKernel::calculate(int step)
 
 						//2.
 						// linear combinations
-						LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-						LBMReal mxxMyy = mfcaa - mfaca;
-						LBMReal mxxMzz = mfcaa - mfaac;
+						real mxxPyyPzz = mfcaa + mfaca + mfaac;
+						real mxxMyy = mfcaa - mfaca;
+						real mxxMzz = mfcaa - mfaac;
 
-						LBMReal dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-						LBMReal dyuy = dxux + collFactorF * c3o2 * mxxMyy;
-						LBMReal dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+						real dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+						real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+						real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
 
-						LBMReal Dxy = -three * collFactorF * mfbba;
-						LBMReal Dxz = -three * collFactorF * mfbab;
-						LBMReal Dyz = -three * collFactorF * mfabb;
+						real Dxy = -c3o1 * collFactorF * mfbba;
+						real Dxz = -c3o1 * collFactorF * mfbab;
+						real Dyz = -c3o1 * collFactorF * mfabb;
 						////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 						//non Newtonian fluid collision factor
-						LBMReal shearRate = sqrt(c2 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + one);
+						real shearRate = sqrt(c2o1 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + c1o1);
 						collFactorF = getRheologyCollFactor(collFactorF, shearRate, rho);
 						////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -535,14 +536,14 @@ void RheologyModelLBMKernel::calculate(int step)
 
 						//3.
 						// linear combinations
-						LBMReal mxxyPyzz = mfcba + mfabc;
-						LBMReal mxxyMyzz = mfcba - mfabc;
+						real mxxyPyzz = mfcba + mfabc;
+						real mxxyMyzz = mfcba - mfabc;
 
-						LBMReal mxxzPyyz = mfcab + mfacb;
-						LBMReal mxxzMyyz = mfcab - mfacb;
+						real mxxzPyyz = mfcab + mfacb;
+						real mxxzMyyz = mfcab - mfacb;
 
-						LBMReal mxyyPxzz = mfbca + mfbac;
-						LBMReal mxyyMxzz = mfbca - mfbac;
+						real mxyyPxzz = mfbca + mfbac;
+						real mxyyMxzz = mfbca - mfbac;
 
 						//relax
 						wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -837,11 +838,11 @@ void RheologyModelLBMKernel::calculate(int step)
 						//proof correctness
 						//////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-						LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 						//LBMReal dif = fabs(rho - rho_post);
-						LBMReal dif = rho - rho_post;
+						real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
 						if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -913,7 +914,7 @@ void RheologyModelLBMKernel::calculate(int step)
 //	return kernel;
 //}
 
-double RheologyModelLBMKernel::getCalculationTime()
+real RheologyModelLBMKernel::getCalculationTime() // returns timer.getTotalTime() converted to `real`; NOTE(review): may narrow the timer value if `real` is single precision - confirm
 {
    return timer.getTotalTime();
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.h
index 0aba5a273536feab9ce892b09ae1837df4fd6a7f..8b4ce82984d12ef33b1011fe020fe7d3f0c638e4 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel.h
@@ -51,31 +51,31 @@ public:
 	virtual ~RheologyModelLBMKernel();
 	void calculate(int step);
 	virtual SPtr<LBMKernel> clone() { UB_THROW(UbException("SPtr<LBMKernel> clone() - belongs in the derived class")); };
-	double getCalculationTime();
+	real getCalculationTime();
 
 	void swapDistributions();
 
 protected:
 	void initDataSet();
 
-	virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const { UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class")); }
+	virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const { UB_THROW(UbException("real getRheologyCollFactor() - belongs in the derived class")); } // base-class stub: ignores its arguments and always throws; the message points callers to the derived-class implementation
 
-	LBMReal f[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
 
 	UbTimer timer;
 
-	LBMReal OxyyMxzz;
+	real OxyyMxzz;
 	
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
 	mu::value_type muX1, muX2, muX3;
 	mu::value_type muDeltaT;
 	mu::value_type muNu;
-	LBMReal forcingX1;
-	LBMReal forcingX2;
-	LBMReal forcingX3;
+	real forcingX1;
+	real forcingX2;
+	real forcingX3;
 
 	bool test;
 };
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.cpp b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.cpp
index 2ab87c0b648b48fe56cff8380afdafb7deed08d4..b7d7f2c916c8ec1c76fc4fe6175d05fd9312b8c4 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.cpp
@@ -6,10 +6,12 @@
 #include "DataSet3D.h"
 #include "LBMKernel.h"
 #include "Rheology.h"
+#include "UbMath.h"
 
 #define PROOF_CORRECTNESS
 
 using namespace UbMath;
+using namespace vf::basics::constant;
 
 
 RheologyModelLBMKernel2::RheologyModelLBMKernel2() : forcingX1(0), forcingX2(0), forcingX3(0)
@@ -99,61 +101,61 @@ void RheologyModelLBMKernel2::calculate(int step)
 						// a b c
 						//-1 0 1
 
-						LBMReal mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
-						LBMReal mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
-						LBMReal mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
-						LBMReal mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
-						LBMReal mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
-						LBMReal mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
-						LBMReal mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
-						LBMReal mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
-						LBMReal mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
-						LBMReal mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
-						LBMReal mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
-						LBMReal mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
-						LBMReal mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
-
-						LBMReal mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
-						LBMReal mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
-						LBMReal mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
-						LBMReal mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
-						LBMReal mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
-						LBMReal mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
-						LBMReal mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
-						LBMReal mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
-						LBMReal mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
-						LBMReal mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-						LBMReal mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
-						LBMReal mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
-						LBMReal mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
-
-						LBMReal mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
-
-						LBMReal m0, m1, m2;
-
-						LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real mfcbb = (*this->localDistributionsF)(D3Q27System::ET_E, x1, x2, x3);
+						real mfbcb = (*this->localDistributionsF)(D3Q27System::ET_N, x1, x2, x3);
+						real mfbbc = (*this->localDistributionsF)(D3Q27System::ET_T, x1, x2, x3);
+						real mfccb = (*this->localDistributionsF)(D3Q27System::ET_NE, x1, x2, x3);
+						real mfacb = (*this->localDistributionsF)(D3Q27System::ET_NW, x1p, x2, x3);
+						real mfcbc = (*this->localDistributionsF)(D3Q27System::ET_TE, x1, x2, x3);
+						real mfabc = (*this->localDistributionsF)(D3Q27System::ET_TW, x1p, x2, x3);
+						real mfbcc = (*this->localDistributionsF)(D3Q27System::ET_TN, x1, x2, x3);
+						real mfbac = (*this->localDistributionsF)(D3Q27System::ET_TS, x1, x2p, x3);
+						real mfccc = (*this->localDistributionsF)(D3Q27System::ET_TNE, x1, x2, x3);
+						real mfacc = (*this->localDistributionsF)(D3Q27System::ET_TNW, x1p, x2, x3);
+						real mfcac = (*this->localDistributionsF)(D3Q27System::ET_TSE, x1, x2p, x3);
+						real mfaac = (*this->localDistributionsF)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+						real mfabb = (*this->nonLocalDistributionsF)(D3Q27System::ET_W, x1p, x2, x3);
+						real mfbab = (*this->nonLocalDistributionsF)(D3Q27System::ET_S, x1, x2p, x3);
+						real mfbba = (*this->nonLocalDistributionsF)(D3Q27System::ET_B, x1, x2, x3p);
+						real mfaab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SW, x1p, x2p, x3);
+						real mfcab = (*this->nonLocalDistributionsF)(D3Q27System::ET_SE, x1, x2p, x3);
+						real mfaba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BW, x1p, x2, x3p);
+						real mfcba = (*this->nonLocalDistributionsF)(D3Q27System::ET_BE, x1, x2, x3p);
+						real mfbaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BS, x1, x2p, x3p);
+						real mfbca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BN, x1, x2, x3p);
+						real mfaaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+						real mfcaa = (*this->nonLocalDistributionsF)(D3Q27System::ET_BSE, x1, x2p, x3p);
+						real mfaca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNW, x1p, x2, x3p);
+						real mfcca = (*this->nonLocalDistributionsF)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+						real mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
+
+						real m0, m1, m2;
+
+						real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+						real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 							(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 							(mfcbb - mfabb));
-						LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+						real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 							(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 							(mfbcb - mfbab));
-						LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+						real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 							(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 							(mfbbc - mfbba));
 
-						LBMReal collFactorF = collFactor;
+						real collFactorF = collFactor;
 
 						//forcing 
 						///////////////////////////////////////////////////////////////////////////////////////////
 						if (withForcing)
 						{
-							muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
-							muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
-							muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+							muX1 = static_cast<real>(x1 - 1 + ix1 * maxX1);
+							muX2 = static_cast<real>(x2 - 1 + ix2 * maxX2);
+							muX3 = static_cast<real>(x3 - 1 + ix3 * maxX3);
 
 							forcingX1 = muForcingX1.Eval();
 							forcingX2 = muForcingX2.Eval();
@@ -164,7 +166,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 							vvz += forcingX3 * deltaT * 0.5; // Z
 						}
 						///////////////////////////////////////////////////////////////////////////////////////////               
-						LBMReal oMdrho;
+						real oMdrho;
 
 						oMdrho = mfccc + mfaaa;
 						m0 = mfaca + mfcac;
@@ -192,15 +194,15 @@ void RheologyModelLBMKernel2::calculate(int step)
 						m0 += mfbbb; //hat gefehlt
 						oMdrho = 1. - (oMdrho + m0);
 
-						LBMReal vx2;
-						LBMReal vy2;
-						LBMReal vz2;
+						real vx2;
+						real vy2;
+						real vz2;
 						vx2 = vvx * vvx;
 						vy2 = vvy * vvy;
 						vz2 = vvz * vvz;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal wadjust;
-						LBMReal qudricLimit = 0.01;
+						real wadjust;
+						real qudricLimit = 0.01;
 						////////////////////////////////////////////////////////////////////////////////////
 						//Hin
 						////////////////////////////////////////////////////////////////////////////////////
@@ -379,7 +381,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						m1 = mfcca - mfaca;
 						m0 = m2 + mfbca;
 						mfaca = m0;
-						m0 += c1o3 * oMdrho;
+						m0 += vf::basics::constant::c1o3 * oMdrho;
 						mfbca = m1 - m0 * vvx;
 						mfcca = m2 - 2. * m1 * vvx + vx2 * m0;
 						////////////////////////////////////////////////////////////////////////////////////
@@ -410,7 +412,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						m1 = mfcac - mfaac;
 						m0 = m2 + mfbac;
 						mfaac = m0;
-						m0 += c1o3 * oMdrho;
+						m0 += vf::basics::constant::c1o3 * oMdrho;
 						mfbac = m1 - m0 * vvx;
 						mfcac = m2 - 2. * m1 * vvx + vx2 * m0;
 						////////////////////////////////////////////////////////////////////////////////////
@@ -431,53 +433,53 @@ void RheologyModelLBMKernel2::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////////////
 						// Cumulants
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-						LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
-											  //LBMReal OxyyMxzz  = 1.;//2+s9;//
-						LBMReal O4 = 1.;
-						LBMReal O5 = 1.;
-						LBMReal O6 = 1.;
+						real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+						real OxyyPxzz = 1.;//-s9;//2+s9;//
+											  //real OxyyMxzz  = 1.;//2+s9;//
+						real O4 = 1.;
+						real O5 = 1.;
+						real O6 = 1.;
 
 						//Cum 4.
 						//LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
 						//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						real CUMcbb = mfcbb - ((mfcaa + vf::basics::constant::c1o3) * mfabb + 2. * mfbba * mfbab);
+						real CUMbcb = mfbcb - ((mfaca + vf::basics::constant::c1o3) * mfbab + 2. * mfbba * mfabb);
+						real CUMbbc = mfbbc - ((mfaac + vf::basics::constant::c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-						LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
-						LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + vf::basics::constant::c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + vf::basics::constant::c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
+						real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + vf::basics::constant::c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho);
 
 						//Cum 5.
-						LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-						LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-						LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+						real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - vf::basics::constant::c1o3 * (mfbca + mfbac) * oMdrho;
+						real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - vf::basics::constant::c1o3 * (mfcba + mfabc) * oMdrho;
+						real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - vf::basics::constant::c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//Cum 6.
-						LBMReal CUMccc = mfccc + ((-4. * mfbbb * mfbbb
+						real CUMccc = mfccc + ((-4. * mfbbb * mfbbb
 							- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 							- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 							- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
 							+ (4. * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
 								+ 2. * (mfcaa * mfaca * mfaac)
 								+ 16. * mfbba * mfbab * mfabb)
-							- c1o3 * (mfacc + mfcac + mfcca) * oMdrho - c1o9 * oMdrho * oMdrho
+							- vf::basics::constant::c1o3 * (mfacc + mfcac + mfcca) * oMdrho - c1o9 * oMdrho * oMdrho
 							- c1o9 * (mfcaa + mfaca + mfaac) * oMdrho * (1. - 2. * oMdrho) - c1o27 * oMdrho * oMdrho * (-2. * oMdrho)
 							+ (2. * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
 								+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa)) * c2o3 * oMdrho) + c1o27 * oMdrho;
 
 						//2.
 						// linear combinations
-						LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-						LBMReal mxxMyy = mfcaa - mfaca;
-						LBMReal mxxMzz = mfcaa - mfaac;
+						real mxxPyyPzz = mfcaa + mfaca + mfaac;
+						real mxxMyy = mfcaa - mfaca;
+						real mxxMzz = mfcaa - mfaac;
 
-						LBMReal dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-						LBMReal dyuy = dxux + collFactorF * c3o2 * mxxMyy;
-						LBMReal dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+						real dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+						real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+						real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
 
 //						LBMReal Dxy = -three * collFactorF * mfbba;
 //						LBMReal Dxz = -three * collFactorF * mfbab;
@@ -486,7 +488,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//non Newtonian fluid collision factor
 //						LBMReal shearRate = sqrt(c2 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + one);
 
-						LBMReal shearFactor = sqrt(c1o2 * ((mfcaa - mfaaa * c1o3) * (mfcaa - mfaaa * c1o3) + (mfaca - mfaaa * c1o3) * (mfaca - mfaaa * c1o3) + (mfaac - mfaaa * c1o3) * (mfaac - mfaaa * c1o3)) + mfbba * mfbba + mfbab * mfbab + mfabb * mfabb) + UbMath::Epsilon<LBMReal>::val();
+						real shearFactor = sqrt(c1o2 * ((mfcaa - mfaaa * vf::basics::constant::c1o3) * (mfcaa - mfaaa * vf::basics::constant::c1o3) + (mfaca - mfaaa * vf::basics::constant::c1o3) * (mfaca - mfaaa * vf::basics::constant::c1o3) + (mfaac - mfaaa * vf::basics::constant::c1o3) * (mfaac - mfaaa * vf::basics::constant::c1o3)) + mfbba * mfbba + mfbab * mfbab + mfabb * mfabb) + UbMath::Epsilon<real>::val();
 
 						//collFactorF = getRheologyCollFactor(collFactorF, shearRate, rho);
 						////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -505,7 +507,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//mfbba += getRheologyCollFactor(collFactorF, std::abs(Dxy) / (rho + one), rho) * (-mfbba);
 
 						SPtr<Rheology> thix = Rheology::getInstance();
-						LBMReal tau0 = thix->getYieldStress();
+						real tau0 = thix->getYieldStress();
 
 						mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz /*+ ((mxxPyyPzz-mfaaa)/shearFactor*tau0)*/) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
 						//mxxPyyPzz += OxxPyyPzz * (mfaaa - mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -516,7 +518,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//mfbab += collFactorF * (-mfbab + mfbab/shearFactor*tau0);
 						//mfbba += collFactorF * (-mfbba + mfbba/shearFactor*tau0);
 
-						collFactorF = collFactor * (c1 - tau0 / shearFactor);
+						collFactorF = collFactor * (c1o1 - tau0 / shearFactor);
 
 						mxxMyy += collFactorF * (-mxxMyy/* + mxxMyy / shearFactor * tau0*/) - 3. * (1. - c1o2 * collFactorF) * (vx2 * dxux - vy2 * dyuy);
 						mxxMzz += collFactorF * (-mxxMzz/* + mxxMzz / shearFactor * tau0*/) - 3. * (1. - c1o2 * collFactorF) * (vx2 * dxux - vz2 * dzuz);
@@ -527,20 +529,20 @@ void RheologyModelLBMKernel2::calculate(int step)
 
 
 						// linear combinations back
-						mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-						mfaca = c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz);
-						mfaac = c1o3 * (mxxMyy - 2. * mxxMzz + mxxPyyPzz);
+						mfcaa = vf::basics::constant::c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+						mfaca = vf::basics::constant::c1o3 * (-2. * mxxMyy + mxxMzz + mxxPyyPzz);
+						mfaac = vf::basics::constant::c1o3 * (mxxMyy - 2. * mxxMzz + mxxPyyPzz);
 
 						//3.
 						// linear combinations
-						LBMReal mxxyPyzz = mfcba + mfabc;
-						LBMReal mxxyMyzz = mfcba - mfabc;
+						real mxxyPyzz = mfcba + mfabc;
+						real mxxyMyzz = mfcba - mfabc;
 
-						LBMReal mxxzPyyz = mfcab + mfacb;
-						LBMReal mxxzMyyz = mfcab - mfacb;
+						real mxxzPyyz = mfcab + mfacb;
+						real mxxzMyyz = mfcab - mfacb;
 
-						LBMReal mxyyPxzz = mfbca + mfbac;
-						LBMReal mxyyMxzz = mfbca - mfbac;
+						real mxyyPxzz = mfbca + mfbac;
+						real mxyyMxzz = mfbca - mfbac;
 
 						//relax
 						wadjust = OxyyMxzz + (1. - OxyyMxzz) * fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -589,18 +591,18 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//mfbcb = CUMbcb + ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//mfbbc = CUMbbc + ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						mfcbb = CUMcbb + ((mfcaa + vf::basics::constant::c1o3) * mfabb + 2. * mfbba * mfbab);
+						mfbcb = CUMbcb + ((mfaca + vf::basics::constant::c1o3) * mfbab + 2. * mfbba * mfabb);
+						mfbbc = CUMbbc + ((mfaac + vf::basics::constant::c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
-						mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
-						mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
+						mfcca = CUMcca + (mfcaa * mfaca + 2. * mfbba * mfbba) + vf::basics::constant::c1o3 * (mfcaa + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
+						mfcac = CUMcac + (mfcaa * mfaac + 2. * mfbab * mfbab) + vf::basics::constant::c1o3 * (mfcaa + mfaac) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
+						mfacc = CUMacc + (mfaac * mfaca + 2. * mfabb * mfabb) + vf::basics::constant::c1o3 * (mfaac + mfaca) * oMdrho + c1o9 * (oMdrho - 1) * oMdrho;
 
 						//5.
-						mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) * oMdrho;
-						mfcbc = CUMcbc + (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc) * oMdrho;
-						mfccb = CUMccb + (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab) * oMdrho;
+						mfbcc = CUMbcc + (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) + vf::basics::constant::c1o3 * (mfbca + mfbac) * oMdrho;
+						mfcbc = CUMcbc + (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) + vf::basics::constant::c1o3 * (mfcba + mfabc) * oMdrho;
+						mfccb = CUMccb + (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) + vf::basics::constant::c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//6.
 						mfccc = CUMccc - ((-4. * mfbbb * mfbbb
@@ -610,7 +612,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 							+ (4. * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
 								+ 2. * (mfcaa * mfaca * mfaac)
 								+ 16. * mfbba * mfbab * mfabb)
-							- c1o3 * (mfacc + mfcac + mfcca) * oMdrho - c1o9 * oMdrho * oMdrho
+							- vf::basics::constant::c1o3 * (mfacc + mfcac + mfcca) * oMdrho - c1o9 * oMdrho * oMdrho
 							- c1o9 * (mfcaa + mfaca + mfaac) * oMdrho * (1. - 2. * oMdrho) - c1o27 * oMdrho * oMdrho * (-2. * oMdrho)
 							+ (2. * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
 								+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa)) * c2o3 * oMdrho) - c1o27 * oMdrho;
@@ -642,9 +644,9 @@ void RheologyModelLBMKernel2::calculate(int step)
 						mfabb = m1;
 						mfabc = m2;
 						////////////////////////////////////////////////////////////////////////////////////
-						m0 = mfacc * c1o2 + mfacb * (vvz - c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-						m1 = -mfacc - 2. * mfacb * vvz + mfaca * (1. - vz2) - c1o3 * oMdrho * vz2;
-						m2 = mfacc * c1o2 + mfacb * (vvz + c1o2) + (mfaca + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
+						m0 = mfacc * c1o2 + mfacb * (vvz - c1o2) + (mfaca + vf::basics::constant::c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
+						m1 = -mfacc - 2. * mfacb * vvz + mfaca * (1. - vz2) - vf::basics::constant::c1o3 * oMdrho * vz2;
+						m2 = mfacc * c1o2 + mfacb * (vvz + c1o2) + (mfaca + vf::basics::constant::c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
 						mfaca = m0;
 						mfacb = m1;
 						mfacc = m2;
@@ -672,9 +674,9 @@ void RheologyModelLBMKernel2::calculate(int step)
 						mfbcc = m2;
 						////////////////////////////////////////////////////////////////////////////////////
 						////////////////////////////////////////////////////////////////////////////////////
-						m0 = mfcac * c1o2 + mfcab * (vvz - c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
-						m1 = -mfcac - 2. * mfcab * vvz + mfcaa * (1. - vz2) - c1o3 * oMdrho * vz2;
-						m2 = mfcac * c1o2 + mfcab * (vvz + c1o2) + (mfcaa + c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
+						m0 = mfcac * c1o2 + mfcab * (vvz - c1o2) + (mfcaa + vf::basics::constant::c1o3 * oMdrho) * (vz2 - vvz) * c1o2;
+						m1 = -mfcac - 2. * mfcab * vvz + mfcaa * (1. - vz2) - vf::basics::constant::c1o3 * oMdrho * vz2;
+						m2 = mfcac * c1o2 + mfcab * (vvz + c1o2) + (mfcaa + vf::basics::constant::c1o3 * oMdrho) * (vz2 + vvz) * c1o2;
 						mfcaa = m0;
 						mfcab = m1;
 						mfcac = m2;
@@ -835,11 +837,11 @@ void RheologyModelLBMKernel2::calculate(int step)
 						//proof correctness
 						//////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-						LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 						//LBMReal dif = fabs(rho - rho_post);
-						LBMReal dif = rho - rho_post;
+						real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
 						if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -911,7 +913,7 @@ void RheologyModelLBMKernel2::calculate(int step)
 //	return kernel;
 //}
 
-double RheologyModelLBMKernel2::getCalculationTime()
+real RheologyModelLBMKernel2::getCalculationTime() // forwards timer.getTotalTime() of the kernel's UbTimer member; NOTE(review): return type was double - if real is float (SINGLEPRECISION builds) the value is narrowed, confirm intended
 {
    return timer.getTotalTime();
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.h b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.h
index c3a20a11038f74586266c8338027a6187ef2fea5..ca058f1e4e67cd8cf51e971999f9a2a359aa197d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyModelLBMKernel2.h
@@ -19,31 +19,31 @@ public:
 	virtual ~RheologyModelLBMKernel2();
 	void calculate(int step);
 	virtual SPtr<LBMKernel> clone() { UB_THROW(UbException("SPtr<LBMKernel> clone() - belongs in the derived class")); };
-	double getCalculationTime();
+	real getCalculationTime();
 
 	void swapDistributions();
 
 protected:
 	void initDataSet();
 
-	virtual LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const { UB_THROW(UbException("LBMReal getRheologyCollFactor() - belongs in the derived class")); }
+	virtual real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const { UB_THROW(UbException("real getRheologyCollFactor() - belongs in the derived class")); } // stub: always throws UbException; derived kernels are expected to override it
 
-	LBMReal f[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
 
 	UbTimer timer;
 
-	LBMReal OxyyMxzz;
+	real OxyyMxzz;
 	
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
 	mu::value_type muX1, muX2, muX3;
 	mu::value_type muDeltaT;
 	mu::value_type muNu;
-	LBMReal forcingX1;
-	LBMReal forcingX2;
-	LBMReal forcingX3;
+	real forcingX1;
+	real forcingX2;
+	real forcingX3;
 
 	bool test;
 };
diff --git a/src/cpu/VirtualFluidsCore/LBM/RheologyPowellEyringModelLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/RheologyPowellEyringModelLBMKernel.h
index a68eca9cd627be7b56bcfebfd429d80c3f924aa8..acd02101fed8793175a6aae62394ce14e2847832 100644
--- a/src/cpu/VirtualFluidsCore/LBM/RheologyPowellEyringModelLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/RheologyPowellEyringModelLBMKernel.h
@@ -60,7 +60,7 @@ public:
 		return kernel;
 	}
 protected:
-	LBMReal getRheologyCollFactor(LBMReal omegaInf, LBMReal shearRate, LBMReal drho) const override
+	real getRheologyCollFactor(real omegaInf, real shearRate, real drho) const override // delegates to Rheology::getPowellEyringCollFactor, passing all three arguments through unchanged
 	{
 		return Rheology::getPowellEyringCollFactor(omegaInf, shearRate, drho);
 	}
diff --git a/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.cpp
index a13e4fc716725156adc28841da22c4b2516dc24f..70bfdfdadc0d3ebb50befbc022798dc91717276d 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath; // replaced by the vf::basics::constant using-directive below
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 ThixotropyExpLBMKernel::ThixotropyExpLBMKernel()
@@ -134,37 +135,37 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////
 						//////////////////////////////////////////////////////////////////////////
 
-						LBMReal mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
-						LBMReal mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
-						LBMReal mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
-						LBMReal mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
-						LBMReal mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
-						LBMReal mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
-						LBMReal mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
-						LBMReal mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
-						LBMReal mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
-						LBMReal mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
-						LBMReal mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
-						LBMReal mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
-						LBMReal mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
+						real mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
+						real mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
+						real mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
+						real mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
+						real mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
+						real mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
+						real mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
+						real mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
+						real mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
+						real mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
+						real mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
+						real mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
+						real mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
 								  
-						LBMReal mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
-						LBMReal mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
-						LBMReal mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
-						LBMReal mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
-						LBMReal mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
-						LBMReal mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
-						LBMReal mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
-						LBMReal mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
-						LBMReal mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
-						LBMReal mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-						LBMReal mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
-						LBMReal mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
-						LBMReal mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
+						real mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
+						real mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
+						real mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
+						real mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
+						real mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
+						real mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
+						real mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
+						real mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
+						real mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
+						real mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+						real mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
+						real mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
+						real mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
 								  
-						LBMReal mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
+						real mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
 
-						LBMReal lambda = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real lambda = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
@@ -214,19 +215,19 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-						LBMReal m0, m1, m2;
+						real m0, m1, m2;
 
-						LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+						real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 							(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 							(mfcbb - mfabb));
-						LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+						real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 							(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 							(mfbcb - mfbab));
-						LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+						real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 							(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 							(mfbbc - mfbba));
 						
@@ -240,9 +241,9 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						///////////////////////////////////////////////////////////////////////////////////////////
 						if (withForcing)
 						{
-							muX1 = static_cast<double>(x1 - 1 + ix1*maxX1);
-							muX2 = static_cast<double>(x2 - 1 + ix2*maxX2);
-							muX3 = static_cast<double>(x3 - 1 + ix3*maxX3);
+							muX1 = static_cast<real>(x1 - 1 + ix1*maxX1);
+							muX2 = static_cast<real>(x2 - 1 + ix2*maxX2);
+							muX3 = static_cast<real>(x3 - 1 + ix3*maxX3);
 
 							forcingX1 = muForcingX1.Eval();
 							forcingX2 = muForcingX2.Eval();
@@ -253,7 +254,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							vvz += forcingX3*deltaT*0.5; // Z
 						}
 						///////////////////////////////////////////////////////////////////////////////////////////               
-						LBMReal oMdrho;
+						real oMdrho;
 
 						oMdrho = mfccc + mfaaa;
 						m0 = mfaca + mfcac;
@@ -281,15 +282,15 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						m0 += mfbbb; //hat gefehlt
 						oMdrho = 1. - (oMdrho + m0);
 
-						LBMReal vx2;
-						LBMReal vy2;
-						LBMReal vz2;
+						real vx2;
+						real vy2;
+						real vz2;
 						vx2 = vvx*vvx;
 						vy2 = vvy*vvy;
 						vz2 = vvz*vvz;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal wadjust;
-						LBMReal qudricLimit = 0.01;
+						real wadjust;
+						real qudricLimit = 0.01;
 						////////////////////////////////////////////////////////////////////////////////////
 						//Hin
 						////////////////////////////////////////////////////////////////////////////////////
@@ -520,33 +521,33 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////////////
 						// Cumulants
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-						LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
+						real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+						real OxyyPxzz = 1.;//-s9;//2+s9;//
 											  //LBMReal OxyyMxzz  = 1.;//2+s9;//
-						LBMReal O4 = 1.;
-						LBMReal O5 = 1.;
-						LBMReal O6 = 1.;
+						real O4 = 1.;
+						real O5 = 1.;
+						real O6 = 1.;
 
 						//Cum 4.
 						//LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
 						//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+						real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+						real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
-						LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
-						LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
 
 						//Cum 5.
-						LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-						LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-						LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+						real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+						real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+						real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//Cum 6.
-						LBMReal CUMccc = mfccc + ((-4. *  mfbbb * mfbbb
+						real CUMccc = mfccc + ((-4. *  mfbbb * mfbbb
 							- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 							- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 							- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -560,19 +561,19 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 						//2.
 						// linear combinations
-						LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-						LBMReal mxxMyy = mfcaa - mfaca;
-						LBMReal mxxMzz = mfcaa - mfaac;
+						real mxxPyyPzz = mfcaa + mfaca + mfaac;
+						real mxxMyy = mfcaa - mfaca;
+						real mxxMzz = mfcaa - mfaac;
 
-						LBMReal dxux = -c1o2 * collFactorF *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
-						LBMReal dyuy = dxux + collFactorF * c3o2 * mxxMyy;
-						LBMReal dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+						real dxux = -c1o2 * collFactorF *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
+						real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+						real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
 
-						LBMReal Dxy =-three*collFactorF*mfbba;
-                  LBMReal Dxz =-three*collFactorF*mfbab;
-                  LBMReal Dyz =-three*collFactorF*mfabb;
+						real Dxy =-c3o1*collFactorF*mfbba;
+						real Dxz =-c3o1*collFactorF*mfbab;
+						real Dyz =-c3o1*collFactorF*mfabb;
 
-						LBMReal gammaDot = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + one);
+						real gammaDot = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + c1o1);
 						//collFactorF = BinghamModel::getBinghamCollFactor(collFactorF, gammaDot, rho);
 
 						//relax
@@ -591,14 +592,14 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 						//3.
 						// linear combinations
-						LBMReal mxxyPyzz = mfcba + mfabc;
-						LBMReal mxxyMyzz = mfcba - mfabc;
+						real mxxyPyzz = mfcba + mfabc;
+						real mxxyMyzz = mfcba - mfabc;
 
-						LBMReal mxxzPyyz = mfcab + mfacb;
-						LBMReal mxxzMyyz = mfcab - mfacb;
+						real mxxzPyyz = mfcab + mfacb;
+						real mxxzMyyz = mfcab - mfacb;
 
-						LBMReal mxyyPxzz = mfbca + mfbac;
-						LBMReal mxyyMxzz = mfbca - mfbac;
+						real mxyyPxzz = mfbca + mfbac;
+						real mxyyMxzz = mfbca - mfbac;
 
 						//relax
 						wadjust = OxyyMxzz + (1. - OxyyMxzz)*fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -893,11 +894,11 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						//proof correctness
 						//////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-						LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 						//LBMReal dif = fabs(rho - rho_post);
-						LBMReal dif = rho - rho_post;
+						real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
 						if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -959,7 +960,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 
 
-						LBMReal ux, uy, uz;
+						real ux, uy, uz;
 
 						ux = vvx;						
 						uy = vvy;
@@ -1005,7 +1006,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
 
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+						real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 							(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 							((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb;
 					
@@ -1021,56 +1022,56 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 						//collFactorF = (collFactorF < 0.5) ? 0.5 : collFactorF;
 
-						LBMReal dlambda = one / theta - alpha * lambda * gammaDot;
+						real dlambda = c1o1 / theta - alpha * lambda * gammaDot;
 
 
 						//////////////////////////////////////////////////////////////////////////
 						//collision Factorized Central moment Kernel Geier 2015
 						//////////////////////////////////////////////////////////////////////////               
-						LBMReal Mom000 = mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
+						real Mom000 = mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
 							mfacb + mfacc + mfbaa + mfbab + mfbac + mfbba + mfbbb + mfbbc + mfbca +
 							mfbcb + mfbcc + mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb + mfccc;
 						
 						Mom000 += dlambda*c1o2;  //1
 
 																												   //(100)//
-						LBMReal Mom100 = mfaaa*(-1 - ux) + mfaab*(-1 - ux) + mfaac*(-1 - ux) + mfaba*(-1 - ux) + mfabb*(-1 - ux) + mfabc*(-1 - ux) + mfaca*(-1 - ux) +
+						real Mom100 = mfaaa*(-1 - ux) + mfaab*(-1 - ux) + mfaac*(-1 - ux) + mfaba*(-1 - ux) + mfabb*(-1 - ux) + mfabc*(-1 - ux) + mfaca*(-1 - ux) +
 							mfacb*(-1 - ux) + mfacc*(-1 - ux) + mfcaa*(1 - ux) + mfcab*(1 - ux) + mfcac*(1 - ux) + mfcba*(1 - ux) + mfcbb*(1 - ux) +
 							mfcbc*(1 - ux) + mfcca*(1 - ux) + mfccb*(1 - ux) + mfccc*(1 - ux) - mfbaa*ux - mfbab*ux - mfbac*ux - mfbba*ux - mfbbb*ux -
 							mfbbc*ux - mfbca*ux - mfbcb*ux - mfbcc*ux;
 
-						LBMReal Mom010 = mfaaa*(-1 - uy) + mfaab*(-1 - uy) + mfaac*(-1 - uy) + mfbaa*(-1 - uy) + mfbab*(-1 - uy) + mfbac*(-1 - uy) + mfcaa*(-1 - uy) +
+						real Mom010 = mfaaa*(-1 - uy) + mfaab*(-1 - uy) + mfaac*(-1 - uy) + mfbaa*(-1 - uy) + mfbab*(-1 - uy) + mfbac*(-1 - uy) + mfcaa*(-1 - uy) +
 							mfcab*(-1 - uy) + mfcac*(-1 - uy) + mfaca*(1 - uy) + mfacb*(1 - uy) + mfacc*(1 - uy) + mfbca*(1 - uy) + mfbcb*(1 - uy) +
 							mfbcc*(1 - uy) + mfcca*(1 - uy) + mfccb*(1 - uy) + mfccc*(1 - uy) - mfaba*uy - mfabb*uy - mfabc*uy - mfbba*uy - mfbbb*uy -
 							mfbbc*uy - mfcba*uy - mfcbb*uy - mfcbc*uy;
 
-						LBMReal Mom001 = mfaaa*(-1 - uz) + mfaba*(-1 - uz) + mfaca*(-1 - uz) + mfbaa*(-1 - uz) + mfbba*(-1 - uz) + mfbca*(-1 - uz) + mfcaa*(-1 - uz) +
+						real Mom001 = mfaaa*(-1 - uz) + mfaba*(-1 - uz) + mfaca*(-1 - uz) + mfbaa*(-1 - uz) + mfbba*(-1 - uz) + mfbca*(-1 - uz) + mfcaa*(-1 - uz) +
 							mfcba*(-1 - uz) + mfcca*(-1 - uz) + mfaac*(1 - uz) + mfabc*(1 - uz) + mfacc*(1 - uz) + mfbac*(1 - uz) + mfbbc*(1 - uz) +
 							mfbcc*(1 - uz) + mfcac*(1 - uz) + mfcbc*(1 - uz) + mfccc*(1 - uz) - mfaab*uz - mfabb*uz - mfacb*uz - mfbab*uz - mfbbb*uz -
 							mfbcb*uz - mfcab*uz - mfcbb*uz - mfccb*uz;
 						////
 
 						//(110)//
-						LBMReal Mom110 = mfaaa*(-1 - ux)*(-1 - uy) + mfaab*(-1 - ux)*(-1 - uy) + mfaac*(-1 - ux)*(-1 - uy) + mfcaa*(1 - ux)*(-1 - uy) +
+						real Mom110 = mfaaa*(-1 - ux)*(-1 - uy) + mfaab*(-1 - ux)*(-1 - uy) + mfaac*(-1 - ux)*(-1 - uy) + mfcaa*(1 - ux)*(-1 - uy) +
 							mfcab*(1 - ux)*(-1 - uy) + mfcac*(1 - ux)*(-1 - uy) - mfbaa*ux*(-1 - uy) - mfbab*ux*(-1 - uy) - mfbac*ux*(-1 - uy) +
 							mfaca*(-1 - ux)*(1 - uy) + mfacb*(-1 - ux)*(1 - uy) + mfacc*(-1 - ux)*(1 - uy) + mfcca*(1 - ux)*(1 - uy) + mfccb*(1 - ux)*(1 - uy) +
 							mfccc*(1 - ux)*(1 - uy) - mfbca*ux*(1 - uy) - mfbcb*ux*(1 - uy) - mfbcc*ux*(1 - uy) - mfaba*(-1 - ux)*uy - mfabb*(-1 - ux)*uy -
 							mfabc*(-1 - ux)*uy - mfcba*(1 - ux)*uy - mfcbb*(1 - ux)*uy - mfcbc*(1 - ux)*uy + mfbba*ux*uy + mfbbb*ux*uy + mfbbc*ux*uy;
 
-						LBMReal Mom101 = mfaaa*(-1 - ux)*(-1 - uz) + mfaba*(-1 - ux)*(-1 - uz) + mfaca*(-1 - ux)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uz) +
+						real Mom101 = mfaaa*(-1 - ux)*(-1 - uz) + mfaba*(-1 - ux)*(-1 - uz) + mfaca*(-1 - ux)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uz) +
 							mfcba*(1 - ux)*(-1 - uz) + mfcca*(1 - ux)*(-1 - uz) - mfbaa*ux*(-1 - uz) - mfbba*ux*(-1 - uz) - mfbca*ux*(-1 - uz) +
 							mfaac*(-1 - ux)*(1 - uz) + mfabc*(-1 - ux)*(1 - uz) + mfacc*(-1 - ux)*(1 - uz) + mfcac*(1 - ux)*(1 - uz) + mfcbc*(1 - ux)*(1 - uz) +
 							mfccc*(1 - ux)*(1 - uz) - mfbac*ux*(1 - uz) - mfbbc*ux*(1 - uz) - mfbcc*ux*(1 - uz) - mfaab*(-1 - ux)*uz - mfabb*(-1 - ux)*uz -
 							mfacb*(-1 - ux)*uz - mfcab*(1 - ux)*uz - mfcbb*(1 - ux)*uz - mfccb*(1 - ux)*uz + mfbab*ux*uz + mfbbb*ux*uz + mfbcb*ux*uz;
 
-						LBMReal Mom011 = mfaaa*(-1 - uy)*(-1 - uz) + mfbaa*(-1 - uy)*(-1 - uz) + mfcaa*(-1 - uy)*(-1 - uz) + mfaca*(1 - uy)*(-1 - uz) +
+						real Mom011 = mfaaa*(-1 - uy)*(-1 - uz) + mfbaa*(-1 - uy)*(-1 - uz) + mfcaa*(-1 - uy)*(-1 - uz) + mfaca*(1 - uy)*(-1 - uz) +
 							mfbca*(1 - uy)*(-1 - uz) + mfcca*(1 - uy)*(-1 - uz) - mfaba*uy*(-1 - uz) - mfbba*uy*(-1 - uz) - mfcba*uy*(-1 - uz) +
 							mfaac*(-1 - uy)*(1 - uz) + mfbac*(-1 - uy)*(1 - uz) + mfcac*(-1 - uy)*(1 - uz) + mfacc*(1 - uy)*(1 - uz) + mfbcc*(1 - uy)*(1 - uz) +
 							mfccc*(1 - uy)*(1 - uz) - mfabc*uy*(1 - uz) - mfbbc*uy*(1 - uz) - mfcbc*uy*(1 - uz) - mfaab*(-1 - uy)*uz - mfbab*(-1 - uy)*uz -
 							mfcab*(-1 - uy)*uz - mfacb*(1 - uy)*uz - mfbcb*(1 - uy)*uz - mfccb*(1 - uy)*uz + mfabb*uy*uz + mfbbb*uy*uz + mfcbb*uy*uz;
 						////
 
-						LBMReal Mom111 = mfaaa*(-1 - ux)*(-1 - uy)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uy)*(-1 - uz) - mfbaa*ux*(-1 - uy)*(-1 - uz) +
+						real Mom111 = mfaaa*(-1 - ux)*(-1 - uy)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uy)*(-1 - uz) - mfbaa*ux*(-1 - uy)*(-1 - uz) +
 							mfaca*(-1 - ux)*(1 - uy)*(-1 - uz) + mfcca*(1 - ux)*(1 - uy)*(-1 - uz) - mfbca*ux*(1 - uy)*(-1 - uz) -
 							mfaba*(-1 - ux)*uy*(-1 - uz) - mfcba*(1 - ux)*uy*(-1 - uz) + mfbba*ux*uy*(-1 - uz) + mfaac*(-1 - ux)*(-1 - uy)*(1 - uz) +
 							mfcac*(1 - ux)*(-1 - uy)*(1 - uz) - mfbac*ux*(-1 - uy)*(1 - uz) + mfacc*(-1 - ux)*(1 - uy)*(1 - uz) +
@@ -1080,14 +1081,14 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							mfbbb*ux*uy*uz;
 
 						//(200)//
-						LBMReal Mom200 = ((mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb +
+						real Mom200 = ((mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb +
 							mfccc)*pow(-1 + ux, 2) +
 							(mfbaa + mfbab + mfbac + mfbba + mfbbb + mfbbc + mfbca +
 								mfbcb + mfbcc)*pow(ux, 2) +
 								(mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
 									mfacb + mfacc)*pow(1 + ux, 2)) - Mom000 / 3;
 
-						LBMReal Mom020 = ((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom020 = ((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
 							(mfcca + mfccb + mfccc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
@@ -1097,7 +1098,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							(mfbaa + mfbab + mfbac)*pow(1 + uy, 2) +
 							(mfcaa + mfcab + mfcac)*pow(1 + uy, 2)) - Mom000 / 3;
 
-						LBMReal Mom002 = (mfaba + mfabc + mfaca + mfacc + mfbba + mfbbc + mfbca + mfbcc +
+						real Mom002 = (mfaba + mfabc + mfaca + mfacc + mfbba + mfbbc + mfbca + mfbcc +
 							mfcba + mfcbc + mfcca + mfccc + mfaac*pow(-1 + uz, 2) +
 							mfbac*pow(-1 + uz, 2) + mfcac*pow(-1 + uz, 2) +
 							2 * mfaba*uz - 2 * mfabc*uz + 2 * mfaca*uz - 2 * mfacc*uz +
@@ -1115,7 +1116,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////
 
 						//(210)//
-						LBMReal Mom210 = (pow(1 + ux, 2)*(-((mfaca + mfacb + mfacc)*(-1 + uy)) -
+						real Mom210 = (pow(1 + ux, 2)*(-((mfaca + mfacb + mfacc)*(-1 + uy)) -
 							(mfaba + mfabb + mfabc)*uy -
 							(mfaaa + mfaab + mfaac)*(1 + uy)) +
 							pow(ux, 2)*(-((mfbca + mfbcb + mfbcc)*(-1 + uy)) -
@@ -1125,7 +1126,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							(mfcba + mfcbb + mfcbc)*uy -
 								(mfcaa + mfcab + mfcac)*(1 + uy))) - Mom010 / 3;
 
-						LBMReal Mom201 = (-(pow(1 + ux, 2)*(mfaba - mfabc + mfaca - mfacc +
+						real Mom201 = (-(pow(1 + ux, 2)*(mfaba - mfabc + mfaca - mfacc +
 							mfaac*(-1 + uz) + mfaab*uz + mfaba*uz + mfabb*uz +
 							mfabc*uz + mfaca*uz + mfacb*uz + mfacc*uz +
 							mfaaa*(1 + uz))) -
@@ -1136,7 +1137,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 								mfcac*(-1 + uz) + mfcab*uz + mfcba*uz + mfcbb*uz +
 								mfcbc*uz + mfcca*uz + mfccb*uz + mfccc*uz + mfcaa*(1 + uz))) - Mom001 / 3;
 
-						LBMReal Mom120 = ((-1 - ux)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom120 = ((-1 - ux)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
 							(mfaaa + mfaab + mfaac)*pow(1 + uy, 2)) -
 							ux*((mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
@@ -1147,7 +1148,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 									(mfcaa + mfcab + mfcac)*pow(1 + uy, 2))) - Mom100 / 3;
 
 
-						LBMReal Mom102 = (-((1 + ux)*(mfaba + mfabc + mfaca + mfacc +
+						real Mom102 = (-((1 + ux)*(mfaba + mfabc + mfaca + mfacc +
 							mfaac*pow(-1 + uz, 2) + 2 * mfaba*uz - 2 * mfabc*uz +
 							2 * mfaca*uz - 2 * mfacc*uz + mfaab*pow(uz, 2) +
 							mfaba*pow(uz, 2) + mfabb*pow(uz, 2) +
@@ -1168,7 +1169,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 									mfccb*pow(uz, 2) + mfccc*pow(uz, 2) +
 									mfcaa*pow(1 + uz, 2))) - Mom100 / 3;
 
-						LBMReal Mom021 = (-(pow(1 + uy, 2)*(mfaac*(-1 + uz) + mfaab*uz +
+						real Mom021 = (-(pow(1 + uy, 2)*(mfaac*(-1 + uz) + mfaab*uz +
 							mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz + mfaba*(1 + uz)) -
 							pow(-1 + uy, 2)*(mfacc*(-1 + uz) + mfacb*uz +
@@ -1181,7 +1182,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							pow(uy, 2)*(mfcbc*(-1 + uz) + mfcbb*uz + mfcba*(1 + uz)) -
 							pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz + mfcca*(1 + uz))) - Mom001 / 3;
 
-						LBMReal Mom012 = (-((1 + uy)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
+						real Mom012 = (-((1 + uy)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 							mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) -
@@ -1203,7 +1204,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 
 						//(220)//
-						LBMReal Mom220 = (pow(1 + ux, 2)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom220 = (pow(1 + ux, 2)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
 							(mfaaa + mfaab + mfaac)*pow(1 + uy, 2)) +
 							pow(ux, 2)*((mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
@@ -1213,7 +1214,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 							(mfcba + mfcbb + mfcbc)*pow(uy, 2) +
 								(mfcaa + mfcab + mfcac)*pow(1 + uy, 2))) - Mom000 / 9;
 
-						LBMReal Mom202 = (pow(1 + ux, 2)*(mfaba + mfabc + mfaca + mfacc +
+						real Mom202 = (pow(1 + ux, 2)*(mfaba + mfabc + mfaca + mfacc +
 							mfaac*pow(-1 + uz, 2) + 2 * mfaba*uz - 2 * mfabc*uz +
 							2 * mfaca*uz - 2 * mfacc*uz + mfaab*pow(uz, 2) +
 							mfaba*pow(uz, 2) + mfabb*pow(uz, 2) +
@@ -1235,7 +1236,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 								mfccb*pow(uz, 2) + mfccc*pow(uz, 2) +
 								mfcaa*pow(1 + uz, 2))) - Mom000 / 9;
 
-						LBMReal Mom022 = (pow(1 + uy, 2)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
+						real Mom022 = (pow(1 + uy, 2)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 							mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) +
@@ -1256,7 +1257,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////
 
 						//(221)//
-						LBMReal Mom221 = (pow(1 + ux, 2)*(-(pow(1 + uy, 2)*
+						real Mom221 = (pow(1 + ux, 2)*(-(pow(1 + uy, 2)*
 							(mfaac*(-1 + uz) + mfaab*uz + mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz +
 								mfaba*(1 + uz)) -
@@ -1275,7 +1276,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 								pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz +
 									mfcca*(1 + uz)))) - Mom001 / 9;
 
-						LBMReal Mom212 = (pow(1 + ux, 2)*(-((1 + uy)*
+						real Mom212 = (pow(1 + ux, 2)*(-((1 + uy)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1297,7 +1298,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 									(-1 + uy)*(mfccc*pow(-1 + uz, 2) + mfccb*pow(uz, 2) +
 										mfcca*pow(1 + uz, 2)))) - Mom010 / 9;
 
-						LBMReal Mom122 = ((-1 - ux)*(pow(1 + uy, 2)*
+						real Mom122 = ((-1 - ux)*(pow(1 + uy, 2)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1320,7 +1321,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////
 
 						//(211)//
-						LBMReal Mom211 = (pow(1 + ux, 2)*((1 + uy)*(mfaac*(-1 + uz) + mfaab*uz +
+						real Mom211 = (pow(1 + ux, 2)*((1 + uy)*(mfaac*(-1 + uz) + mfaab*uz +
 							mfaaa*(1 + uz)) +
 							uy*(mfabc*(-1 + uz) + mfabb*uz + mfaba*(1 + uz)) +
 							(-1 + uy)*(mfacc*(-1 + uz) + mfacb*uz + mfaca*(1 + uz))) +
@@ -1333,7 +1334,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 								uy*(mfcbc*(-1 + uz) + mfcbb*uz + mfcba*(1 + uz)) +
 								(-1 + uy)*(mfccc*(-1 + uz) + mfccb*uz + mfcca*(1 + uz)))) - Mom011 / 3;
 
-						LBMReal Mom121 = ((-1 - ux)*(-(pow(1 + uy, 2)*
+						real Mom121 = ((-1 - ux)*(-(pow(1 + uy, 2)*
 							(mfaac*(-1 + uz) + mfaab*uz + mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz +
 								mfaba*(1 + uz)) -
@@ -1352,7 +1353,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 										pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz +
 											mfcca*(1 + uz)))) - Mom101 / 3;
 
-						LBMReal Mom112 = ((-1 - ux)*(-((1 + uy)*(mfaac*pow(-1 + uz, 2) +
+						real Mom112 = ((-1 - ux)*(-((1 + uy)*(mfaac*pow(-1 + uz, 2) +
 							mfaab*pow(uz, 2) + mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) -
@@ -1373,7 +1374,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 						////
 
 						//(222)//
-						LBMReal Mom222 = (pow(1 + ux, 2)*(pow(1 + uy, 2)*
+						real Mom222 = (pow(1 + ux, 2)*(pow(1 + uy, 2)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1400,7 +1401,7 @@ void ThixotropyExpLBMKernel::calculate(int step)
 
 
 
-						LBMReal Meq000 = drho+dlambda*c1o2;
+						real Meq000 = drho+dlambda*c1o2;
 
 
 						// relaxation Central Moment MRT
@@ -1821,46 +1822,46 @@ void ThixotropyExpLBMKernel::calculate(int step)
 	}
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyExpLBMKernel::getCalculationTime()
+real ThixotropyExpLBMKernel::getCalculationTime()
 {
 	//return timer.getDuration();
 	return timer.getTotalTime();
 }
 //////////////////////////////////////////////////////////////////////////
-void ThixotropyExpLBMKernel::setCollisionFactorF(double collFactor)
+void ThixotropyExpLBMKernel::setCollisionFactorF(real collFactor)
 {
 	setCollisionFactor(collFactor);
 	this->collFactorF = collFactor;
 
 }
 //////////////////////////////////////////////////////////////////////////
-void ThixotropyExpLBMKernel::setCollisionFactorH(double collFactor)
+void ThixotropyExpLBMKernel::setCollisionFactorH(real collFactor)
 {
 	this->collFactorH = collFactor;
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyExpLBMKernel::getCollisionFactorF() const
+real ThixotropyExpLBMKernel::getCollisionFactorF() const
 {
 	return this->collFactorF;
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyExpLBMKernel::getCollisionFactorH() const
+real ThixotropyExpLBMKernel::getCollisionFactorH() const
 {
 	return this->collFactorH;
 }
-void ThixotropyExpLBMKernel::setAlpha(double alpha)
+void ThixotropyExpLBMKernel::setAlpha(real alpha)
 {
 	this->alpha = alpha;
 }
-double ThixotropyExpLBMKernel::getAlpha() const
+real ThixotropyExpLBMKernel::getAlpha() const
 {
 	return this->alpha;
 }
-void ThixotropyExpLBMKernel::setTheta(double theta)
+void ThixotropyExpLBMKernel::setTheta(real theta)
 {
 	this->theta = theta;
 }
-double ThixotropyExpLBMKernel::getTheta() const
+real ThixotropyExpLBMKernel::getTheta() const
 {
 	return this->theta;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.h
index 3104808ad21c3976a71ab7a84c06b542932ae08d..1316493c81c56a849cdcc39a9adfb064aa684205 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ThixotropyExpLBMKernel.h
@@ -22,50 +22,50 @@ public:
 	virtual ~ThixotropyExpLBMKernel(void);
 	virtual void calculate(int step);
 	virtual SPtr<LBMKernel> clone();
-	double getCalculationTime();
+	real getCalculationTime();
  
-	void setCollisionFactorF(double collFactor);
-   void setCollisionFactorH(double collFactor);
-   double getCollisionFactorF() const;
-   double getCollisionFactorH() const;
+	void setCollisionFactorF(real collFactor);
+   void setCollisionFactorH(real collFactor);
+   real getCollisionFactorF() const;
+   real getCollisionFactorH() const;
 
-	void setAlpha(double alpha);
-	double getAlpha() const;
+	void setAlpha(real alpha);
+	real getAlpha() const;
 
-	void setTheta(double theta);
-	double getTheta() const;
+	void setTheta(real theta);
+	real getTheta() const;
 
 	void swapDistributions();
 
 protected:
 	virtual void initDataSet();
-	LBMReal f[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
 
 	UbTimer timer;
 
-	LBMReal OxyyMxzz;
+	real OxyyMxzz;
 	Parameter parameter;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
 
 	mu::value_type muX1, muX2, muX3;
 	mu::value_type muDeltaT;
 	mu::value_type muNu;
-	LBMReal forcingX1;
-	LBMReal forcingX2;
-	LBMReal forcingX3;
+	real forcingX1;
+	real forcingX2;
+	real forcingX3;
 
-	LBMReal collFactorF;
-   LBMReal collFactorH;
+	real collFactorF;
+   real collFactorH;
 
-	LBMReal theta;
-	LBMReal alpha;
+	real theta;
+	real alpha;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.cpp
index b369b45a6c7b10efb91716634443c88aa520a8cf..f866381d42538f01e7194d67415e134d9106ab16 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.cpp
@@ -8,7 +8,8 @@
 
 #define PROOF_CORRECTNESS
 
-using namespace UbMath;
+//using namespace UbMath;
+using namespace vf::basics::constant;
 
 //////////////////////////////////////////////////////////////////////////
 ThixotropyLBMKernel::ThixotropyLBMKernel()
@@ -134,37 +135,37 @@ void ThixotropyLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////
 						//////////////////////////////////////////////////////////////////////////
 
-						LBMReal mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
-						LBMReal mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
-						LBMReal mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
-						LBMReal mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
-						LBMReal mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
-						LBMReal mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
-						LBMReal mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
-						LBMReal mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
-						LBMReal mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
-						LBMReal mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
-						LBMReal mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
-						LBMReal mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
-						LBMReal mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
+						real mfcbb = (*this->localDistributionsH)(D3Q27System::ET_E, x1, x2, x3);
+						real mfbcb = (*this->localDistributionsH)(D3Q27System::ET_N, x1, x2, x3);
+						real mfbbc = (*this->localDistributionsH)(D3Q27System::ET_T, x1, x2, x3);
+						real mfccb = (*this->localDistributionsH)(D3Q27System::ET_NE, x1, x2, x3);
+						real mfacb = (*this->localDistributionsH)(D3Q27System::ET_NW, x1p, x2, x3);
+						real mfcbc = (*this->localDistributionsH)(D3Q27System::ET_TE, x1, x2, x3);
+						real mfabc = (*this->localDistributionsH)(D3Q27System::ET_TW, x1p, x2, x3);
+						real mfbcc = (*this->localDistributionsH)(D3Q27System::ET_TN, x1, x2, x3);
+						real mfbac = (*this->localDistributionsH)(D3Q27System::ET_TS, x1, x2p, x3);
+						real mfccc = (*this->localDistributionsH)(D3Q27System::ET_TNE, x1, x2, x3);
+						real mfacc = (*this->localDistributionsH)(D3Q27System::ET_TNW, x1p, x2, x3);
+						real mfcac = (*this->localDistributionsH)(D3Q27System::ET_TSE, x1, x2p, x3);
+						real mfaac = (*this->localDistributionsH)(D3Q27System::ET_TSW, x1p, x2p, x3);
 								  
-						LBMReal mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
-						LBMReal mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
-						LBMReal mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
-						LBMReal mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
-						LBMReal mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
-						LBMReal mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
-						LBMReal mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
-						LBMReal mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
-						LBMReal mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
-						LBMReal mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
-						LBMReal mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
-						LBMReal mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
-						LBMReal mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
+						real mfabb = (*this->nonLocalDistributionsH)(D3Q27System::ET_W, x1p, x2, x3);
+						real mfbab = (*this->nonLocalDistributionsH)(D3Q27System::ET_S, x1, x2p, x3);
+						real mfbba = (*this->nonLocalDistributionsH)(D3Q27System::ET_B, x1, x2, x3p);
+						real mfaab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SW, x1p, x2p, x3);
+						real mfcab = (*this->nonLocalDistributionsH)(D3Q27System::ET_SE, x1, x2p, x3);
+						real mfaba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BW, x1p, x2, x3p);
+						real mfcba = (*this->nonLocalDistributionsH)(D3Q27System::ET_BE, x1, x2, x3p);
+						real mfbaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BS, x1, x2p, x3p);
+						real mfbca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BN, x1, x2, x3p);
+						real mfaaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+						real mfcaa = (*this->nonLocalDistributionsH)(D3Q27System::ET_BSE, x1, x2p, x3p);
+						real mfaca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNW, x1p, x2, x3p);
+						real mfcca = (*this->nonLocalDistributionsH)(D3Q27System::ET_BNE, x1, x2, x3p);
 								  
-						LBMReal mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
+						real mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
 
-						LBMReal lambda = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real lambda = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
@@ -214,35 +215,35 @@ void ThixotropyLBMKernel::calculate(int step)
 
 						mfbbb = (*this->zeroDistributionsF)(x1, x2, x3);
 
-						LBMReal m0, m1, m2;
+						real m0, m1, m2;
 
-						LBMReal rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 
-						LBMReal vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+						real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
 							(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
 							(mfcbb - mfabb));
-						LBMReal vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+						real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
 							(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
 							(mfbcb - mfbab));
-						LBMReal vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+						real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
 							(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
 							(mfbbc - mfbba));
 						
 
-						LBMReal eta0 = (1/collFactor-c1o2)*c1o3;
-						LBMReal eta = (1 + lambda)* eta0;
-						collFactorF = one/(3*eta/(rho+one)+c1o2);
+						real eta0 = (1/collFactor-c1o2)*c1o3;
+						real eta = (1 + lambda)* eta0;
+						collFactorF = c1o1 /(3*eta/(rho+ c1o1)+c1o2);
 						//collFactorF = collFactor;
 
 						//forcing 
 						///////////////////////////////////////////////////////////////////////////////////////////
 						if (withForcing)
 						{
-							muX1 = static_cast<double>(x1 - 1 + ix1*maxX1);
-							muX2 = static_cast<double>(x2 - 1 + ix2*maxX2);
-							muX3 = static_cast<double>(x3 - 1 + ix3*maxX3);
+							muX1 = static_cast<real>(x1 - 1 + ix1*maxX1);
+							muX2 = static_cast<real>(x2 - 1 + ix2*maxX2);
+							muX3 = static_cast<real>(x3 - 1 + ix3*maxX3);
 
 							forcingX1 = muForcingX1.Eval();
 							forcingX2 = muForcingX2.Eval();
@@ -253,7 +254,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							vvz += forcingX3*deltaT*0.5; // Z
 						}
 						///////////////////////////////////////////////////////////////////////////////////////////               
-						LBMReal oMdrho;
+						real oMdrho;
 
 						oMdrho = mfccc + mfaaa;
 						m0 = mfaca + mfcac;
@@ -281,15 +282,15 @@ void ThixotropyLBMKernel::calculate(int step)
 						m0 += mfbbb; //hat gefehlt
 						oMdrho = 1. - (oMdrho + m0);
 
-						LBMReal vx2;
-						LBMReal vy2;
-						LBMReal vz2;
+						real vx2;
+						real vy2;
+						real vz2;
 						vx2 = vvx*vvx;
 						vy2 = vvy*vvy;
 						vz2 = vvz*vvz;
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal wadjust;
-						LBMReal qudricLimit = 0.01;
+						real wadjust;
+						real qudricLimit = 0.01;
 						////////////////////////////////////////////////////////////////////////////////////
 						//Hin
 						////////////////////////////////////////////////////////////////////////////////////
@@ -520,33 +521,33 @@ void ThixotropyLBMKernel::calculate(int step)
 						////////////////////////////////////////////////////////////////////////////////////
 						// Cumulants
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal OxxPyyPzz = 1.; //omega2 or bulk viscosity
-						LBMReal OxyyPxzz = 1.;//-s9;//2+s9;//
-											  //LBMReal OxyyMxzz  = 1.;//2+s9;//
-						LBMReal O4 = 1.;
-						LBMReal O5 = 1.;
-						LBMReal O6 = 1.;
+						real OxxPyyPzz = 1.; //omega2 or bulk viscosity
+						real OxyyPxzz = 1.;//-s9;//2+s9;//
+											  //real OxyyMxzz  = 1.;//2+s9;//
+						real O4 = 1.;
+						real O5 = 1.;
+						real O6 = 1.;
 
 						//Cum 4.
 						//LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3 * oMdrho) * mfabb + 2. * mfbba * mfbab); // till 18.05.2015
 						//LBMReal CUMbcb = mfbcb - ((mfaca + c1o3 * oMdrho) * mfbab + 2. * mfbba * mfabb); // till 18.05.2015
 						//LBMReal CUMbbc = mfbbc - ((mfaac + c1o3 * oMdrho) * mfbba + 2. * mfbab * mfabb); // till 18.05.2015
 
-						LBMReal CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
-						LBMReal CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
-						LBMReal CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
+						real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + 2. * mfbba * mfbab);
+						real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + 2. * mfbba * mfabb);
+						real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + 2. * mfbab * mfabb);
 
-						LBMReal CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
-						LBMReal CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
-						LBMReal CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMcca = mfcca - ((mfcaa * mfaca + 2. * mfbba * mfbba) + c1o3 * (mfcaa + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMcac = mfcac - ((mfcaa * mfaac + 2. * mfbab * mfbab) + c1o3 * (mfcaa + mfaac) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
+						real CUMacc = mfacc - ((mfaac * mfaca + 2. * mfabb * mfabb) + c1o3 * (mfaac + mfaca) * oMdrho + c1o9*(oMdrho - 1)*oMdrho);
 
 						//Cum 5.
-						LBMReal CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
-						LBMReal CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
-						LBMReal CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
+						real CUMbcc = mfbcc - (mfaac * mfbca + mfaca * mfbac + 4. * mfabb * mfbbb + 2. * (mfbab * mfacb + mfbba * mfabc)) - c1o3 * (mfbca + mfbac) * oMdrho;
+						real CUMcbc = mfcbc - (mfaac * mfcba + mfcaa * mfabc + 4. * mfbab * mfbbb + 2. * (mfabb * mfcab + mfbba * mfbac)) - c1o3 * (mfcba + mfabc) * oMdrho;
+						real CUMccb = mfccb - (mfcaa * mfacb + mfaca * mfcab + 4. * mfbba * mfbbb + 2. * (mfbab * mfbca + mfabb * mfcba)) - c1o3 * (mfacb + mfcab) * oMdrho;
 
 						//Cum 6.
-						LBMReal CUMccc = mfccc + ((-4. *  mfbbb * mfbbb
+						real CUMccc = mfccc + ((-4. *  mfbbb * mfbbb
 							- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
 							- 4. * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
 							- 2. * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb))
@@ -560,17 +561,17 @@ void ThixotropyLBMKernel::calculate(int step)
 
 						//2.
 						// linear combinations
-						LBMReal mxxPyyPzz = mfcaa + mfaca + mfaac;
-						LBMReal mxxMyy = mfcaa - mfaca;
-						LBMReal mxxMzz = mfcaa - mfaac;
+						real mxxPyyPzz = mfcaa + mfaca + mfaac;
+						real mxxMyy = mfcaa - mfaca;
+						real mxxMzz = mfcaa - mfaac;
 
-						LBMReal dxux = -c1o2 * collFactorF *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
-						LBMReal dyuy = dxux + collFactorF * c3o2 * mxxMyy;
-						LBMReal dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+						real dxux = -c1o2 * collFactorF *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz*(mfaaa - mxxPyyPzz);
+						real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+						real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
 
-						LBMReal Dxy =-three*collFactorF*mfbba;
-                  LBMReal Dxz =-three*collFactorF*mfbab;
-                  LBMReal Dyz =-three*collFactorF*mfabb;
+						real Dxy =-c3o1 *collFactorF*mfbba;
+						real Dxz =-c3o1*collFactorF*mfbab;
+						real Dyz =-c3o1*collFactorF*mfabb;
 
 						//relax
 						mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - 3. * (1. - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
@@ -588,14 +589,14 @@ void ThixotropyLBMKernel::calculate(int step)
 
 						//3.
 						// linear combinations
-						LBMReal mxxyPyzz = mfcba + mfabc;
-						LBMReal mxxyMyzz = mfcba - mfabc;
+						real mxxyPyzz = mfcba + mfabc;
+						real mxxyMyzz = mfcba - mfabc;
 
-						LBMReal mxxzPyyz = mfcab + mfacb;
-						LBMReal mxxzMyyz = mfcab - mfacb;
+						real mxxzPyyz = mfcab + mfacb;
+						real mxxzMyyz = mfcab - mfacb;
 
-						LBMReal mxyyPxzz = mfbca + mfbac;
-						LBMReal mxyyMxzz = mfbca - mfbac;
+						real mxyyPxzz = mfbca + mfbac;
+						real mxyyMxzz = mfbca - mfbac;
 
 						//relax
 						wadjust = OxyyMxzz + (1. - OxyyMxzz)*fabs(mfbbb) / (fabs(mfbbb) + qudricLimit);
@@ -890,11 +891,11 @@ void ThixotropyLBMKernel::calculate(int step)
 						//proof correctness
 						//////////////////////////////////////////////////////////////////////////
 #ifdef  PROOF_CORRECTNESS
-						LBMReal rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+						real rho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
 							+ (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
 							+ (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
 						//LBMReal dif = fabs(rho - rho_post);
-						LBMReal dif = rho - rho_post;
+						real dif = rho - rho_post;
 #ifdef SINGLEPRECISION
 						if (dif > 10.0E-7 || dif < -10.0E-7)
 #else
@@ -956,7 +957,7 @@ void ThixotropyLBMKernel::calculate(int step)
 
 
 
-						LBMReal ux, uy, uz;
+						real ux, uy, uz;
 
 						ux = vvx;						
 						uy = vvy;
@@ -1002,7 +1003,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						mfbbb = (*this->zeroDistributionsH)(x1, x2, x3);
 
 						////////////////////////////////////////////////////////////////////////////////////
-						LBMReal drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+						real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 							(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
 							((mfabb + mfcbb) + (mfbab + mfbcb)) + (mfbba + mfbbc)) + mfbbb;
 					
@@ -1012,59 +1013,59 @@ void ThixotropyLBMKernel::calculate(int step)
 						//LBMReal theta = 60 * 1.28172e+06;
 						//LBMReal alpha = 0.005;// *10.0;
 
-						LBMReal gammaDot = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + one);
+						real gammaDot = sqrt(dxux * dxux + dyuy * dyuy + dzuz * dzuz + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) / (rho + c1o1);
 
-						LBMReal dlambda = one / theta - alpha * lambda * gammaDot;
+						real dlambda = c1o1 / theta - alpha * lambda * gammaDot;
 
 						//LBMReal dlambda = one / (T*pow(lambda,m)) - alpha * lambda * gammaDot;
 
 						//////////////////////////////////////////////////////////////////////////
 						//collision Factorized Central moment Kernel Geier 2015
 						//////////////////////////////////////////////////////////////////////////               
-						LBMReal Mom000 = mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
+						real Mom000 = mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
 							mfacb + mfacc + mfbaa + mfbab + mfbac + mfbba + mfbbb + mfbbc + mfbca +
 							mfbcb + mfbcc + mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb + mfccc;
 						
 						Mom000 += dlambda*c1o2;  //1
 
 																												   //(100)//
-						LBMReal Mom100 = mfaaa*(-1 - ux) + mfaab*(-1 - ux) + mfaac*(-1 - ux) + mfaba*(-1 - ux) + mfabb*(-1 - ux) + mfabc*(-1 - ux) + mfaca*(-1 - ux) +
+						real Mom100 = mfaaa*(-1 - ux) + mfaab*(-1 - ux) + mfaac*(-1 - ux) + mfaba*(-1 - ux) + mfabb*(-1 - ux) + mfabc*(-1 - ux) + mfaca*(-1 - ux) +
 							mfacb*(-1 - ux) + mfacc*(-1 - ux) + mfcaa*(1 - ux) + mfcab*(1 - ux) + mfcac*(1 - ux) + mfcba*(1 - ux) + mfcbb*(1 - ux) +
 							mfcbc*(1 - ux) + mfcca*(1 - ux) + mfccb*(1 - ux) + mfccc*(1 - ux) - mfbaa*ux - mfbab*ux - mfbac*ux - mfbba*ux - mfbbb*ux -
 							mfbbc*ux - mfbca*ux - mfbcb*ux - mfbcc*ux;
 
-						LBMReal Mom010 = mfaaa*(-1 - uy) + mfaab*(-1 - uy) + mfaac*(-1 - uy) + mfbaa*(-1 - uy) + mfbab*(-1 - uy) + mfbac*(-1 - uy) + mfcaa*(-1 - uy) +
+						real Mom010 = mfaaa*(-1 - uy) + mfaab*(-1 - uy) + mfaac*(-1 - uy) + mfbaa*(-1 - uy) + mfbab*(-1 - uy) + mfbac*(-1 - uy) + mfcaa*(-1 - uy) +
 							mfcab*(-1 - uy) + mfcac*(-1 - uy) + mfaca*(1 - uy) + mfacb*(1 - uy) + mfacc*(1 - uy) + mfbca*(1 - uy) + mfbcb*(1 - uy) +
 							mfbcc*(1 - uy) + mfcca*(1 - uy) + mfccb*(1 - uy) + mfccc*(1 - uy) - mfaba*uy - mfabb*uy - mfabc*uy - mfbba*uy - mfbbb*uy -
 							mfbbc*uy - mfcba*uy - mfcbb*uy - mfcbc*uy;
 
-						LBMReal Mom001 = mfaaa*(-1 - uz) + mfaba*(-1 - uz) + mfaca*(-1 - uz) + mfbaa*(-1 - uz) + mfbba*(-1 - uz) + mfbca*(-1 - uz) + mfcaa*(-1 - uz) +
+						real Mom001 = mfaaa*(-1 - uz) + mfaba*(-1 - uz) + mfaca*(-1 - uz) + mfbaa*(-1 - uz) + mfbba*(-1 - uz) + mfbca*(-1 - uz) + mfcaa*(-1 - uz) +
 							mfcba*(-1 - uz) + mfcca*(-1 - uz) + mfaac*(1 - uz) + mfabc*(1 - uz) + mfacc*(1 - uz) + mfbac*(1 - uz) + mfbbc*(1 - uz) +
 							mfbcc*(1 - uz) + mfcac*(1 - uz) + mfcbc*(1 - uz) + mfccc*(1 - uz) - mfaab*uz - mfabb*uz - mfacb*uz - mfbab*uz - mfbbb*uz -
 							mfbcb*uz - mfcab*uz - mfcbb*uz - mfccb*uz;
 						////
 
 						//(110)//
-						LBMReal Mom110 = mfaaa*(-1 - ux)*(-1 - uy) + mfaab*(-1 - ux)*(-1 - uy) + mfaac*(-1 - ux)*(-1 - uy) + mfcaa*(1 - ux)*(-1 - uy) +
+						real Mom110 = mfaaa*(-1 - ux)*(-1 - uy) + mfaab*(-1 - ux)*(-1 - uy) + mfaac*(-1 - ux)*(-1 - uy) + mfcaa*(1 - ux)*(-1 - uy) +
 							mfcab*(1 - ux)*(-1 - uy) + mfcac*(1 - ux)*(-1 - uy) - mfbaa*ux*(-1 - uy) - mfbab*ux*(-1 - uy) - mfbac*ux*(-1 - uy) +
 							mfaca*(-1 - ux)*(1 - uy) + mfacb*(-1 - ux)*(1 - uy) + mfacc*(-1 - ux)*(1 - uy) + mfcca*(1 - ux)*(1 - uy) + mfccb*(1 - ux)*(1 - uy) +
 							mfccc*(1 - ux)*(1 - uy) - mfbca*ux*(1 - uy) - mfbcb*ux*(1 - uy) - mfbcc*ux*(1 - uy) - mfaba*(-1 - ux)*uy - mfabb*(-1 - ux)*uy -
 							mfabc*(-1 - ux)*uy - mfcba*(1 - ux)*uy - mfcbb*(1 - ux)*uy - mfcbc*(1 - ux)*uy + mfbba*ux*uy + mfbbb*ux*uy + mfbbc*ux*uy;
 
-						LBMReal Mom101 = mfaaa*(-1 - ux)*(-1 - uz) + mfaba*(-1 - ux)*(-1 - uz) + mfaca*(-1 - ux)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uz) +
+						real Mom101 = mfaaa*(-1 - ux)*(-1 - uz) + mfaba*(-1 - ux)*(-1 - uz) + mfaca*(-1 - ux)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uz) +
 							mfcba*(1 - ux)*(-1 - uz) + mfcca*(1 - ux)*(-1 - uz) - mfbaa*ux*(-1 - uz) - mfbba*ux*(-1 - uz) - mfbca*ux*(-1 - uz) +
 							mfaac*(-1 - ux)*(1 - uz) + mfabc*(-1 - ux)*(1 - uz) + mfacc*(-1 - ux)*(1 - uz) + mfcac*(1 - ux)*(1 - uz) + mfcbc*(1 - ux)*(1 - uz) +
 							mfccc*(1 - ux)*(1 - uz) - mfbac*ux*(1 - uz) - mfbbc*ux*(1 - uz) - mfbcc*ux*(1 - uz) - mfaab*(-1 - ux)*uz - mfabb*(-1 - ux)*uz -
 							mfacb*(-1 - ux)*uz - mfcab*(1 - ux)*uz - mfcbb*(1 - ux)*uz - mfccb*(1 - ux)*uz + mfbab*ux*uz + mfbbb*ux*uz + mfbcb*ux*uz;
 
-						LBMReal Mom011 = mfaaa*(-1 - uy)*(-1 - uz) + mfbaa*(-1 - uy)*(-1 - uz) + mfcaa*(-1 - uy)*(-1 - uz) + mfaca*(1 - uy)*(-1 - uz) +
+						real Mom011 = mfaaa*(-1 - uy)*(-1 - uz) + mfbaa*(-1 - uy)*(-1 - uz) + mfcaa*(-1 - uy)*(-1 - uz) + mfaca*(1 - uy)*(-1 - uz) +
 							mfbca*(1 - uy)*(-1 - uz) + mfcca*(1 - uy)*(-1 - uz) - mfaba*uy*(-1 - uz) - mfbba*uy*(-1 - uz) - mfcba*uy*(-1 - uz) +
 							mfaac*(-1 - uy)*(1 - uz) + mfbac*(-1 - uy)*(1 - uz) + mfcac*(-1 - uy)*(1 - uz) + mfacc*(1 - uy)*(1 - uz) + mfbcc*(1 - uy)*(1 - uz) +
 							mfccc*(1 - uy)*(1 - uz) - mfabc*uy*(1 - uz) - mfbbc*uy*(1 - uz) - mfcbc*uy*(1 - uz) - mfaab*(-1 - uy)*uz - mfbab*(-1 - uy)*uz -
 							mfcab*(-1 - uy)*uz - mfacb*(1 - uy)*uz - mfbcb*(1 - uy)*uz - mfccb*(1 - uy)*uz + mfabb*uy*uz + mfbbb*uy*uz + mfcbb*uy*uz;
 						////
 
-						LBMReal Mom111 = mfaaa*(-1 - ux)*(-1 - uy)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uy)*(-1 - uz) - mfbaa*ux*(-1 - uy)*(-1 - uz) +
+						real Mom111 = mfaaa*(-1 - ux)*(-1 - uy)*(-1 - uz) + mfcaa*(1 - ux)*(-1 - uy)*(-1 - uz) - mfbaa*ux*(-1 - uy)*(-1 - uz) +
 							mfaca*(-1 - ux)*(1 - uy)*(-1 - uz) + mfcca*(1 - ux)*(1 - uy)*(-1 - uz) - mfbca*ux*(1 - uy)*(-1 - uz) -
 							mfaba*(-1 - ux)*uy*(-1 - uz) - mfcba*(1 - ux)*uy*(-1 - uz) + mfbba*ux*uy*(-1 - uz) + mfaac*(-1 - ux)*(-1 - uy)*(1 - uz) +
 							mfcac*(1 - ux)*(-1 - uy)*(1 - uz) - mfbac*ux*(-1 - uy)*(1 - uz) + mfacc*(-1 - ux)*(1 - uy)*(1 - uz) +
@@ -1074,14 +1075,14 @@ void ThixotropyLBMKernel::calculate(int step)
 							mfbbb*ux*uy*uz;
 
 						//(200)//
-						LBMReal Mom200 = ((mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb +
+						real Mom200 = ((mfcaa + mfcab + mfcac + mfcba + mfcbb + mfcbc + mfcca + mfccb +
 							mfccc)*pow(-1 + ux, 2) +
 							(mfbaa + mfbab + mfbac + mfbba + mfbbb + mfbbc + mfbca +
 								mfbcb + mfbcc)*pow(ux, 2) +
 								(mfaaa + mfaab + mfaac + mfaba + mfabb + mfabc + mfaca +
 									mfacb + mfacc)*pow(1 + ux, 2)) - Mom000 / 3;
 
-						LBMReal Mom020 = ((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom020 = ((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
 							(mfcca + mfccb + mfccc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
@@ -1091,7 +1092,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							(mfbaa + mfbab + mfbac)*pow(1 + uy, 2) +
 							(mfcaa + mfcab + mfcac)*pow(1 + uy, 2)) - Mom000 / 3;
 
-						LBMReal Mom002 = (mfaba + mfabc + mfaca + mfacc + mfbba + mfbbc + mfbca + mfbcc +
+						real Mom002 = (mfaba + mfabc + mfaca + mfacc + mfbba + mfbbc + mfbca + mfbcc +
 							mfcba + mfcbc + mfcca + mfccc + mfaac*pow(-1 + uz, 2) +
 							mfbac*pow(-1 + uz, 2) + mfcac*pow(-1 + uz, 2) +
 							2 * mfaba*uz - 2 * mfabc*uz + 2 * mfaca*uz - 2 * mfacc*uz +
@@ -1109,7 +1110,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						////
 
 						//(210)//
-						LBMReal Mom210 = (pow(1 + ux, 2)*(-((mfaca + mfacb + mfacc)*(-1 + uy)) -
+						real Mom210 = (pow(1 + ux, 2)*(-((mfaca + mfacb + mfacc)*(-1 + uy)) -
 							(mfaba + mfabb + mfabc)*uy -
 							(mfaaa + mfaab + mfaac)*(1 + uy)) +
 							pow(ux, 2)*(-((mfbca + mfbcb + mfbcc)*(-1 + uy)) -
@@ -1119,7 +1120,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							(mfcba + mfcbb + mfcbc)*uy -
 								(mfcaa + mfcab + mfcac)*(1 + uy))) - Mom010 / 3;
 
-						LBMReal Mom201 = (-(pow(1 + ux, 2)*(mfaba - mfabc + mfaca - mfacc +
+						real Mom201 = (-(pow(1 + ux, 2)*(mfaba - mfabc + mfaca - mfacc +
 							mfaac*(-1 + uz) + mfaab*uz + mfaba*uz + mfabb*uz +
 							mfabc*uz + mfaca*uz + mfacb*uz + mfacc*uz +
 							mfaaa*(1 + uz))) -
@@ -1130,7 +1131,7 @@ void ThixotropyLBMKernel::calculate(int step)
 								mfcac*(-1 + uz) + mfcab*uz + mfcba*uz + mfcbb*uz +
 								mfcbc*uz + mfcca*uz + mfccb*uz + mfccc*uz + mfcaa*(1 + uz))) - Mom001 / 3;
 
-						LBMReal Mom120 = ((-1 - ux)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom120 = ((-1 - ux)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
 							(mfaaa + mfaab + mfaac)*pow(1 + uy, 2)) -
 							ux*((mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
@@ -1141,7 +1142,7 @@ void ThixotropyLBMKernel::calculate(int step)
 									(mfcaa + mfcab + mfcac)*pow(1 + uy, 2))) - Mom100 / 3;
 
 
-						LBMReal Mom102 = (-((1 + ux)*(mfaba + mfabc + mfaca + mfacc +
+						real Mom102 = (-((1 + ux)*(mfaba + mfabc + mfaca + mfacc +
 							mfaac*pow(-1 + uz, 2) + 2 * mfaba*uz - 2 * mfabc*uz +
 							2 * mfaca*uz - 2 * mfacc*uz + mfaab*pow(uz, 2) +
 							mfaba*pow(uz, 2) + mfabb*pow(uz, 2) +
@@ -1162,7 +1163,7 @@ void ThixotropyLBMKernel::calculate(int step)
 									mfccb*pow(uz, 2) + mfccc*pow(uz, 2) +
 									mfcaa*pow(1 + uz, 2))) - Mom100 / 3;
 
-						LBMReal Mom021 = (-(pow(1 + uy, 2)*(mfaac*(-1 + uz) + mfaab*uz +
+						real Mom021 = (-(pow(1 + uy, 2)*(mfaac*(-1 + uz) + mfaab*uz +
 							mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz + mfaba*(1 + uz)) -
 							pow(-1 + uy, 2)*(mfacc*(-1 + uz) + mfacb*uz +
@@ -1175,7 +1176,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							pow(uy, 2)*(mfcbc*(-1 + uz) + mfcbb*uz + mfcba*(1 + uz)) -
 							pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz + mfcca*(1 + uz))) - Mom001 / 3;
 
-						LBMReal Mom012 = (-((1 + uy)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
+						real Mom012 = (-((1 + uy)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 							mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) -
@@ -1197,7 +1198,7 @@ void ThixotropyLBMKernel::calculate(int step)
 
 
 						//(220)//
-						LBMReal Mom220 = (pow(1 + ux, 2)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
+						real Mom220 = (pow(1 + ux, 2)*((mfaca + mfacb + mfacc)*pow(-1 + uy, 2) +
 							(mfaba + mfabb + mfabc)*pow(uy, 2) +
 							(mfaaa + mfaab + mfaac)*pow(1 + uy, 2)) +
 							pow(ux, 2)*((mfbca + mfbcb + mfbcc)*pow(-1 + uy, 2) +
@@ -1207,7 +1208,7 @@ void ThixotropyLBMKernel::calculate(int step)
 							(mfcba + mfcbb + mfcbc)*pow(uy, 2) +
 								(mfcaa + mfcab + mfcac)*pow(1 + uy, 2))) - Mom000 / 9;
 
-						LBMReal Mom202 = (pow(1 + ux, 2)*(mfaba + mfabc + mfaca + mfacc +
+						real Mom202 = (pow(1 + ux, 2)*(mfaba + mfabc + mfaca + mfacc +
 							mfaac*pow(-1 + uz, 2) + 2 * mfaba*uz - 2 * mfabc*uz +
 							2 * mfaca*uz - 2 * mfacc*uz + mfaab*pow(uz, 2) +
 							mfaba*pow(uz, 2) + mfabb*pow(uz, 2) +
@@ -1229,7 +1230,7 @@ void ThixotropyLBMKernel::calculate(int step)
 								mfccb*pow(uz, 2) + mfccc*pow(uz, 2) +
 								mfcaa*pow(1 + uz, 2))) - Mom000 / 9;
 
-						LBMReal Mom022 = (pow(1 + uy, 2)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
+						real Mom022 = (pow(1 + uy, 2)*(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 							mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) +
@@ -1250,7 +1251,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						////
 
 						//(221)//
-						LBMReal Mom221 = (pow(1 + ux, 2)*(-(pow(1 + uy, 2)*
+						real Mom221 = (pow(1 + ux, 2)*(-(pow(1 + uy, 2)*
 							(mfaac*(-1 + uz) + mfaab*uz + mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz +
 								mfaba*(1 + uz)) -
@@ -1269,7 +1270,7 @@ void ThixotropyLBMKernel::calculate(int step)
 								pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz +
 									mfcca*(1 + uz)))) - Mom001 / 9;
 
-						LBMReal Mom212 = (pow(1 + ux, 2)*(-((1 + uy)*
+						real Mom212 = (pow(1 + ux, 2)*(-((1 + uy)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1291,7 +1292,7 @@ void ThixotropyLBMKernel::calculate(int step)
 									(-1 + uy)*(mfccc*pow(-1 + uz, 2) + mfccb*pow(uz, 2) +
 										mfcca*pow(1 + uz, 2)))) - Mom010 / 9;
 
-						LBMReal Mom122 = ((-1 - ux)*(pow(1 + uy, 2)*
+						real Mom122 = ((-1 - ux)*(pow(1 + uy, 2)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1314,7 +1315,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						////
 
 						//(211)//
-						LBMReal Mom211 = (pow(1 + ux, 2)*((1 + uy)*(mfaac*(-1 + uz) + mfaab*uz +
+						real Mom211 = (pow(1 + ux, 2)*((1 + uy)*(mfaac*(-1 + uz) + mfaab*uz +
 							mfaaa*(1 + uz)) +
 							uy*(mfabc*(-1 + uz) + mfabb*uz + mfaba*(1 + uz)) +
 							(-1 + uy)*(mfacc*(-1 + uz) + mfacb*uz + mfaca*(1 + uz))) +
@@ -1327,7 +1328,7 @@ void ThixotropyLBMKernel::calculate(int step)
 								uy*(mfcbc*(-1 + uz) + mfcbb*uz + mfcba*(1 + uz)) +
 								(-1 + uy)*(mfccc*(-1 + uz) + mfccb*uz + mfcca*(1 + uz)))) - Mom011 / 3;
 
-						LBMReal Mom121 = ((-1 - ux)*(-(pow(1 + uy, 2)*
+						real Mom121 = ((-1 - ux)*(-(pow(1 + uy, 2)*
 							(mfaac*(-1 + uz) + mfaab*uz + mfaaa*(1 + uz))) -
 							pow(uy, 2)*(mfabc*(-1 + uz) + mfabb*uz +
 								mfaba*(1 + uz)) -
@@ -1346,7 +1347,7 @@ void ThixotropyLBMKernel::calculate(int step)
 										pow(-1 + uy, 2)*(mfccc*(-1 + uz) + mfccb*uz +
 											mfcca*(1 + uz)))) - Mom101 / 3;
 
-						LBMReal Mom112 = ((-1 - ux)*(-((1 + uy)*(mfaac*pow(-1 + uz, 2) +
+						real Mom112 = ((-1 - ux)*(-((1 + uy)*(mfaac*pow(-1 + uz, 2) +
 							mfaab*pow(uz, 2) + mfaaa*pow(1 + uz, 2))) -
 							uy*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
 								mfaba*pow(1 + uz, 2)) -
@@ -1367,7 +1368,7 @@ void ThixotropyLBMKernel::calculate(int step)
 						////
 
 						//(222)//
-						LBMReal Mom222 = (pow(1 + ux, 2)*(pow(1 + uy, 2)*
+						real Mom222 = (pow(1 + ux, 2)*(pow(1 + uy, 2)*
 							(mfaac*pow(-1 + uz, 2) + mfaab*pow(uz, 2) +
 								mfaaa*pow(1 + uz, 2)) +
 							pow(uy, 2)*(mfabc*pow(-1 + uz, 2) + mfabb*pow(uz, 2) +
@@ -1394,7 +1395,7 @@ void ThixotropyLBMKernel::calculate(int step)
 
 
 
-						LBMReal Meq000 = drho+dlambda*c1o2;
+						real Meq000 = drho+dlambda*c1o2;
 
 
 						// relaxation Central Moment MRT
@@ -1815,46 +1816,46 @@ void ThixotropyLBMKernel::calculate(int step)
 	}
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyLBMKernel::getCalculationTime()
+real ThixotropyLBMKernel::getCalculationTime()
 {
 	//return timer.getDuration();
 	return timer.getTotalTime();
 }
 //////////////////////////////////////////////////////////////////////////
-void ThixotropyLBMKernel::setCollisionFactorF(double collFactor)
+void ThixotropyLBMKernel::setCollisionFactorF(real collFactor)
 {
 	setCollisionFactor(collFactor);
 	this->collFactorF = collFactor;
 
 }
 //////////////////////////////////////////////////////////////////////////
-void ThixotropyLBMKernel::setCollisionFactorH(double collFactor)
+void ThixotropyLBMKernel::setCollisionFactorH(real collFactor)
 {
 	this->collFactorH = collFactor;
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyLBMKernel::getCollisionFactorF() const
+real ThixotropyLBMKernel::getCollisionFactorF() const
 {
 	return this->collFactorF;
 }
 //////////////////////////////////////////////////////////////////////////
-double ThixotropyLBMKernel::getCollisionFactorH() const
+real ThixotropyLBMKernel::getCollisionFactorH() const
 {
 	return this->collFactorH;
 }
-void ThixotropyLBMKernel::setAlpha(double alpha)
+void ThixotropyLBMKernel::setAlpha(real alpha)
 {
 	this->alpha = alpha;
 }
-double ThixotropyLBMKernel::getAlpha() const
+real ThixotropyLBMKernel::getAlpha() const
 {
 	return this->alpha;
 }
-void ThixotropyLBMKernel::setTheta(double theta)
+void ThixotropyLBMKernel::setTheta(real theta)
 {
 	this->theta = theta;
 }
-double ThixotropyLBMKernel::getTheta() const
+real ThixotropyLBMKernel::getTheta() const
 {
 	return this->theta;
 }
diff --git a/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.h
index c74af1b1eead237c03c12ba612434a286ebfc656..c638105425c20dfa64a221a02004ee1ece8879fd 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ThixotropyLBMKernel.h
@@ -22,50 +22,50 @@ public:
 	virtual ~ThixotropyLBMKernel(void);
 	virtual void calculate(int step);
 	virtual SPtr<LBMKernel> clone();
-	double getCalculationTime();
+	real getCalculationTime();
  
-	void setCollisionFactorF(double collFactor);
-   void setCollisionFactorH(double collFactor);
-   double getCollisionFactorF() const;
-   double getCollisionFactorH() const;
+	void setCollisionFactorF(real collFactor);
+   void setCollisionFactorH(real collFactor);
+   real getCollisionFactorF() const;
+   real getCollisionFactorH() const;
 
-	void setAlpha(double alpha);
-	double getAlpha() const;
+	void setAlpha(real alpha);
+	real getAlpha() const;
 
-	void setTheta(double theta);
-	double getTheta() const;
+	void setTheta(real theta);
+	real getTheta() const;
 
 	void swapDistributions();
 
 protected:
 	virtual void initDataSet();
-	LBMReal f[D3Q27System::ENDF + 1];
+	real f[D3Q27System::ENDF + 1];
 
 	UbTimer timer;
 
-	LBMReal OxyyMxzz;
+	real OxyyMxzz;
 	Parameter parameter;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsF;
 
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
-	CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
-	CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsH;
+	CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsH;
+	CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr   zeroDistributionsH;
 
 	mu::value_type muX1, muX2, muX3;
 	mu::value_type muDeltaT;
 	mu::value_type muNu;
-	LBMReal forcingX1;
-	LBMReal forcingX2;
-	LBMReal forcingX3;
+	real forcingX1;
+	real forcingX2;
+	real forcingX3;
 
-	LBMReal collFactorF;
-   LBMReal collFactorH;
+	real collFactorF;
+   real collFactorH;
 
-	LBMReal theta;
-	LBMReal alpha;
+	real theta;
+	real alpha;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.cpp
index d9ce56aa8c4ca5f7a0e2318a9d48120b66f06705..83d6a791e761190e3cdd34d101c4fd1d1588a191 100644
--- a/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.cpp
@@ -31,4 +31,4 @@ SPtr<LBMKernel> VoidLBMKernel::clone()
 //////////////////////////////////////////////////////////////////////////
 void VoidLBMKernel::calculate(int step) {}
 //////////////////////////////////////////////////////////////////////////
-double VoidLBMKernel::getCalculationTime() { return 0.0; }
+real VoidLBMKernel::getCalculationTime() { return 0.0; }
diff --git a/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.h
index b9b4b5d2d2c53f91871c3770a3acda0401842efe..0984cab144021c3895bf8cb85f50efbc94476e6b 100644
--- a/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/VoidLBMKernel.h
@@ -10,7 +10,7 @@ public:
     ~VoidLBMKernel() override;
     SPtr<LBMKernel> clone() override;
     void calculate(int step) override;
-    double getCalculationTime() override;
+    real getCalculationTime() override;
     void initDataSet();
 
 protected:
diff --git a/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h b/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
index 74627f6181cd02002e2bc2c7a2d284ff288f3c59..bbc4face9f784d120c80dcfbbbd73d07951fe49b 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
@@ -1,14 +1,16 @@
 #ifndef _MPI_STRUCTURES_H_
 #define _MPI_STRUCTURES_H_
 
+#include "lbm/constants/D3Q27.h"
+
 namespace MPIIODataStructures
 {
 //! \struct GridParam
 //! \brief Structure describes parameters of the grid
 //! \details The structure is nessasary to restore the grid correctly
 struct GridParam {
-    double trafoParams[33];
-    double deltaX;
+    real trafoParams[33];
+    real deltaX;
     int blockNx1;
     int blockNx2;
     int blockNx3;
@@ -57,11 +59,11 @@ struct dataSetParam {
 //! \brief Structure describes parameters of the dataSet in MPIIORestartCoProcessor format
 //! \details The structure is used when reading from the file
 struct DataSetRestart {
-    double collFactor;
-    double deltaT;
-    double collFactorL; // for Multiphase model
-    double collFactorG; // for Multiphase model
-    double densityRatio;// for Multiphase model
+    real collFactor;
+    real deltaT;
+    real collFactorL; // for Multiphase model
+    real collFactorG; // for Multiphase model
+    real densityRatio;// for Multiphase model
     int x1;
     int x2;
     int x3;
@@ -75,11 +77,11 @@ struct DataSetRestart {
 //! \brief Structure describes parameters of the dataSet in MPIIOMigrationCoProcessor format
 //! \details The structure is used to find the needed block in the grid when restoring a dataSet
 struct DataSetMigration {
-    double collFactor;
-    double deltaT;
-    double collFactorL; // for Multiphase model
-    double collFactorG; // for Multiphase model
-    double densityRatio;// for Multiphase model
+    real collFactor;
+    real deltaT;
+    real collFactorL; // for Multiphase model
+    real collFactorG; // for Multiphase model
+    real densityRatio;// for Multiphase model
     int globalID;
     int ghostLayerWidth;
     bool compressible;
@@ -113,14 +115,14 @@ struct BoundaryCondition {
     long long densityBoundaryFlags;
     long long wallModelBoundaryFlags;
 
-    float bcVelocityX1;
-    float bcVelocityX2;
-    float bcVelocityX3;
-    float bcDensity;
-    float bcPhaseField;
+    real bcVelocityX1;
+    real bcVelocityX2;
+    real bcVelocityX3;
+    real bcDensity;
+    real bcPhaseField;
 
-    float nx1, nx2, nx3;
-    float q[26];
+    real nx1, nx2, nx3;
+    real q[26];
 
     char algorithmType;
 };
diff --git a/src/cpu/VirtualFluidsCore/Parallel/SimpleGeometricPartitioner.h b/src/cpu/VirtualFluidsCore/Parallel/SimpleGeometricPartitioner.h
index ee9e56af9f8578b5ea406a270de1cc1c9986f11e..0f80e380c2d9cfc4d8595ba5284bcbec9276b846 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/SimpleGeometricPartitioner.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/SimpleGeometricPartitioner.h
@@ -27,9 +27,9 @@ public:
         if (p == 1)
             return { 1, 1, 1 };
 
-        double a = pow(p * pow(x, 3.0) / xyz, 1.0 / 3.0);
-        double b = pow(p * pow(y, 3.0) / xyz, 1.0 / 3.0);
-        double c = pow(p * pow(z, 3.0) / xyz, 1.0 / 3.0);
+        real a = pow(p * pow(x, 3.0) / xyz, 1.0 / 3.0);
+        real b = pow(p * pow(y, 3.0) / xyz, 1.0 / 3.0);
+        real c = pow(p * pow(z, 3.0) / xyz, 1.0 / 3.0);
 
         MaxDim maxDim;
 
diff --git a/src/cpu/VirtualFluidsCore/Parallel/ZoltanPartitioner.h b/src/cpu/VirtualFluidsCore/Parallel/ZoltanPartitioner.h
index 402e6b9603ff38a8236579f132f74fbf0a43c9e1..b599786b76ebdf0187572abec687cfff439120c7 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/ZoltanPartitioner.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/ZoltanPartitioner.h
@@ -46,12 +46,12 @@ public:
 protected:
     static int get_number_of_vertices(void *data, int *ierr);
     static void get_vertex_list(void *data, int sizeGID, int sizeLID, ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID,
-                                int wgt_dim, float *obj_wgts, int *ierr);
+                                int wgt_dim, float *obj_wgts, int *ierr); // weights stay float*: parameter list mirrors Zoltan's ZOLTAN_OBJ_LIST_FN, not an LBM quantity
     static void get_num_edges_list(void *data, int sizeGID, int sizeLID, int num_obj, ZOLTAN_ID_PTR globalID,
                                    ZOLTAN_ID_PTR localID, int *numEdges, int *ierr);
     static void get_edge_list(void *data, int sizeGID, int sizeLID, int num_obj, ZOLTAN_ID_PTR globalID,
                               ZOLTAN_ID_PTR localID, int *num_edges, ZOLTAN_ID_PTR nborGID, int *nborProc, int wgt_dim,
-                              float *ewgts, int *ierr);
+                              float *ewgts, int *ierr); // weights stay float*: parameter list mirrors Zoltan's ZOLTAN_EDGE_LIST_MULTI_FN
 
 private:
     MPI_Comm comm;
diff --git a/src/cpu/VirtualFluidsCore/Utilities/ChangeRandomQs.hpp b/src/cpu/VirtualFluidsCore/Utilities/ChangeRandomQs.hpp
index db7c73c93b680161aa8819905d1237725a2f8f60..3fbd3643d71409fe21aa800473310399757f3a44 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/ChangeRandomQs.hpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/ChangeRandomQs.hpp
@@ -27,10 +27,10 @@ namespace Utilities
                   if (bc->hasNoSlipBoundaryFlag(fdir))
                   {
                      const int invDir = D3Q27System::INVDIR[fdir];
-                     float q = (float) bc->getQ(invDir);
+                     real q = (real) bc->getQ(invDir);
                      //double r = (double)UbRandom::rand(-50, 50);
-                     float r = (float)UbRandom::rand(-10, 10);
-                     float q_temp = q + q/r;
+                     real r = (real)UbRandom::rand(-10, 10);
+                     real q_temp = q + q/r;
                      if (q_temp < 0.0)
                      {
                         q_temp = 0.0001f;
diff --git a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
index 6e7968f37493476ac7f076b4d7aa129b56c7326f..53282294203213fe98b8867dfaf2fde523490bc5 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
@@ -134,8 +134,8 @@ void CheckpointConverter::convert(int step, int procCount)
 void CheckpointConverter::convertBlocks(int step, int procCount)
 {
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // wall-clock timestamp from MPI_Wtime(), which returns double; must not be narrowed to `real`
+    double finish {0.}; // kept the same type as start so elapsed-time arithmetic stays in double precision
     start = MPI_Wtime();
 
     // file to read from
@@ -298,8 +298,8 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
         throw UbException(UB_EXARGS, "couldn't open file " + filenameW);
 
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // wall-clock timestamp from MPI_Wtime(), which returns double; must not be narrowed to `real`
+    double finish {0.}; // kept the same type as start so elapsed-time arithmetic stays in double precision
     start = MPI_Wtime();
 
     int blocksCount = 0;
@@ -307,7 +307,7 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
     DataSetRestart *dataSetReadArray;
     DataSetMigration *dataSetWriteArray;
     size_t doubleCountInBlock;
-    std::vector<double> doubleValuesArray;
+    std::vector<real> doubleValuesArray;
     size_t sizeofOneDataSet;
 
     // calculate the read offset
@@ -341,7 +341,7 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
         // offset to read the data of the next process
         read_offset =
             read_offset + (MPI_Offset)(3 * sizeof(dataSetParam) +
-                                       blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double)));
+                                       blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(real)));
 
         // write parameters of data arrays
         MPI_File_write_at(file_handlerW, (MPI_Offset)0, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
@@ -350,7 +350,7 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
         MPI_File_write_at(file_handlerW, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType,
                           MPI_STATUS_IGNORE);
 
-        sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(double);
+        sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(real);
 
         // write blocks and their data arrays
         for (int nb = 0; nb < blocksCount; nb++) {
@@ -434,8 +434,8 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
 void CheckpointConverter::convert___Array(int /*step*/, int procCount, std::string filenameR, std::string filenameW)
 {
     
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // wall-clock timestamp from MPI_Wtime(), which returns double; must not be narrowed to `real`
+    double finish {0.}; // kept the same type as start so elapsed-time arithmetic stays in double precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
@@ -456,7 +456,7 @@ void CheckpointConverter::convert___Array(int /*step*/, int procCount, std::stri
     DataSetSmallRestart *dataSetSmallReadArray;
     DataSetSmallMigration *dataSetSmallWriteArray;
     int doubleCountInBlock;
-    std::vector<double> doubleValuesArray;
+    std::vector<double> doubleValuesArray; // element type must match the MPI_DOUBLE datatype used when this buffer is filled by MPI_File_read_at
 
     // calculate the read offset
     MPI_Offset read_offset = (MPI_Offset)(procCount * sizeof(int));
@@ -482,9 +482,9 @@ void CheckpointConverter::convert___Array(int /*step*/, int procCount, std::stri
                 &doubleValuesArray[0], blocksCount * doubleCountInBlock, MPI_DOUBLE, MPI_STATUS_IGNORE);
 
         read_offset = read_offset + sizeof(dataSetParam) +
-                      blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+                      blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double)); // payload was read as MPI_DOUBLE, so advance by sizeof(double)
 
-        sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double);
+        sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double); // NOTE(review): assumes the write side also emits MPI_DOUBLE values - confirm
 
         MPI_File_write_at(file_handlerW, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
@@ -529,8 +529,8 @@ void CheckpointConverter::convertBC(int step, int procCount)
     if (rcW != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filenameW);
 
-    double start {0.};
-    double finish {0.};
+    double start {0.};  // wall-clock timestamp from MPI_Wtime(), which returns double; must not be narrowed to `real`
+    double finish {0.}; // kept the same type as start so elapsed-time arithmetic stays in double precision
     if (comm->isRoot())
         start = MPI_Wtime();
 
diff --git a/src/cpu/VirtualFluidsCore/Utilities/MathUtil.hpp b/src/cpu/VirtualFluidsCore/Utilities/MathUtil.hpp
index 5c6fe4e8e2b4a02e733d777db9c045bc57f5b6eb..7c5ffe1f3fbde3fa59756507721ccc04d2bbe365 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/MathUtil.hpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/MathUtil.hpp
@@ -22,10 +22,10 @@ namespace Utilities
          return false;
    }
 
-   //convert from double to int
-   static int cint(double x)
+   //convert from real to int
+   static int cint(real x)
    {
-      double intpart;
+      real intpart;
       if (modf(x,&intpart)>=.5)
          return static_cast<int> (floor(x)+1);
       else
@@ -34,7 +34,7 @@ namespace Utilities
 
    //create new mu parser for duct parabolic profile
    //inflow in X
-   static mu::Parser getDuctParaboloidX(double Cy, double Hy, double Cz, double Hz, double V)
+   static mu::Parser getDuctParaboloidX(real Cy, real Hy, real Cz, real Hz, real V)
    {
       mu::Parser fct;
       fct.SetExpr("V*(((-(x2-Cy)^2.0+(Hy/2.0)^2.0)/(Hy/2.0)^2.0)*((-(x3-Cz)^2.0+(Hz/2.0)^2.0)/(Hz/2.0)^2.0))" );
@@ -46,7 +46,7 @@ namespace Utilities
       return fct;
    }
    //inflow in Y
-   static mu::Parser getDuctParaboloidY(double Cx, double Hx, double Cz, double Hz, double V)
+   static mu::Parser getDuctParaboloidY(real Cx, real Hx, real Cz, real Hz, real V)
    {
       mu::Parser fct;
       fct.SetExpr("V*(((-(x1-Cx)^2.0+(Hx/2.0)^2.0)/(Hx/2.0)^2.0)*((-(x3-Cz)^2.0+(Hz/2.0)^2.0)/(Hz/2.0)^2.0))" );
@@ -58,7 +58,7 @@ namespace Utilities
       return fct;
    }
    //inflow in Z
-   static mu::Parser getDuctParaboloidZ(double Cx, double Hx, double Cy, double Hy, double V)
+   static mu::Parser getDuctParaboloidZ(real Cx, real Hx, real Cy, real Hy, real V)
    {
       mu::Parser fct;
       fct.SetExpr("V*(((-(x1-Cx)^2.0+(Hx/2.0)^2.0)/(Hx/2.0)^2.0)*((-(x2-Cy)^2.0+(Hy/2.0)^2.0)/(Hy/2.0)^2.0))" );
@@ -85,11 +85,11 @@ namespace Utilities
       return hash;
    }
    //linear interpolation
-   static double linear_interpolation1D(double x0, double y0, double x1, double y1, double x)
+   static real linear_interpolation1D(real x0, real y0, real x1, real y1, real x)
    {
-      double a = (y1 - y0) / (x1 - x0);
-      double b = -a*x0 + y0;
-      double y = a * x + b;
+      real a = (y1 - y0) / (x1 - x0);
+      real b = -a*x0 + y0;
+      real y = a * x + b;
       return y;
    }
 }
diff --git a/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h b/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h
index 4c9f30a902196f8fef5187442f45b94dc64de283..ec5e9b0981d4a02aeca48d8a33d5a52e018aaf87 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h
+++ b/src/cpu/VirtualFluidsCore/Utilities/MemoryUtil.h
@@ -66,6 +66,7 @@
 #include <string>
 #include <vector>
 #include "Grid3D.h"
+#include "lbm/constants/D3Q27.h"
 
 //////////////////////////////////////////////////////////////////////////
 // MemoryUtil
@@ -179,8 +180,8 @@ static std::string toString(SPtr<Grid3D> grid, int numberOfProcesses)
     unsigned long long numberOfNodesPerBlockWithGhostLayer = numberOfBlocks * (val<1>(blockNx) + ghostLayer) *
                                                              (val<2>(blockNx) + ghostLayer) *
                                                              (val<3>(blockNx) + ghostLayer);
-    double needMemAll = double(numberOfNodesPerBlockWithGhostLayer*(27*sizeof(double)+sizeof(int)+sizeof(float)*4));
-    double needMem = needMemAll / double(numberOfProcesses);
+    real needMemAll = real(numberOfNodesPerBlockWithGhostLayer*(27*sizeof(real)+sizeof(int)+sizeof(real)*4));
+    real needMem = needMemAll / real(numberOfProcesses);
     
     std::ostringstream out;
     out << "Grid information:" << std::endl;
@@ -197,7 +198,7 @@ static std::string toString(SPtr<Grid3D> grid, int numberOfProcesses)
     }
     out << "# Necessary memory  = " << needMemAll << " bytes" << std::endl;
     out << "# Necessary memory per process = " << needMem << " bytes" << std::endl;
-    out << "# Available memory per process = " << (double)getTotalPhysMem() << " bytes" << std::endl;
+    out << "# Available memory per process = " << (real)getTotalPhysMem() << " bytes" << std::endl;
     out << "###################################################" << std::endl;
 
     return out.str();
diff --git a/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
index f5c87b9fc695d81ad492f89113f2d9e5c56fa9a7..fbfbd1bcab135056fa6b62e31d50b63c898bb83a 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
@@ -76,7 +76,7 @@ void BoundaryConditionsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> bloc
         SPtr<BCArray3D> bcArray = bcProcessor->getBCArray();
 
         bool compressible = kernel->getCompressible();
-        double collFactor = kernel->getCollisionFactor();
+        real collFactor = kernel->getCollisionFactor();
 
         int minX1 = 0;
         int minX2 = 0;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.cpp
index e26b59729594fc3175e523e25d23ce7adc56d74e..c541465183dd084135d60b7112182daae33e22ab 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.cpp
@@ -7,7 +7,7 @@
 #include "D3Q27System.h"
 #include "LBMKernel.h"
 
-ChangeBoundaryDensityBlockVisitor::ChangeBoundaryDensityBlockVisitor(float oldBoundaryDensity, float newBoundaryDensity)
+ChangeBoundaryDensityBlockVisitor::ChangeBoundaryDensityBlockVisitor(real oldBoundaryDensity, real newBoundaryDensity)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), oldBoundaryDensity(oldBoundaryDensity),
       newBoundaryDensity(newBoundaryDensity)
 {
@@ -35,7 +35,7 @@ void ChangeBoundaryDensityBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> b
                         bcPtr = bcArray->getBC(x1, x2, x3);
                         if (bcPtr) {
                             if (bcPtr->hasDensityBoundary()) {
-                                float bcDensity = (float)bcPtr->getBoundaryDensity();
+                                real bcDensity = (real)bcPtr->getBoundaryDensity();
                                 if (bcDensity == oldBoundaryDensity) {
                                     bcPtr->setBoundaryDensity(newBoundaryDensity);
                                 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.h
index 256448a0602bb6e5ab45fc4116aac35073795ddb..64592f7bb2f81e1df1b22cdc6bcbb6bbb7528dfc 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/ChangeBoundaryDensityBlockVisitor.h
@@ -4,6 +4,7 @@
 #include <PointerDefinitions.h>
 
 #include "Block3DVisitor.h"
+#include "lbm/constants/D3Q27.h"
 
 class Block3D;
 class Grid3D;
@@ -12,14 +13,14 @@ class BoundaryConditions;
 class ChangeBoundaryDensityBlockVisitor : public Block3DVisitor
 {
 public:
-    ChangeBoundaryDensityBlockVisitor(float oldBoundaryDensity, float newBoundaryDensity);
+    ChangeBoundaryDensityBlockVisitor(real oldBoundaryDensity, real newBoundaryDensity);
     ~ChangeBoundaryDensityBlockVisitor() override;
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
 
 private:
-    float oldBoundaryDensity;
-    float newBoundaryDensity;
+    real oldBoundaryDensity;
+    real newBoundaryDensity;
     SPtr<BoundaryConditions> bcPtr;
 };
 #endif // ChangeBoundaryDensityBlockVisitor_h__
diff --git a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
index 7602438a23f16295f8d518f70d5a036dac4515ec..b931cbbbda004f7b2057943222d4523c5fb0916b 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
@@ -87,22 +87,22 @@ void CreateTransmittersHelper::createTransmitters(SPtr<Block3D> sblock, SPtr<Blo
         string sendPoolKey    = generatePoolKey(srcRank, srcLevel, tgtRank, tgtLevel);
         string receivePoolKey = generatePoolKey(tgtRank, tgtLevel, srcRank, srcLevel);
 
-        TbCbVectorMpiPool<LBMReal>::MpiPoolPtr sendPool = TbCbVectorMpiPool<LBMReal>::getTbCbVectorMpiPool(sendPoolKey);
-        TbCbVectorMpiPool<LBMReal>::MpiPoolPtr recvPool =
-            TbCbVectorMpiPool<LBMReal>::getTbCbVectorMpiPool(receivePoolKey);
+        TbCbVectorMpiPool<real>::MpiPoolPtr sendPool = TbCbVectorMpiPool<real>::getTbCbVectorMpiPool(sendPoolKey);
+        TbCbVectorMpiPool<real>::MpiPoolPtr recvPool =
+            TbCbVectorMpiPool<real>::getTbCbVectorMpiPool(receivePoolKey);
 
         MPI_Comm mpi_comm = *((MPI_Comm *)comm->getNativeCommunicator());
 
         if (!sendPool)
-            sendPool = TbCbVectorMpiPool<LBMReal>::createTbCbVectorMpiPool(
+            sendPool = TbCbVectorMpiPool<real>::createTbCbVectorMpiPool(
                 sendPoolKey, tgtRank, generateMPITag(srcLevel, tgtLevel), mpi_comm);
         if (!recvPool)
-            recvPool = TbCbVectorMpiPool<LBMReal>::createTbCbVectorMpiPool(
+            recvPool = TbCbVectorMpiPool<real>::createTbCbVectorMpiPool(
                 receivePoolKey, tgtRank, generateMPITag(tgtLevel, srcLevel), mpi_comm);
 
-        TbCbVectorMpiPool<LBMReal>::CbVectorKey keyOfSendCbVectorKey =
+        TbCbVectorMpiPool<real>::CbVectorKey keyOfSendCbVectorKey =
             generateVectorKey(sblock->getX1(), sblock->getX2(), sblock->getX3() /*tgtID*/, dir, ib);
-        TbCbVectorMpiPool<LBMReal>::CbVectorKey keyOfRecvCbVectorKey =
+        TbCbVectorMpiPool<real>::CbVectorKey keyOfRecvCbVectorKey =
             generateVectorKey(tblock->getX1(), tblock->getX2(), tblock->getX3() /*srcID*/, invDir, ib);
 
         ////////////////////////////////////////////////////////
@@ -118,8 +118,8 @@ void CreateTransmittersHelper::createTransmitters(SPtr<Block3D> sblock, SPtr<Blo
         ////////////////////////////////////////////////////////
 
         // create sender-/receiver
-        sender   = TransmitterPtr(new TbCbVectorSenderMpiPool<LBMReal>(keyOfSendCbVectorKey, sendPool.get()));
-        receiver = TransmitterPtr(new TbCbVectorReceiverMpiPool<LBMReal>(keyOfRecvCbVectorKey, recvPool.get()));
+        sender   = TransmitterPtr(new TbCbVectorSenderMpiPool<real>(keyOfSendCbVectorKey, sendPool.get()));
+        receiver = TransmitterPtr(new TbCbVectorReceiverMpiPool<real>(keyOfRecvCbVectorKey, recvPool.get()));
     }
 #ifdef VF_FETOL
     if (tType == BOND) {
@@ -129,24 +129,24 @@ void CreateTransmittersHelper::createTransmitters(SPtr<Block3D> sblock, SPtr<Blo
         int sendBondPoolKey    = generatePoolKey(srcBondRank, srcLevel, tgtBondRank, tgtLevel);
         int receiveBondPoolKey = generatePoolKey(tgtBondRank, tgtLevel, srcBondRank, srcLevel);
 
-        TbCbVectorBondPool<LBMReal>::BondPoolPtr sendPool =
-            TbCbVectorBondPool<LBMReal>::getTbCbVectorBondPool(sendBondPoolKey);
-        TbCbVectorBondPool<LBMReal>::BondPoolPtr recvPool =
-            TbCbVectorBondPool<LBMReal>::getTbCbVectorBondPool(receiveBondPoolKey);
+        TbCbVectorBondPool<real>::BondPoolPtr sendPool =
+            TbCbVectorBondPool<real>::getTbCbVectorBondPool(sendBondPoolKey);
+        TbCbVectorBondPool<real>::BondPoolPtr recvPool =
+            TbCbVectorBondPool<real>::getTbCbVectorBondPool(receiveBondPoolKey);
 
         if (!sendPool)
-            sendPool = TbCbVectorBondPool<LBMReal>::createTbCbVectorBondPool(sendBondPoolKey, tgtBondRank,
+            sendPool = TbCbVectorBondPool<real>::createTbCbVectorBondPool(sendBondPoolKey, tgtBondRank,
                                                                              generateMPITag(srcLevel, tgtLevel));
         if (!recvPool)
-            recvPool = TbCbVectorBondPool<LBMReal>::createTbCbVectorBondPool(receiveBondPoolKey, tgtBondRank,
+            recvPool = TbCbVectorBondPool<real>::createTbCbVectorBondPool(receiveBondPoolKey, tgtBondRank,
                                                                              generateMPITag(tgtLevel, srcLevel));
 
-        TbCbVectorBondPool<LBMReal>::CbVectorKey keyOfSendCbVectorKey = generateVectorKey(tgtID, dir, ib);
-        TbCbVectorBondPool<LBMReal>::CbVectorKey keyOfRecvCbVectorKey = generateVectorKey(srcID, invDir, ib);
+        TbCbVectorBondPool<real>::CbVectorKey keyOfSendCbVectorKey = generateVectorKey(tgtID, dir, ib);
+        TbCbVectorBondPool<real>::CbVectorKey keyOfRecvCbVectorKey = generateVectorKey(srcID, invDir, ib);
 
         // create sender-/receiver
-        sender   = TransmitterPtr(new TbCbVectorSenderBondPool<LBMReal>(keyOfSendCbVectorKey, sendPool.get()));
-        receiver = TransmitterPtr(new TbCbVectorReceiverBondPool<LBMReal>(keyOfRecvCbVectorKey, recvPool.get()));
+        sender   = TransmitterPtr(new TbCbVectorSenderBondPool<real>(keyOfSendCbVectorKey, sendPool.get()));
+        receiver = TransmitterPtr(new TbCbVectorReceiverBondPool<real>(keyOfRecvCbVectorKey, recvPool.get()));
     }
 #endif
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
index d51f6352a251fe360aaf2a8365c77315e099d4d2..af60de0a2e2b9e06488df3011584b8448594bf85 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
@@ -55,7 +55,7 @@ public:
     enum TransmitterType { MPI, BOND, MPI2BOND };
 
 public:
-    using DataType       = CbVector<LBMReal>;
+    using DataType       = CbVector<real>;
     using TransmitterPtr = SPtr<TbTransmitter<DataType>>;
 
 public:
diff --git a/src/cpu/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
index 29ea3bfda98c2ce191d1f7c5bc20691049dc2a04..eec58e1643ec3c3f3aac63899f019247f8b0851e 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
@@ -43,17 +43,17 @@ GenBlocksGridVisitor::GenBlocksGridVisitor(SPtr<GbObject3D> boundingBox) : bound
 //////////////////////////////////////////////////////////////////////////
 void GenBlocksGridVisitor::visit(const SPtr<Grid3D> grid)
 {
-    double orgX1 = boundingBox->getX1Minimum();
-    double orgX2 = boundingBox->getX2Minimum();
-    double orgX3 = boundingBox->getX3Minimum();
+    real orgX1 = boundingBox->getX1Minimum();
+    real orgX2 = boundingBox->getX2Minimum();
+    real orgX3 = boundingBox->getX3Minimum();
 
-    double dx = grid->getDeltaX(0);
+    real dx = grid->getDeltaX(0);
 
     UbTupleInt3 blockNX = grid->getBlockNX();
 
-    double blockLentghX1 = (double)val<1>(blockNX) * dx;
-    double blockLentghX2 = (double)val<2>(blockNX) * dx;
-    double blockLentghX3 = (double)val<3>(blockNX) * dx;
+    real blockLentghX1 = (real)val<1>(blockNX) * dx;
+    real blockLentghX2 = (real)val<2>(blockNX) * dx;
+    real blockLentghX3 = (real)val<3>(blockNX) * dx;
 
     SPtr<CoordinateTransformation3D> trafo(
         new CoordinateTransformation3D(orgX1, orgX2, orgX3, blockLentghX1, blockLentghX2, blockLentghX3));
@@ -78,9 +78,9 @@ void GenBlocksGridVisitor::genBlocks(SPtr<Grid3D> grid)
 {
     minInd =
         grid->getBlockIndexes(boundingBox->getX1Minimum(), boundingBox->getX2Minimum(), boundingBox->getX3Minimum());
-    double geoMaxX1           = boundingBox->getX1Maximum();
-    double geoMaxX2           = boundingBox->getX2Maximum();
-    double geoMaxX3           = boundingBox->getX3Maximum();
+    real geoMaxX1           = boundingBox->getX1Maximum();
+    real geoMaxX2           = boundingBox->getX2Maximum();
+    real geoMaxX3           = boundingBox->getX3Maximum();
     maxInd                    = grid->getBlockIndexes(geoMaxX1, geoMaxX2, geoMaxX3);
     UbTupleDouble3 blockCoord = grid->getBlockWorldCoordinates(
         static_cast<int>(val<1>(maxInd)), static_cast<int>(val<2>(maxInd)), static_cast<int>(val<3>(maxInd)), 0);
@@ -91,7 +91,7 @@ void GenBlocksGridVisitor::genBlocks(SPtr<Grid3D> grid)
     // if (geoMaxX3 > val<3>(blockCoord))
     //    val<3>(maxInd) += 1;
 
-    double dx = grid->getDeltaX(0);
+    real dx = grid->getDeltaX(0);
     if (fabs(geoMaxX1 - val<1>(blockCoord)) > dx)
         val<1>(maxInd) += 1;
     if (fabs(geoMaxX2 - val<2>(blockCoord)) > dx)
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
index 0ba49c1a0683d052a07caae46410b5ea8c35aad7..1c4860070a5ca8aefc4850a9b16dd7273c65f231 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
@@ -97,25 +97,25 @@ void InitDistributionsBlockVisitor::setRho(const std::string &muParserString)
     this->checkFunction(muRho);
 }
 //////////////////////////////////////////////////////////////////////////
-void InitDistributionsBlockVisitor::setVx1(LBMReal vx1)
+void InitDistributionsBlockVisitor::setVx1(real vx1)
 {
     this->muVx1.SetExpr(UbSystem::toString(vx1, D3Q27RealLim::digits10));
     this->checkFunction(muVx1);
 }
 //////////////////////////////////////////////////////////////////////////
-void InitDistributionsBlockVisitor::setVx2(LBMReal vx2)
+void InitDistributionsBlockVisitor::setVx2(real vx2)
 {
     this->muVx2.SetExpr(UbSystem::toString(vx2, D3Q27RealLim::digits10));
     this->checkFunction(muVx2);
 }
 //////////////////////////////////////////////////////////////////////////
-void InitDistributionsBlockVisitor::setVx3(LBMReal vx3)
+void InitDistributionsBlockVisitor::setVx3(real vx3)
 {
     this->muVx3.SetExpr(UbSystem::toString(vx3, D3Q27RealLim::digits10));
     this->checkFunction(muVx3);
 }
 //////////////////////////////////////////////////////////////////////////
-void InitDistributionsBlockVisitor::setRho(LBMReal rho)
+void InitDistributionsBlockVisitor::setRho(real rho)
 {
     this->muRho.SetExpr(UbSystem::toString(rho, D3Q27RealLim::digits10));
     this->checkFunction(muRho);
@@ -124,10 +124,11 @@ void InitDistributionsBlockVisitor::setRho(LBMReal rho)
 void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
    using namespace D3Q27System;
+   using namespace vf::lbm::dir;
 
    if(!block) UB_THROW( UbException(UB_EXARGS,"block is not exist") );
 
-   double dx = grid->getDeltaX(block);
+   real dx = grid->getDeltaX(block);
 
    //define vars for functions
    mu::value_type x1,x2,x3;
@@ -136,11 +137,11 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D>
    this->muVx3.DefineVar("x1",&x1); this->muVx3.DefineVar("x2",&x2); this->muVx3.DefineVar("x3",&x3);
    this->muRho.DefineVar("x1",&x1); this->muRho.DefineVar("x2",&x2); this->muRho.DefineVar("x3",&x3);
 
-    using CalcFeqsFct = void (*)(LBMReal *const & /*feq[27]*/, const LBMReal & /*(d)rho*/, const LBMReal & /*vx1*/,
-                                 const LBMReal & /*vx2*/, const LBMReal & /*vx3*/);
+    using CalcFeqsFct = void (*)(real *const & /*feq[27]*/, const real & /*(d)rho*/, const real & /*vx1*/,
+                                 const real & /*vx2*/, const real & /*vx3*/);
     CalcFeqsFct calcFeqsFct = NULL;
    
-   LBMReal vx1, vx2, vx3, rho;
+   real vx1, vx2, vx3, rho;
 
    int gridRank = grid->getRank();
    int blockRank = block->getRank();
@@ -159,9 +160,9 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D>
       SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
       SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions();  
 
-      LBMReal o  = kernel->getCollisionFactor();
+      real o  = kernel->getCollisionFactor();
 
-      LBMReal f[D3Q27System::ENDF+1];
+      real f[D3Q27System::ENDF+1];
 
       for(std::size_t ix3=0; ix3<bcArray->getNX3(); ix3++)
          for(std::size_t ix2=0; ix2<bcArray->getNX2(); ix2++)
@@ -178,73 +179,73 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D>
                rho = muRho.Eval();
 
                //x-derivative
-               double deltaX=dx*0.5;
+               real deltaX=dx*0.5;
                x1 = coords[0]+deltaX;
-               double vx1Plusx1 = muVx1.Eval();
-               double vx2Plusx1 = muVx2.Eval();
-               double vx3Plusx1 = muVx3.Eval();
+               real vx1Plusx1 = muVx1.Eval();
+               real vx2Plusx1 = muVx2.Eval();
+               real vx3Plusx1 = muVx3.Eval();
 
                x1 = coords[0]-deltaX;
-               double vx1Minusx1 = muVx1.Eval();
-               double vx2Minusx1 = muVx2.Eval();
-               double vx3Minusx1 = muVx3.Eval();
+               real vx1Minusx1 = muVx1.Eval();
+               real vx2Minusx1 = muVx2.Eval();
+               real vx3Minusx1 = muVx3.Eval();
 
                //y-derivative
                x1 = coords[0];
                x2 = coords[1]+deltaX;
-               double vx1Plusx2 = muVx1.Eval();
-               double vx2Plusx2 = muVx2.Eval();
-               double vx3Plusx2 = muVx3.Eval();
+               real vx1Plusx2 = muVx1.Eval();
+               real vx2Plusx2 = muVx2.Eval();
+               real vx3Plusx2 = muVx3.Eval();
 
                x2 = coords[1]-deltaX;
-               double vx1Minusx2 = muVx1.Eval();
-               double vx2Minusx2 = muVx2.Eval();
-               double vx3Minusx2 = muVx3.Eval();
+               real vx1Minusx2 = muVx1.Eval();
+               real vx2Minusx2 = muVx2.Eval();
+               real vx3Minusx2 = muVx3.Eval();
 
                //z-derivative
                x2 = coords[1];
                x3 = coords[2]+deltaX;
-               double vx1Plusx3 = muVx1.Eval();
-               double vx2Plusx3 = muVx2.Eval();
-               double vx3Plusx3 = muVx3.Eval();
+               real vx1Plusx3 = muVx1.Eval();
+               real vx2Plusx3 = muVx2.Eval();
+               real vx3Plusx3 = muVx3.Eval();
 
                x3 = coords[2]-deltaX;
-               double vx1Minusx3 = muVx1.Eval();
-               double vx2Minusx3 = muVx2.Eval();
-               double vx3Minusx3 = muVx3.Eval();
+               real vx1Minusx3 = muVx1.Eval();
+               real vx2Minusx3 = muVx2.Eval();
+               real vx3Minusx3 = muVx3.Eval();
 
-               double ax=(vx1Plusx1-vx1Minusx1)/(2.0*deltaX)*dx;
-               double bx=(vx2Plusx1-vx2Minusx1)/(2.0*deltaX)*dx;
-               double cx=(vx3Plusx1-vx3Minusx1)/(2.0*deltaX)*dx;
+               real ax=(vx1Plusx1-vx1Minusx1)/(2.0*deltaX)*dx;
+               real bx=(vx2Plusx1-vx2Minusx1)/(2.0*deltaX)*dx;
+               real cx=(vx3Plusx1-vx3Minusx1)/(2.0*deltaX)*dx;
 
-               double ay=(vx1Plusx2-vx1Minusx2)/(2.0*deltaX)*dx;
-               double by=(vx2Plusx2-vx2Minusx2)/(2.0*deltaX)*dx;
-               double cy=(vx3Plusx2-vx3Minusx2)/(2.0*deltaX)*dx;
+               real ay=(vx1Plusx2-vx1Minusx2)/(2.0*deltaX)*dx;
+               real by=(vx2Plusx2-vx2Minusx2)/(2.0*deltaX)*dx;
+               real cy=(vx3Plusx2-vx3Minusx2)/(2.0*deltaX)*dx;
 
-               double az=(vx1Plusx3-vx1Minusx3)/(2.0*deltaX)*dx;
-               double bz=(vx2Plusx3-vx2Minusx3)/(2.0*deltaX)*dx;
-               double cz=(vx3Plusx3-vx3Minusx3)/(2.0*deltaX)*dx;
-               double eps_new=1.0;
-               LBMReal op = 1.;
+               real az=(vx1Plusx3-vx1Minusx3)/(2.0*deltaX)*dx;
+               real bz=(vx2Plusx3-vx2Minusx3)/(2.0*deltaX)*dx;
+               real cz=(vx3Plusx3-vx3Minusx3)/(2.0*deltaX)*dx;
+               real eps_new=1.0;
+               real op = 1.;
 
-               LBMReal feq[27];
+               real feq[27];
 
                calcFeqsFct(feq,rho,vx1,vx2,vx3);
 
-               double f_E    = eps_new *((5.*ax*o + 5.*by*o + 5.*cz*o - 8.*ax*op + 4.*by*op + 4.*cz*op)/(54.*o*op));
-               double f_N    = f_E + eps_new *((2.*(ax - by))/(9.*o));
-               double f_T    = f_E + eps_new *((2.*(ax - cz))/(9.*o));
-               double f_NE   = eps_new *(-(5.*cz*o + 3.*(ay + bx)*op - 2.*cz*op + ax*(5.*o + op) + by*(5.*o + op))/(54.*o*op));
-               double f_SE   = f_NE + eps_new *((  ay + bx )/(9.*o));
-               double f_TE   = eps_new *(-(5.*cz*o + by*(5.*o - 2.*op) + 3.*(az + cx)*op + cz*op + ax*(5.*o + op))/(54.*o*op));
-               double f_BE   = f_TE + eps_new *((  az + cx )/(9.*o));
-               double f_TN   = eps_new *(-(5.*ax*o + 5.*by*o + 5.*cz*o - 2.*ax*op + by*op + 3.*bz*op + 3.*cy*op + cz*op)/(54.*o*op));
-               double f_BN   = f_TN + eps_new *((  bz + cy )/(9.*o));
-               double f_ZERO = eps_new *((5.*(ax + by + cz))/(9.*op));
-               double f_TNE  = eps_new *(-(ay + az + bx + bz + cx + cy)/(72.*o));
-               double f_TSW  = - eps_new *((ay + bx)/(36.*o)) - f_TNE;
-               double f_TSE  = - eps_new *((az + cx)/(36.*o)) - f_TNE;
-               double f_TNW  = - eps_new *((bz + cy)/(36.*o)) - f_TNE;
+               real f_E    = eps_new *((5.*ax*o + 5.*by*o + 5.*cz*o - 8.*ax*op + 4.*by*op + 4.*cz*op)/(54.*o*op));
+               real f_N    = f_E + eps_new *((2.*(ax - by))/(9.*o));
+               real f_T    = f_E + eps_new *((2.*(ax - cz))/(9.*o));
+               real f_NE   = eps_new *(-(5.*cz*o + 3.*(ay + bx)*op - 2.*cz*op + ax*(5.*o + op) + by*(5.*o + op))/(54.*o*op));
+               real f_SE   = f_NE + eps_new *((  ay + bx )/(9.*o));
+               real f_TE   = eps_new *(-(5.*cz*o + by*(5.*o - 2.*op) + 3.*(az + cx)*op + cz*op + ax*(5.*o + op))/(54.*o*op));
+               real f_BE   = f_TE + eps_new *((  az + cx )/(9.*o));
+               real f_TN   = eps_new *(-(5.*ax*o + 5.*by*o + 5.*cz*o - 2.*ax*op + by*op + 3.*bz*op + 3.*cy*op + cz*op)/(54.*o*op));
+               real f_BN   = f_TN + eps_new *((  bz + cy )/(9.*o));
+               real f_ZERO = eps_new *((5.*(ax + by + cz))/(9.*op));
+               real f_TNE  = eps_new *(-(ay + az + bx + bz + cx + cy)/(72.*o));
+               real f_TSW  = - eps_new *((ay + bx)/(36.*o)) - f_TNE;
+               real f_TSE  = - eps_new *((az + cx)/(36.*o)) - f_TNE;
+               real f_TNW  = - eps_new *((bz + cy)/(36.*o)) - f_TNE;
 
 
                f[DIR_P00]    = f_E    + feq[DIR_P00];
@@ -297,7 +298,7 @@ void InitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D>
 //////////////////////////////////////////////////////////////////////////
 void InitDistributionsBlockVisitor::checkFunction(mu::Parser fct)
 {
-    double x1 = 1.0, x2 = 1.0, x3 = 1.0;
+    mu::value_type x1 = 1.0, x2 = 1.0, x3 = 1.0; // DefineVar() binds mu::value_type*, same as in visit(); not tied to real
     fct.DefineVar("x1", &x1);
     fct.DefineVar("x2", &x2);
     fct.DefineVar("x3", &x3);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.h
index c64f0ed936c9d9f527ec49f31e9646d5fa3150e9..68ba69f8388fe1dffe7ed1acad8d54619f799eb2 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.h
@@ -63,7 +63,7 @@ class Block3D;
 class InitDistributionsBlockVisitor : public Block3DVisitor
 {
 public:
-    using D3Q27RealLim = std::numeric_limits<LBMReal>;
+    using D3Q27RealLim = std::numeric_limits<real>;
 
 public:
     InitDistributionsBlockVisitor();
@@ -81,10 +81,10 @@ public:
     void setVx3(const std::string &muParserString);
     void setRho(const std::string &muParserString);
     //////////////////////////////////////////////////////////////////////////
-    void setVx1(LBMReal vx1);
-    void setVx2(LBMReal vx2);
-    void setVx3(LBMReal vx3);
-    void setRho(LBMReal rho);
+    void setVx1(real vx1);
+    void setVx2(real vx2);
+    void setVx3(real vx3);
+    void setRho(real rho);
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.cpp
index 1bcb6057f5b5987ced9adc17e7d6fabd262911e6..2632f2c59db6d4982806c50dcc50f743cc5c2ad3 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.cpp
@@ -10,7 +10,7 @@
 #include "LBMKernel.h"
 #include <basics/utilities/UbFileInputASCII.h>
 
-InitDistributionsFromFileBlockVisitor::InitDistributionsFromFileBlockVisitor(/*LBMReal nu, */ LBMReal rho,
+InitDistributionsFromFileBlockVisitor::InitDistributionsFromFileBlockVisitor(/*LBMReal nu, */ real rho,
                                                                              std::string filename)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), /*nu(nu),*/ rho(rho)
 {
@@ -23,7 +23,7 @@ InitDistributionsFromFileBlockVisitor::InitDistributionsFromFileBlockVisitor(/*L
     int nodesX2 = in.readInteger();
     int nodesX3 = in.readInteger();
 
-    matrix = CbArray4D<LBMReal, IndexerX4X3X2X1>(3, nodesX1, nodesX2, nodesX3, 0);
+    matrix = CbArray4D<real, IndexerX4X3X2X1>(3, nodesX1, nodesX2, nodesX3, 0);
 
     for (int x3 = 0; x3 < nodesX3; x3++)
         for (int x2 = 0; x2 < nodesX2; x2++)
@@ -52,11 +52,11 @@ void InitDistributionsFromFileBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<
     //   LBMReal o = LBMSystem::calcCollisionFactor(nu, block->getLevel());
 
     // Funktionszeiger
-    typedef void (*CalcFeqsFct)(LBMReal *const & /*feq[27]*/, const LBMReal & /*(d)rho*/, const LBMReal & /*vx1*/,
-                                const LBMReal & /*vx2*/, const LBMReal & /*vx3*/);
+    typedef void (*CalcFeqsFct)(real *const & /*feq[27]*/, const real & /*(d)rho*/, const real & /*vx1*/,
+                                const real & /*vx2*/, const real & /*vx3*/);
     CalcFeqsFct calcFeqsFct = NULL;
 
-    LBMReal vx1, vx2, vx3;
+    real vx1, vx2, vx3;
 
     int gridRank  = grid->getRank();
     int blockRank = block->getRank();
@@ -76,7 +76,7 @@ void InitDistributionsFromFileBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<
         SPtr<BCArray3D> bcArray        = kernel->getBCProcessor()->getBCArray();
         SPtr<EsoTwist3D> distributions = dynamicPointerCast<EsoTwist3D>(kernel->getDataSet()->getFdistributions());
 
-        LBMReal f[D3Q27System::ENDF + 1];
+        real f[D3Q27System::ENDF + 1];
 
         //      size_t nx1 = distributions->getNX1();
         //      size_t nx2 = distributions->getNX2();
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.h
index 1f40abadeef750da38e03d3db30ba752d4ae9da0..cc7acc395d8b17358cc567692e46c67738328436 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsFromFileBlockVisitor.h
@@ -12,14 +12,14 @@ class Block3D;
 class InitDistributionsFromFileBlockVisitor : public Block3DVisitor
 {
 public:
-    InitDistributionsFromFileBlockVisitor(/*LBMReal nu, */ LBMReal rho, std::string file);
+    InitDistributionsFromFileBlockVisitor(/*real nu, */ real rho, std::string file);
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
 
 private:
-    CbArray4D<LBMReal, IndexerX4X3X2X1> matrix;
+    CbArray4D<real, IndexerX4X3X2X1> matrix;
     enum Velocity { Vx1, Vx2, Vx3 };
     //   LBMReal nu;
-    LBMReal rho;
+    real rho;
 };
 #endif // InitDistributionsFromFileBlockVisitor_h__
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.cpp
index 567ce2e7ff5b40f3c8042bd404394a3fbf9ffee4..6dd6976ca3cb250e720079031632b9b5e3902696 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.cpp
@@ -16,7 +16,7 @@
 using namespace std;
 
 InitDistributionsWithInterpolationGridVisitor::InitDistributionsWithInterpolationGridVisitor(
-    SPtr<Grid3D> oldGrid, InterpolationProcessorPtr iProcessor, LBMReal nu)
+    SPtr<Grid3D> oldGrid, InterpolationProcessorPtr iProcessor, real nu)
     : oldGrid(oldGrid), iProcessor(iProcessor), nu(nu)
 {
 }
@@ -119,11 +119,11 @@ void InitDistributionsWithInterpolationGridVisitor::copyRemoteBlock(SPtr<Block3D
         SPtr<EsoTwist3D> oldDistributions =
             dynamicPointerCast<EsoTwist3D>(oldKernel->getDataSet()->getFdistributions());
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Send(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -141,11 +141,11 @@ void InitDistributionsWithInterpolationGridVisitor::copyRemoteBlock(SPtr<Block3D
         SPtr<EsoTwist3D> newDistributions =
             dynamicPointerCast<EsoTwist3D>(newKernel->getDataSet()->getFdistributions());
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(newDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(newDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(newDistributions)->getZeroDistributions();
 
         MPI_Recv(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -165,10 +165,10 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateLocalBlockCoarseT
 {
     D3Q27ICell icellC;
     D3Q27ICell icellF;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
 
-    LBMReal omegaC = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
-    LBMReal omegaF = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
+    real omegaC = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
+    real omegaF = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
 
     iProcessor->setOmegas(omegaC, omegaF);
 
@@ -265,11 +265,11 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockCoarse
         SPtr<EsoTwist3D> oldDistributions =
             dynamicPointerCast<EsoTwist3D>(oldKernel->getDataSet()->getFdistributions());
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Send(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -285,10 +285,10 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockCoarse
     } else if (newBlockRank == newGridRank && newBlock->isActive()) {
         D3Q27ICell icellC;
         D3Q27ICell icellF;
-        LBMReal xoff, yoff, zoff;
+        real xoff, yoff, zoff;
 
-        LBMReal omegaC = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
-        LBMReal omegaF = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
+        real omegaC = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
+        real omegaF = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
 
         iProcessor->setOmegas(omegaC, omegaF);
 
@@ -313,11 +313,11 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockCoarse
 
         SPtr<EsoTwist3D> oldDistributions(new D3Q27EsoTwist3DSplittedVector(bMaxX1, bMaxX2, bMaxX3, 0));
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Recv(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -393,12 +393,12 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockCoarse
 void InitDistributionsWithInterpolationGridVisitor::interpolateLocalBlockFineToCoarse(SPtr<Block3D> oldBlock,
                                                                                       SPtr<Block3D> newBlock)
 {
-    LBMReal icellC[27];
+    real icellC[27];
     D3Q27ICell icellF;
-    LBMReal xoff, yoff, zoff;
+    real xoff, yoff, zoff;
 
-    LBMReal omegaF = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
-    LBMReal omegaC = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
+    real omegaF = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
+    real omegaC = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
 
     iProcessor->setOmegas(omegaC, omegaF);
 
@@ -496,11 +496,11 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockFineTo
         SPtr<EsoTwist3D> oldDistributions =
             dynamicPointerCast<EsoTwist3D>(oldKernel->getDataSet()->getFdistributions());
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Send(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
@@ -514,12 +514,12 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockFineTo
         std::vector<int> &bcDataVector  = bcArrayOldBlock->getBcindexmatrixDataVector();
         MPI_Send(&bcDataVector[0], (int)bcDataVector.size(), MPI_INT, newBlockRank, 0, MPI_COMM_WORLD);
     } else if (newBlockRank == newGridRank && newBlock->isActive()) {
-        LBMReal icellC[27];
+        real icellC[27];
         D3Q27ICell icellF;
-        LBMReal xoff, yoff, zoff;
+        real xoff, yoff, zoff;
 
-        LBMReal omegaF = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
-        LBMReal omegaC = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
+        real omegaF = LBMSystem::calcCollisionFactor(nu, oldBlock->getLevel());
+        real omegaC = LBMSystem::calcCollisionFactor(nu, newBlock->getLevel());
 
         iProcessor->setOmegas(omegaC, omegaF);
 
@@ -544,11 +544,11 @@ void InitDistributionsWithInterpolationGridVisitor::interpolateRemoteBlockFineTo
 
         SPtr<EsoTwist3D> oldDistributions(new D3Q27EsoTwist3DSplittedVector(bMaxX1, bMaxX2, bMaxX3, 0));
 
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr localDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getLocalDistributions();
-        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
+        CbArray4D<real, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getNonLocalDistributions();
-        CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
+        CbArray3D<real, IndexerX3X2X1>::CbArray3DPtr zeroDistributions =
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(oldDistributions)->getZeroDistributions();
 
         MPI_Recv(localDistributions->getStartAdressOfSortedArray(0, 0, 0, 0),
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.h
index 207656e1f3bfb287cf8cc1bd1270daf510ce9aa3..a143ab6b0b40a8b35023a4916ffcbd20ae4b726c 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitDistributionsWithInterpolationGridVisitor.h
@@ -14,7 +14,7 @@ class InitDistributionsWithInterpolationGridVisitor : public Grid3DVisitor
 {
 public:
     InitDistributionsWithInterpolationGridVisitor(SPtr<Grid3D> oldGrid, SPtr<InterpolationProcessor> iProcessor,
-                                                  LBMReal nu);
+                                                  real nu);
     ~InitDistributionsWithInterpolationGridVisitor() override;
     void visit(SPtr<Grid3D> grid) override;
 
@@ -28,7 +28,7 @@ private:
 
     SPtr<Grid3D> newGrid;
     SPtr<Grid3D> oldGrid;
-    LBMReal nu;
+    real nu;
 
     SPtr<InterpolationProcessor> iProcessor;
 };
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.cpp
index 9c8c05babe4fc2d454908095e8a232eb14434df2..0c666958912c7f73f74d91b179e19cf6d3b06dd1 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.cpp
@@ -209,7 +209,7 @@ void InitThixotropyBlockVisitor::setLambda(const std::string& muParserString)
 //	this->checkFunction(muf3);
 //}
 //////////////////////////////////////////////////////////////////////////
-void InitThixotropyBlockVisitor::setLambda(LBMReal lambda)
+void InitThixotropyBlockVisitor::setLambda(real lambda)
 {
    this->muLambda.SetExpr(UbSystem::toString(lambda, D3Q27RealLim::digits10));
    this->checkFunction(muLambda);
@@ -233,7 +233,7 @@ void InitThixotropyBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
    this->muLambda.DefineVar("x1",&x1); this->muLambda.DefineVar("x2",&x2); this->muLambda.DefineVar("x3",&x3);
 
    //Funktionszeiger
-   typedef void (*CalcFeqsFct)(LBMReal* const& /*feq[27]*/,const LBMReal& /*(d)rho*/,const LBMReal& /*vx1*/,const LBMReal& /*vx2*/,const LBMReal& /*vx3*/);
+   typedef void (*CalcFeqsFct)(real* const& /*feq[27]*/,const real& /*(d)rho*/,const real& /*vx1*/,const real& /*vx2*/,const real& /*vx3*/);
    CalcFeqsFct   calcFeqsFct   = NULL;
 
    int gridRank = grid->getRank();
@@ -253,7 +253,7 @@ void InitThixotropyBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
       SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();
       SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getHdistributions();  
 
-      LBMReal h[D3Q27System::ENDF+1];
+      real h[D3Q27System::ENDF+1];
 
       for(std::size_t ix3=0; ix3<bcArray->getNX3(); ix3++)
          for(std::size_t ix2=0; ix2<bcArray->getNX2(); ix2++)
@@ -281,7 +281,7 @@ void InitThixotropyBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
                //distributionsf->setDistribution(f, ix1, ix2, ix3);
                //distributionsf->setDistributionInv(f, ix1, ix2, ix3);
 
-               LBMReal lambda = muLambda.Eval();
+               real lambda = muLambda.Eval();
                
                calcFeqsFct(h,lambda,0.0,0.0,0.0);
                
@@ -303,7 +303,7 @@ void InitThixotropyBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 //////////////////////////////////////////////////////////////////////////
 void InitThixotropyBlockVisitor::checkFunction(mu::Parser fct)
 {
-   double x1 = 1.0, x2 = 1.0, x3 = 1.0;
+   mu::value_type x1 = 1.0, x2 = 1.0, x3 = 1.0; // DefineVar() binds mu::value_type*, which need not be the same type as 'real'
    fct.DefineVar("x1", &x1);
    fct.DefineVar("x2", &x2);
    fct.DefineVar("x3", &x3);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.h
index a9105e027c0fed48dce613b2594d199ba7531f22..eb35a9ad7d7718bb0f22ec16c71ebbd7cb646eb0 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/InitThixotropyBlockVisitor.h
@@ -51,7 +51,7 @@
 class InitThixotropyBlockVisitor : public Block3DVisitor
 {
 public:
-	typedef std::numeric_limits<LBMReal> D3Q27RealLim;
+	typedef std::numeric_limits<real> D3Q27RealLim;
 
 public:
 	InitThixotropyBlockVisitor();
@@ -98,7 +98,7 @@ public:
 	//void setf1(LBMReal f1);
 	//void setf2(LBMReal f2);
 	//void setf3(LBMReal f3);
-	void setLambda(LBMReal lambda);
+	void setLambda(real lambda);
 	//void setD(LBMReal D);
 
 	//void initialize(double* f, double x1, double x2, double x3, double vx1, double vx2, double vx3, double rho, UbTupleDouble3 coords, double dx, double o, bool NSE);
@@ -107,7 +107,7 @@ public:
 
 protected:
 	void checkFunction(mu::Parser fct);
-	typedef void(*CalcFeqsFct)(LBMReal* const& /*feq[27]*/, const LBMReal& /*(d)rho*/, const LBMReal& /*vx1*/, const LBMReal& /*vx2*/, const LBMReal& /*vx3*/);
+	typedef void(*CalcFeqsFct)(real* const& /*feq[27]*/, const real& /*(d)rho*/, const real& /*vx1*/, const real& /*vx2*/, const real& /*vx3*/);
 
 private:
 	mu::Parser muVx1;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
index 7546aa30721cac1655fba94cb3d68e98d1398546..1e62e0a2c35367fb6189822bcdbf96b611d75bb9 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
@@ -110,6 +110,8 @@ void MetisPartitioningGridVisitor::distributePartitionData(SPtr<Grid3D> grid, Pa
 //////////////////////////////////////////////////////////////////////////
 void MetisPartitioningGridVisitor::buildMetisGraphLevelIntersected(SPtr<Grid3D> grid, int nofSegments, PartLevel level)
 {
+    using namespace vf::lbm::dir;
+
     int edges                       = 0;
     const int edgeWeight            = 1;
     const int edgeWeightChildFactor = 8;
@@ -133,7 +135,7 @@ void MetisPartitioningGridVisitor::buildMetisGraphLevelIntersected(SPtr<Grid3D>
             // the weights of the vertices are 2^level of grid (1, 2, 4, 8 .....) 1<<level
             metis.vwgt.push_back((idx_t)(1 << block->getLevel()));
 
-            for (int dir = D3Q27System::DIR_P00; dir <= numOfDirs; dir++) {
+            for (int dir = (int)DIR_P00; dir <= numOfDirs; dir++) {
                 SPtr<Block3D> neighBlock = grid->getNeighborBlock(dir, block);
                 if (neighBlock) {
                     if (this->getPartitionCondition(neighBlock, level)) {
@@ -169,6 +171,8 @@ void MetisPartitioningGridVisitor::buildMetisGraphLevelIntersected(SPtr<Grid3D>
 //////////////////////////////////////////////////////////////////////////
 void MetisPartitioningGridVisitor::buildMetisGraphLevelBased(SPtr<Grid3D> grid, int nofSegments, PartLevel level)
 {
+    using namespace vf::lbm::dir;
+
     int minInitLevel = grid->getCoarsestInitializedLevel();
     int maxInitLevel = grid->getFinestInitializedLevel();
 
@@ -200,7 +204,7 @@ void MetisPartitioningGridVisitor::buildMetisGraphLevelBased(SPtr<Grid3D> grid,
             metis.xadj.push_back(edges);
             metis.vwgt.push_back(vertexWeight);
 
-            for (int dir = D3Q27System::DIR_P00; dir <= numOfDirs; dir++) {
+            for (int dir = (int)DIR_P00; dir <= numOfDirs; dir++) {
                 SPtr<Block3D> neighBlock = grid->getNeighborBlock(dir, block);
                 if (neighBlock) {
                     if (this->getPartitionCondition(neighBlock, level)) {
@@ -256,11 +260,13 @@ void MetisPartitioningGridVisitor::clear()
 int MetisPartitioningGridVisitor::getEdgeWeight(int dir)
 {
     using namespace D3Q27System;
-    if (dir <= DIR_00M) {
+    using namespace vf::lbm::dir;
+
+    if (dir <= (int)DIR_00M) {
         return 100;
-    } else if (dir >= DIR_PP0 && dir <= DIR_0MP) {
+    } else if (dir >= (int)DIR_PP0 && dir <= (int)DIR_0MP) {
         return 10;
-    } else if (dir >= DIR_PPP) {
+    } else if (dir >= (int)DIR_PPP) {
         return 1;
     }
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseBoundaryConditionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseBoundaryConditionsBlockVisitor.cpp
index 003d5d31204fafc82f78a0fddb04897c2c60e77f..b4eee2dfbd952d27835dbaab24da84c041999a21 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseBoundaryConditionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseBoundaryConditionsBlockVisitor.cpp
@@ -77,12 +77,12 @@ void MultiphaseBoundaryConditionsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Blo
       SPtr<BCArray3D> bcArray = bcProcessor->getBCArray();
 
       bool compressible = kernel->getCompressible();
-      double collFactorL = kernel->getCollisionFactorL();
-	  double collFactorG = kernel->getCollisionFactorG();
-	  double collFactorPh = 1.0/kernel->getPhaseFieldRelaxation();
-	  double densityRatio = kernel->getDensityRatio();
-	  LBMReal phiL = kernel->getPhiL();
-	  LBMReal phiH = kernel->getPhiH();
+      real collFactorL = kernel->getCollisionFactorL();
+	  real collFactorG = kernel->getCollisionFactorG();
+	  real collFactorPh = 1.0/kernel->getPhaseFieldRelaxation();
+	  real densityRatio = kernel->getDensityRatio();
+	  real phiL = kernel->getPhiL();
+	  real phiH = kernel->getPhiH();
       //int level = block->getLevel();
 
       int minX1 = 0;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.cpp
index a35fc289b7505c722151e2a5afe98815131a989d..4bc8a1862535ad4f7129577b864de6fe6dfc9ae8 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.cpp
@@ -50,7 +50,7 @@ MultiphaseInitDistributionsBlockVisitor::MultiphaseInitDistributionsBlockVisitor
 	this->setRho(0.0);
 }
 //////////////////////////////////////////////////////////////////////////
-MultiphaseInitDistributionsBlockVisitor::MultiphaseInitDistributionsBlockVisitor( LBMReal densityRatio, LBMReal vx1, LBMReal vx2, LBMReal vx3, LBMReal rho)
+MultiphaseInitDistributionsBlockVisitor::MultiphaseInitDistributionsBlockVisitor( real densityRatio, real vx1, real vx2, real vx3, real rho)
 	: Block3DVisitor(0, D3Q27System::MAXLEVEL), densityRatio(densityRatio) 
 {
 	this->setVx1(vx1);
@@ -118,31 +118,31 @@ void MultiphaseInitDistributionsBlockVisitor::setPhi( const std::string& muParse
 	this->checkFunction(muPhi); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setVx1( LBMReal vx1 ) 
+void MultiphaseInitDistributionsBlockVisitor::setVx1( real vx1 ) 
 { 
 	this->muVx1.SetExpr( UbSystem::toString(vx1,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx1); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setVx2( LBMReal vx2 ) 
+void MultiphaseInitDistributionsBlockVisitor::setVx2( real vx2 ) 
 { 
 	this->muVx2.SetExpr( UbSystem::toString(vx2,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx2); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setVx3( LBMReal vx3 ) 
+void MultiphaseInitDistributionsBlockVisitor::setVx3( real vx3 ) 
 { 
 	this->muVx3.SetExpr( UbSystem::toString(vx3,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx3); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setRho( LBMReal rho ) 
+void MultiphaseInitDistributionsBlockVisitor::setRho( real rho ) 
 { 
 	this->muRho.SetExpr( UbSystem::toString(rho,D3Q27RealLim::digits10) );  
 	this->checkFunction(muRho); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setPhi( LBMReal phi ) 
+void MultiphaseInitDistributionsBlockVisitor::setPhi( real phi ) 
 { 
 	this->muPhi.SetExpr( UbSystem::toString(phi,D3Q27RealLim::digits10) );  
 	this->checkFunction(muPhi); 
@@ -151,6 +151,7 @@ void MultiphaseInitDistributionsBlockVisitor::setPhi( LBMReal phi )
 void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D> block) 
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	if(!block) UB_THROW( UbException(UB_EXARGS,"block is not exist") );
 
@@ -162,7 +163,7 @@ void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPt
 	this->muRho.DefineVar("x1",&x1); this->muRho.DefineVar("x2",&x2); this->muRho.DefineVar("x3",&x3);
 	this->muPhi.DefineVar("x1",&x1); this->muPhi.DefineVar("x2",&x2); this->muPhi.DefineVar("x3",&x3);
 
-	LBMReal vx1, vx2, vx3, rho, /*p1,*/ phi;
+	real vx1, vx2, vx3, rho, /*p1,*/ phi;
 
 	int gridRank = grid->getRank();
 	int blockRank = block->getRank();
@@ -178,10 +179,10 @@ void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPt
 		SPtr<EsoTwist3D> distributionsH = dynamicPointerCast<EsoTwist3D>(kernel->getDataSet()->getHdistributions());
         SPtr<EsoTwist3D> distributionsH2 = dynamicPointerCast<EsoTwist3D>(kernel->getDataSet()->getH2distributions());
 
-		LBMReal phiL = kernel->getPhiL();
-		LBMReal phiH = kernel->getPhiH();
+		real phiL = kernel->getPhiL();
+		real phiH = kernel->getPhiH();
 
-		LBMReal f[D3Q27System::ENDF+1];
+		real f[D3Q27System::ENDF+1];
 
 		for(int ix3=0; ix3<(int)bcArray->getNX3(); ix3++)
             for (int ix2 = 0; ix2 < (int)bcArray->getNX2(); ix2++)
@@ -201,29 +202,29 @@ void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPt
 					phi = muPhi.Eval();
 					
 					//rho = phi*1.0 + (1.0-phi)/densityRatio;
-					LBMReal rhoH = 1.0;
-					LBMReal rhoL = 1.0/densityRatio;
+					real rhoH = 1.0;
+					real rhoL = 1.0/densityRatio;
 					rho = rhoH + (rhoH - rhoL)*(phi - phiH)/(phiH - phiL);
 
 			
-					LBMReal feq[27];
-					LBMReal geq[27];
+					real feq[27];
+					real geq[27];
 
 					//calcFeqsFct(feq,rho,vx1,vx2,vx3);
-					LBMReal vx1Sq = vx1*vx1;
-					LBMReal vx2Sq = vx2*vx2;
-					LBMReal vx3Sq = vx3*vx3;
+					real vx1Sq = vx1*vx1;
+					real vx2Sq = vx2*vx2;
+					real vx3Sq = vx3*vx3;
 					for (int dir = STARTF; dir < (ENDF+1); dir++)
 					{
-						LBMReal velProd = DX1[dir]*vx1 + DX2[dir]*vx2 + DX3[dir]*vx3;
-						LBMReal velSq1 = velProd*velProd;
-						LBMReal gamma = WEIGTH[dir]*(3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
+						real velProd = DX1[dir]*vx1 + DX2[dir]*vx2 + DX3[dir]*vx3;
+						real velSq1 = velProd*velProd;
+						real gamma = WEIGTH[dir]*(3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
 
 						feq[dir] = rho*WEIGTH[dir]*(1 + 3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
 						//geq[dir] = p1*WEIGTH[dir] + gamma;
 						//geq[dir] = p1*WEIGTH[dir]/(rho*UbMath::c1o3) + gamma*rho;
 						//geq[dir] = (p1*WEIGTH[dir]/(rho*UbMath::c1o3) + gamma*rho)*UbMath::c1o3;
-						geq[dir] = (gamma*rho)*UbMath::c1o3;
+						geq[dir] = (gamma*rho)* vf::basics::constant::c1o3;
 					}
 
 
@@ -335,7 +336,7 @@ void MultiphaseInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPt
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseInitDistributionsBlockVisitor::checkFunction(mu::Parser fct)
 {
-	double x1=1.0,x2=1.0,x3=1.0;
+	mu::value_type x1=1.0,x2=1.0,x3=1.0; // DefineVar() binds mu::value_type*, which need not be the same type as 'real'
 	fct.DefineVar("x1",&x1); 
 	fct.DefineVar("x2",&x2); 
 	fct.DefineVar("x3",&x3);
@@ -352,7 +353,7 @@ void MultiphaseInitDistributionsBlockVisitor::checkFunction(mu::Parser fct)
 	}
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseInitDistributionsBlockVisitor::setNu( LBMReal nu )
+void MultiphaseInitDistributionsBlockVisitor::setNu( real nu )
 {
 	this->nu = nu;
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.h
index 6ff60387daeef966da6143ef459fa7b7d247fbd5..6077a1294582ea423dd3c5fcb446dd841e65491a 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseInitDistributionsBlockVisitor.h
@@ -45,7 +45,7 @@
 class MultiphaseInitDistributionsBlockVisitor : public Block3DVisitor
 {
 public:
-	typedef std::numeric_limits<LBMReal> D3Q27RealLim;
+	typedef std::numeric_limits<real> D3Q27RealLim;
 
 public:
 	MultiphaseInitDistributionsBlockVisitor();
@@ -56,7 +56,7 @@ public:
 	//! \param vx1 - velocity in x
 	//! \param vx2 - velocity in y
 	//! \param vx3 - velocity in z
-	MultiphaseInitDistributionsBlockVisitor( LBMReal densityRatio, LBMReal vx1=0.0, LBMReal vx2=0.0, LBMReal vx3=0.0, LBMReal rho=0.0);
+	MultiphaseInitDistributionsBlockVisitor( real densityRatio, real vx1=0.0, real vx2=0.0, real vx3=0.0, real rho=0.0);
 	//////////////////////////////////////////////////////////////////////////
 	//automatic vars are: x1,x2, x3
 	//ussage example: setVx1("x1*0.01+x2*0.003")
@@ -74,12 +74,12 @@ public:
 	void setPhi( const std::string& muParserString);
 
 	//////////////////////////////////////////////////////////////////////////
-	void setVx1( LBMReal vx1 );
-	void setVx2( LBMReal vx2 );
-	void setVx3( LBMReal vx3 );
-	void setRho( LBMReal rho );
-	void setPhi( LBMReal rho );
-	void setNu( LBMReal nu );
+	void setVx1( real vx1 );
+	void setVx2( real vx2 );
+	void setVx3( real vx3 );
+	void setRho( real rho );
+	void setPhi( real phi );
+	void setNu( real nu );
 
 	void visit(SPtr<Grid3D> grid, SPtr<Block3D> block);
 
@@ -93,8 +93,8 @@ private:
 	mu::Parser muRho;
 	mu::Parser muPhi;
 
-	LBMReal nu;
-	LBMReal densityRatio;
+	real nu;
+	real densityRatio;
 };
 
 #endif //D3Q27INITDISTRIBUTIONSPATCHVISITOR_H
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.cpp
index 4990690e2d7d464cfbdc69f2966655568021e7d0..8885b09ea0e19d56bce205334263a7b5c1f16313 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.cpp
@@ -19,7 +19,7 @@
 //   }
 //}
 //////////////////////////////////////////////////////////////////////////
-MultiphaseSetKernelBlockVisitor::MultiphaseSetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nuL, LBMReal nuG, double availMem, double needMem, MultiphaseSetKernelBlockVisitor::Action action /*= SetKernelBlockVisitor::New*/) :
+MultiphaseSetKernelBlockVisitor::MultiphaseSetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nuL, real nuG, real availMem, real needMem, MultiphaseSetKernelBlockVisitor::Action action /*= SetKernelBlockVisitor::New*/) :
 	Block3DVisitor(0, D3Q27System::MAXLEVEL), kernel(kernel), nuL(nuL), nuG(nuG), action(action), dataSetFlag(true)
 {
 	if (needMem > availMem)
@@ -32,8 +32,8 @@ void MultiphaseSetKernelBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> blo
 {
 	if(kernel && (block->getRank() == grid->getRank()))
 	{
-		LBMReal collFactorL = LBMSystem::calcCollisionFactor(nuL, block->getLevel());
-		LBMReal collFactorG = LBMSystem::calcCollisionFactor(nuG, block->getLevel());
+		real collFactorL = LBMSystem::calcCollisionFactor(nuL, block->getLevel());
+		real collFactorG = LBMSystem::calcCollisionFactor(nuG, block->getLevel());
 		kernel->setCollisionFactorMultiphase(collFactorL, collFactorG);
 
 		kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.h
index 24d2b35c3a85b80e793b94d61feceb58b607ff19..566419d7f6f8a1e87a946e748e725ec1624d29ce 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseSetKernelBlockVisitor.h
@@ -42,7 +42,7 @@ class MultiphaseSetKernelBlockVisitor : public Block3DVisitor
 public:
 	enum Action { NewKernel, ChangeKernel, ChangeKernelWithData};
 public:
-	MultiphaseSetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nuL, LBMReal nuG, double availMem, double needMem, 
+	MultiphaseSetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nuL, real nuG, real availMem, real needMem, 
 		MultiphaseSetKernelBlockVisitor::Action action = MultiphaseSetKernelBlockVisitor::NewKernel);
 
 	virtual ~MultiphaseSetKernelBlockVisitor() {}
@@ -53,8 +53,8 @@ public:
 
 private:
 	SPtr<LBMKernel> kernel;
-	LBMReal nuL;
-	LBMReal nuG;
+	real nuL;
+	real nuG;
 	Action action;
 	bool dataSetFlag;
 };
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.cpp
index 28b035f71f7ab83f4ef33188e0d265588835d0eb..1338ab8822f254cc00fc795aa07bdb1b9a525ac2 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.cpp
@@ -121,31 +121,31 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPressure(const std:
 	this->checkFunction(muPressure);
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx1( LBMReal vx1 ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx1( real vx1 ) 
 { 
 	this->muVx1.SetExpr( UbSystem::toString(vx1,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx1); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx2( LBMReal vx2 ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx2( real vx2 ) 
 { 
 	this->muVx2.SetExpr( UbSystem::toString(vx2,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx2); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx3( LBMReal vx3 ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setVx3( real vx3 ) 
 { 
 	this->muVx3.SetExpr( UbSystem::toString(vx3,D3Q27RealLim::digits10) );  
 	this->checkFunction(muVx3); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setRho( LBMReal rho ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setRho( real rho ) 
 { 
 	this->muRho.SetExpr( UbSystem::toString(rho,D3Q27RealLim::digits10) );  
 	this->checkFunction(muRho); 
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPhi( LBMReal phi ) 
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPhi( real phi ) 
 { 
 	this->muPhi.SetExpr( UbSystem::toString(phi,D3Q27RealLim::digits10) );  
 	this->checkFunction(muPhi); 
@@ -154,6 +154,7 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPhi( LBMReal phi )
 void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3D> grid, SPtr<Block3D> block) 
 {
 	using namespace D3Q27System;
+	using namespace vf::lbm::dir;
 
 	if(!block) UB_THROW( UbException(UB_EXARGS,"block is not exist") );
 
@@ -187,7 +188,7 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3
 		//LBMReal phiL = kernel->getPhiL();
 		//LBMReal phiH = kernel->getPhiH();
 
-		LBMReal f[D3Q27System::ENDF+1];
+		real f[D3Q27System::ENDF+1];
 
 		for(int ix3=0; ix3<(int)bcArray->getNX3(); ix3++)
             for (int ix2 = 0; ix2 < (int)bcArray->getNX2(); ix2++)
@@ -198,7 +199,7 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3
                     x2              = coords[1];
                     x3              = coords[2];
 
-					LBMReal vx1 = 0, vx2 = 0, vx3 = 0, p1 = 0, phi = 0,pres=0;
+					real vx1 = 0, vx2 = 0, vx3 = 0, p1 = 0, phi = 0,pres=0;
 					//p1  = 0.0;
 					p1 = muRho.Eval();
 					vx1 = muVx1.Eval();
@@ -215,24 +216,24 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3
 					//LBMReal rho = rhoH + (rhoH - rhoL)*(phi - phiH)/(phiH - phiL);
 
 			
-					LBMReal feq[27];
-					LBMReal geq[27];
+					real feq[27];
+					real geq[27];
 
 					//calcFeqsFct(feq,rho,vx1,vx2,vx3);
-					LBMReal vx1Sq = vx1*vx1;
-					LBMReal vx2Sq = vx2*vx2;
-					LBMReal vx3Sq = vx3*vx3;
+					real vx1Sq = vx1*vx1;
+					real vx2Sq = vx2*vx2;
+					real vx3Sq = vx3*vx3;
 					for (int dir = STARTF; dir < (ENDF+1); dir++)
 					{
-						LBMReal velProd = DX1[dir]*vx1 + DX2[dir]*vx2 + DX3[dir]*vx3;
-						LBMReal velSq1 = velProd*velProd;
-						LBMReal gamma = WEIGTH[dir]*(3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
+						real velProd = DX1[dir]*vx1 + DX2[dir]*vx2 + DX3[dir]*vx3;
+						real velSq1 = velProd*velProd;
+						real gamma = WEIGTH[dir]*(3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
 
 						//feq[dir] = rho*WEIGTH[dir]*(1 + 3*velProd + 4.5*velSq1 - 1.5*(vx1Sq+vx2Sq+vx3Sq));
 						feq[dir] =  WEIGTH[dir] * (1 + 3 * velProd + 4.5 * velSq1 - 1.5 * (vx1Sq + vx2Sq + vx3Sq));
 						//geq[dir] = p1*WEIGTH1[dir] + gamma;
 						//geq[dir] = p1*WEIGTH[dir]/(rho*UbMath::c1o3) + gamma*rho;
-						geq[dir] = p1 * WEIGTH[dir] / ( UbMath::c1o3) + gamma ;
+						geq[dir] = p1 * WEIGTH[dir] / (vf::basics::constant::c1o3) + gamma ;
 					}
 
 
@@ -346,7 +347,7 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::visit(const SPtr<Grid3
 //////////////////////////////////////////////////////////////////////////
 void MultiphaseVelocityFormInitDistributionsBlockVisitor::checkFunction(mu::Parser fct)
 {
-	double x1=1.0,x2=1.0,x3=1.0;
+	mu::value_type x1=1.0,x2=1.0,x3=1.0; // DefineVar() binds mu::value_type*, which need not be the same type as 'real'
 	fct.DefineVar("x1",&x1); 
 	fct.DefineVar("x2",&x2); 
 	fct.DefineVar("x3",&x3);
@@ -363,12 +364,12 @@ void MultiphaseVelocityFormInitDistributionsBlockVisitor::checkFunction(mu::Pars
 	}
 }
 //////////////////////////////////////////////////////////////////////////
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setNu( LBMReal nu )
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setNu( real nu )
 {
 	this->nu = nu;
 }
 
-void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPressure(LBMReal pres)
+void MultiphaseVelocityFormInitDistributionsBlockVisitor::setPressure(real pres)
 {
 	this->muPressure.SetExpr(UbSystem::toString(pres, D3Q27RealLim::digits10));
 	this->checkFunction(muPressure);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.h
index 553cfe75b6b881c96a0542f184bf50c88146babc..92ab5eff9af4559bf22893c9a8506362604606c3 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/MultiphaseVelocityFormInitDistributionsBlockVisitor.h
@@ -45,7 +45,7 @@
 class MultiphaseVelocityFormInitDistributionsBlockVisitor : public Block3DVisitor
 {
 public:
-	typedef std::numeric_limits<LBMReal> D3Q27RealLim;
+	typedef std::numeric_limits<real> D3Q27RealLim;
 
 public:
 	MultiphaseVelocityFormInitDistributionsBlockVisitor();
@@ -75,13 +75,13 @@ public:
 	void setPressure(const std::string& muParserString);
 
 	//////////////////////////////////////////////////////////////////////////
-	void setVx1( LBMReal vx1 );
-	void setVx2( LBMReal vx2 );
-	void setVx3( LBMReal vx3 );
-	void setRho( LBMReal rho );
-	void setPhi( LBMReal rho );
-	void setNu( LBMReal nu );
-	void setPressure(LBMReal pres);
+	void setVx1( real vx1 );
+	void setVx2( real vx2 );
+	void setVx3( real vx3 );
+	void setRho( real rho );
+	void setPhi( real phi );
+	void setNu( real nu );
+	void setPressure(real pres);
 
 	void visit(SPtr<Grid3D> grid, SPtr<Block3D> block);
 
@@ -96,7 +96,7 @@ private:
 	mu::Parser muPhi;
 	mu::Parser muPressure;
 
-	LBMReal nu;
+	real nu;
 };
 
 #endif //D3Q27INITDISTRIBUTIONSPATCHVISITOR_H
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
index b0b0cf7743fd195796ef2fb3276a9a3921adf465..a73965641237c804cd094f399e582336e6be8e04 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
@@ -10,7 +10,7 @@
 
 RefineAroundGbObjectHelper::RefineAroundGbObjectHelper(SPtr<Grid3D> grid, int refineLevel,
                                                        SPtr<D3Q27TriFaceMeshInteractor> objectIter,
-                                                       double startDistance, double stopDistance,
+                                                       real startDistance, real stopDistance,
                                                        std::shared_ptr<vf::mpi::Communicator> comm)
     : grid(grid), refineLevel(refineLevel), objectIter(objectIter), startDistance(startDistance),
       stopDistance(stopDistance), comm(comm)
@@ -21,6 +21,8 @@ RefineAroundGbObjectHelper::~RefineAroundGbObjectHelper(void) = default;
 //////////////////////////////////////////////////////////////////////////
 void RefineAroundGbObjectHelper::refine()
 {
+    using namespace vf::lbm::dir;
+
     UBLOG(logDEBUG5, "RefineCrossAndInsideGbObjectHelper: refine - start");
 
     int rank = grid->getRank();
@@ -38,7 +40,7 @@ void RefineAroundGbObjectHelper::refine()
     grid->accept(overlapVisitor);
 
     std::vector<int> dirs;
-    for (int i = D3Q27System::DIR_P00; i <= D3Q27System::DIR_0MP; i++) {
+    for (int i = (int)DIR_P00; i <= (int)DIR_0MP; i++) {
         dirs.push_back(i);
     }
     SetInterpolationDirsBlockVisitor interDirsVisitor(dirs);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
index 0421a963e6d57da5096370eed9721220c98939b4..76874ce767294efa318bb7e8b9f8b4d2e2a348eb 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
@@ -2,6 +2,7 @@
 #define RefineAroundGbObjectHelper_H
 
 #include <PointerDefinitions.h>
+#include "lbm/constants/D3Q27.h"
 
 class Grid3D;
 namespace vf::mpi {class Communicator;}
@@ -20,7 +21,7 @@ public:
     //! \param startDistance start distance from geometry for refinement
     //! \param stopDistance stop distance from geometry for refinement
     RefineAroundGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel, SPtr<D3Q27TriFaceMeshInteractor> objectIter,
-                               double startDistance, double stopDistance, std::shared_ptr<vf::mpi::Communicator> comm);
+                               real startDistance, real stopDistance, std::shared_ptr<vf::mpi::Communicator> comm);
     virtual ~RefineAroundGbObjectHelper();
     //! start refinement
     void refine();
@@ -29,7 +30,7 @@ private:
     SPtr<Grid3D> grid;
     SPtr<D3Q27TriFaceMeshInteractor> objectIter;
     int refineLevel;
-    double startDistance, stopDistance;
+    real startDistance, stopDistance;
     std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
index 3bb1546896ee40ecdb9acf69586251ad1f03bb62..52c7c3ac1204a96fe7db3089ef2eb3ecc93ac143 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
@@ -55,7 +55,7 @@ void RefineCrossAndInsideGbObjectHelper::refine()
 
     std::vector<int> dirs;
 
-    for (int i = D3Q27System::STARTDIR; i <= D3Q27System::ENDDIR; i++) {
+    for (int i = D3Q27System::FSTARTDIR; i <= D3Q27System::FENDDIR; i++) {
         dirs.push_back(i);
     }
     SetInterpolationDirsBlockVisitor interDirsVisitor(dirs);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineInterGbObjectsVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineInterGbObjectsVisitor.cpp
index 7fa6a6283c1a97d07f55405ad3b00af55f1d7690..b1a9a3b399dd0e3a8538187165af079e6bd3fdc5 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineInterGbObjectsVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineInterGbObjectsVisitor.cpp
@@ -3,6 +3,7 @@
 #include "Block3D.h"
 #include "Grid3D.h"
 #include <geometry3d/GbObject3D.h>
+#include "lbm/constants/D3Q27.h"
 
 RefineInterGbObjectsBlockVisitor::RefineInterGbObjectsBlockVisitor() : Block3DVisitor(-1, -1) {}
 //////////////////////////////////////////////////////////////////////////
@@ -29,12 +30,12 @@ void RefineInterGbObjectsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> bl
     UbTupleDouble3 coords = grid->getBlockWorldCoordinates(block);
     UbTupleDouble3 delta  = grid->getBlockLengths(block);
 
-    double cellMinX1 = val<1>(coords);
-    double cellMinX2 = val<2>(coords);
-    double cellMinX3 = val<3>(coords);
-    double cellMaxX1 = val<1>(coords) + val<1>(delta);
-    double cellMaxX2 = val<2>(coords) + val<2>(delta);
-    double cellMaxX3 = val<3>(coords) + val<3>(delta);
+    real cellMinX1 = val<1>(coords);
+    real cellMinX2 = val<2>(coords);
+    real cellMinX3 = val<3>(coords);
+    real cellMaxX1 = val<1>(coords) + val<1>(delta);
+    real cellMaxX2 = val<2>(coords) + val<2>(delta);
+    real cellMaxX3 = val<3>(coords) + val<3>(delta);
 
     bool insideInclude = false;
     for (size_t i = 0; i < includeGbObjects3D.size(); i++) {
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
index 7930219451088aaef222d06c1a5a72d159817798..ae214c77ca425dde4ecde31f7dc88d19a1616555 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
@@ -104,6 +104,8 @@ void SetConnectorsBlockVisitor<T1, T2>::visit(SPtr<Grid3D> grid, SPtr<Block3D> b
 template <class T1, class T2>
 void SetConnectorsBlockVisitor<T1, T2>::setSameLevelConnectors(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     UBLOG(logDEBUG5, "SetConnectorsBlockVisitor::setSameLevelConnectors() - start");
     int blockRank = block->getRank();
     if (gridRank == blockRank && block->isActive()) {
@@ -114,7 +116,7 @@ void SetConnectorsBlockVisitor<T1, T2>::setSameLevelConnectors(SPtr<Grid3D> grid
         int ix3   = block->getX3();
         int level = block->getLevel();
 
-        for (int dir = D3Q27System::STARTDIR; dir <= D3Q27System::ENDDIR; dir++) {
+        for (int dir = D3Q27System::FSTARTDIR; dir <= D3Q27System::FENDDIR; dir++) { 
             SPtr<Block3D> neighBlock = grid->getNeighborBlock(dir, ix1, ix2, ix3, level);
 
             if (neighBlock) {
@@ -126,7 +128,7 @@ void SetConnectorsBlockVisitor<T1, T2>::setSameLevelConnectors(SPtr<Grid3D> grid
                 } else if (blockRank != neighBlockRank && neighBlock->isActive()) {
                     setRemoteConnectors(block, neighBlock, dir);
 
-                    if (dir >= D3Q27System::DIR_P00 && dir <= D3Q27System::DIR_00M) {
+                    if (dir >= (int)DIR_P00 && dir <= (int)DIR_00M) {
                         int weight = block->getWeight(neighBlockRank);
                         weight++;
                         block->setWeight(neighBlockRank, weight);
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.cpp
index abf828a06e0ec83b492ff9107be4a9a3c4445674..a325fc5ac355e31a1cf1188b884d5e719e85c7be 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.cpp
@@ -4,7 +4,7 @@
 #include "D3Q27System.h"
 #include "LBMSystem.h"
 
-SetForcingBlockVisitor::SetForcingBlockVisitor(LBMReal forcingX1, LBMReal forcingX2, LBMReal forcingX3)
+SetForcingBlockVisitor::SetForcingBlockVisitor(real forcingX1, real forcingX2, real forcingX3)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), forcingX1(forcingX1), forcingX2(forcingX2), forcingX3(forcingX3)
 {
     ftype = 0;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.h
index a6d13c2a702f7ceca6122a78dda1b34f63caf376..e7237d7fc7833f69aa486858527a167f53864afe 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetForcingBlockVisitor.h
@@ -13,7 +13,7 @@ class Grid3D;
 class SetForcingBlockVisitor : public Block3DVisitor
 {
 public:
-    SetForcingBlockVisitor(LBMReal forcingX1, LBMReal forcingX2, LBMReal forcingX3);
+    SetForcingBlockVisitor(real forcingX1, real forcingX2, real forcingX3);
 
     SetForcingBlockVisitor(const mu::Parser &muForcingX1, const mu::Parser &muForcingX2, const mu::Parser &muForcingX3);
 
@@ -25,9 +25,9 @@ public:
 
 private:
     int ftype;
-    LBMReal forcingX1;
-    LBMReal forcingX2;
-    LBMReal forcingX3;
+    real forcingX1;
+    real forcingX2;
+    real forcingX3;
     mu::Parser muForcingX1;
     mu::Parser muForcingX2;
     mu::Parser muForcingX3;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
index 54f46c811d4c2d065bbda7232bd4e32f24559c22..7ff7d20b9fcd85b6939f3184fcde86c6fdaae77d 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
@@ -42,7 +42,7 @@
 #include <mpi/Communicator.h>
 #include "InterpolationProcessor.h"
 
-SetInterpolationConnectorsBlockVisitor::SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, LBMReal nue, SPtr<InterpolationProcessor> iProcessor) :
+SetInterpolationConnectorsBlockVisitor::SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, real nue, SPtr<InterpolationProcessor> iProcessor) :
 Block3DVisitor(0, D3Q27System::MAXLEVEL), 
 	comm(comm),
 	nue(nue),
@@ -72,6 +72,8 @@ void SetInterpolationConnectorsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block
 //////////////////////////////////////////////////////////////////////////
 void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+	using namespace vf::lbm::dir;
+
    UBLOG(logDEBUG5, "SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors() - start");
 
 	//search for all blocks with different ranks
@@ -82,251 +84,251 @@ void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Gri
 		int fbx3 = block->getX3() << 1;
 		int level = block->getLevel() + 1;
 
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_P00))
+		if( block->hasInterpolationFlagCF(DIR_P00))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_P00);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_P00);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_M00))
+		if( block->hasInterpolationFlagCF(DIR_M00))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_M00);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_M00);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0P0))
+		if( block->hasInterpolationFlagCF(DIR_0P0))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0P0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0P0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+		if( block->hasInterpolationFlagCF(DIR_0M0))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0M0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0M0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_00P);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_00P);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_00M))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockNW = grid->getBlock(fbx1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockNE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_00M);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_00M);
 		}
 
 		//////NE-NW-SE-SW
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_PP0)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_P00))
+		if( block->hasInterpolationFlagCF(DIR_PP0)&&!block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_P00))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2+1,fbx3+0,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PP0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PP0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_MM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+		if( block->hasInterpolationFlagCF(DIR_MM0)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_0M0))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MM0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MM0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_PM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+		if( block->hasInterpolationFlagCF(DIR_PM0)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_0M0))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2,fbx3+0,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PM0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PM0);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_MP0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_M00))
+		if( block->hasInterpolationFlagCF(DIR_MP0)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_M00))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MP0);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MP0);
 		}
 
 		/////////TE-BW-BE-TW 1-0
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_P0P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_P0P)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2+0,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+0, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+0, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_P0P);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_P0P);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_M0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_M0M)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_00M))
 		{
 
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2+0,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2+1,fbx3,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+0, fbx3, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+0, fbx3, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_M0M);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_M0M);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_P0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_P0M)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_00M))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1,fbx2+0,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+0, fbx3, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+0, fbx3, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_P0M);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_P0M);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_M0P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_M0P)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1,fbx2+0,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+0, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+0, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_M0P);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_M0P);
 		}
 
 		//////TN-BS-BN-TS
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0PP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_0PP)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+0,fbx2+1,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2+1, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2+1, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0PP);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0PP);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0MM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_0MM)&& !block->hasInterpolationFlagCF(DIR_0M0) && !block->hasInterpolationFlagCF(DIR_00M))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+0,fbx2,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2, fbx3, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2, fbx3, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0MM);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0MM);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0PM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00M))
+		if( block->hasInterpolationFlagCF(DIR_0PM)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_00M))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+0,fbx2+1,fbx3,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2+1,fbx3,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2+1, fbx3, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2+1, fbx3, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0PM);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0PM);
 		}
-		if( block->hasInterpolationFlagCF(D3Q27System::DIR_0MP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_00P))
+		if( block->hasInterpolationFlagCF(DIR_0MP)&& !block->hasInterpolationFlagCF(DIR_0M0) && !block->hasInterpolationFlagCF(DIR_00P))
 		{
 			SPtr<Block3D> fblockSW = grid->getBlock(fbx1+0,fbx2,fbx3+1,level);
 			SPtr<Block3D> fblockSE = grid->getBlock(fbx1+1,fbx2,fbx3+1,level);
-         SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2, fbx3+1, level);
-         SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+0, fbx2, fbx3+1, level);
+			SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
 
-			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_0MP);
+			setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_0MP);
 		}
 
 
 
 
       //////corners
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_PPP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_P0P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0PP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_PP0)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_00P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_P00))
+      if (block->hasInterpolationFlagCF(DIR_PPP)&&!block->hasInterpolationFlagCF(DIR_P0P)&&!block->hasInterpolationFlagCF(DIR_0PP)&&!block->hasInterpolationFlagCF(DIR_PP0)&&!block->hasInterpolationFlagCF(DIR_00P)&&!block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_P00))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+0, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PPP);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PPP);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_MMP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_M0P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0MP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_MM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+      if (block->hasInterpolationFlagCF(DIR_MMP)&&!block->hasInterpolationFlagCF(DIR_M0P)&&!block->hasInterpolationFlagCF(DIR_0MP)&& !block->hasInterpolationFlagCF(DIR_MM0)&& !block->hasInterpolationFlagCF(DIR_00P)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_0M0))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1, fbx2, fbx3, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MMP);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MMP);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_PMP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_P0P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0MP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_PM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+      if (block->hasInterpolationFlagCF(DIR_PMP)&&!block->hasInterpolationFlagCF(DIR_P0P)&&!block->hasInterpolationFlagCF(DIR_0MP)&& !block->hasInterpolationFlagCF(DIR_PM0)&& !block->hasInterpolationFlagCF(DIR_00P)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_0M0))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1+1, fbx2, fbx3+0, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PMP);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PMP);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_MPP)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_M0P)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0PP)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_MP0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00P)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_M00))
+      if (block->hasInterpolationFlagCF(DIR_MPP)&&!block->hasInterpolationFlagCF(DIR_M0P)&&!block->hasInterpolationFlagCF(DIR_0PP)&& !block->hasInterpolationFlagCF(DIR_MP0)&& !block->hasInterpolationFlagCF(DIR_00P)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_M00))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1, fbx2+1, fbx3, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MPP);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MPP);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_PPM)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_P0M)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0PM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_PP0)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_00M)&&!block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_P00))
+      if (block->hasInterpolationFlagCF(DIR_PPM)&&!block->hasInterpolationFlagCF(DIR_P0M)&&!block->hasInterpolationFlagCF(DIR_0PM)&& !block->hasInterpolationFlagCF(DIR_PP0)&&!block->hasInterpolationFlagCF(DIR_00M)&&!block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_P00))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1, fbx2+1, fbx3+0, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+0, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2+1, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PPM);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PPM);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_MMM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0MM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_MM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+      if (block->hasInterpolationFlagCF(DIR_MMM)&& !block->hasInterpolationFlagCF(DIR_0MM)&& !block->hasInterpolationFlagCF(DIR_M0M)&& !block->hasInterpolationFlagCF(DIR_MM0)&& !block->hasInterpolationFlagCF(DIR_00M)&& !block->hasInterpolationFlagCF(DIR_M00) && !block->hasInterpolationFlagCF(DIR_0M0))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1, fbx2, fbx3+0, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1, fbx2, fbx3, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MMM);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MMM);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_PMM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0MM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_PM0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_P00) && !block->hasInterpolationFlagCF(D3Q27System::DIR_0M0))
+      if (block->hasInterpolationFlagCF(DIR_PMM)&& !block->hasInterpolationFlagCF(DIR_0MM)&& !block->hasInterpolationFlagCF(DIR_P0M)&& !block->hasInterpolationFlagCF(DIR_PM0)&& !block->hasInterpolationFlagCF(DIR_00M)&& !block->hasInterpolationFlagCF(DIR_P00) && !block->hasInterpolationFlagCF(DIR_0M0))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1+1, fbx2, fbx3, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1+1, fbx2, fbx3+0, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1+1, fbx2, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_PMM);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_PMM);
       }
-      if (block->hasInterpolationFlagCF(D3Q27System::DIR_MPM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0PM)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_M0M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_MP0)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_00M)&& !block->hasInterpolationFlagCF(D3Q27System::DIR_0P0) && !block->hasInterpolationFlagCF(D3Q27System::DIR_M00))
+      if (block->hasInterpolationFlagCF(DIR_MPM)&& !block->hasInterpolationFlagCF(DIR_0PM)&& !block->hasInterpolationFlagCF(DIR_M0M)&& !block->hasInterpolationFlagCF(DIR_MP0)&& !block->hasInterpolationFlagCF(DIR_00M)&& !block->hasInterpolationFlagCF(DIR_0P0) && !block->hasInterpolationFlagCF(DIR_M00))
       {
          SPtr<Block3D> fblockSW = grid->getBlock(fbx1, fbx2+1, fbx3+0, level);
          SPtr<Block3D> fblockSE;// = grid->getBlock(fbx1, fbx2+1, fbx3, level);
          SPtr<Block3D> fblockNW;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
          SPtr<Block3D> fblockNE;// = grid->getBlock(fbx1, fbx2+1, fbx3+1, level);
 
-         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, D3Q27System::DIR_MPM);
+         setInterpolationConnectors(fblockSW, fblockSE, fblockNW, fblockNE, block, DIR_MPM);
       }
 
 	}
@@ -343,12 +345,12 @@ void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Blo
 	if(fBlockNE) fBlockNERank = fBlockNE->getRank();
 	int cBlockRank   = cBlock->getRank();
 
-	LBMReal omegaF {0.0};
+	real omegaF {0.0};
 	if(fBlockSW) omegaF =LBMSystem::calcCollisionFactor(nue, fBlockSW->getLevel());
 	if(fBlockNW) omegaF =LBMSystem::calcCollisionFactor(nue, fBlockNW->getLevel());
 	if(fBlockSE) omegaF =LBMSystem::calcCollisionFactor(nue, fBlockSE->getLevel());
 	if(fBlockNE) omegaF =LBMSystem::calcCollisionFactor(nue, fBlockNE->getLevel());
-	LBMReal omegaC = LBMSystem::calcCollisionFactor(nue, cBlock->getLevel());
+	real omegaC = LBMSystem::calcCollisionFactor(nue, cBlock->getLevel());
 	iProcessor->setOmegas(omegaC, omegaF);
 
 	InterpolationProcessorPtr cIProcessor(iProcessor->clone());
@@ -373,7 +375,7 @@ void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Blo
 
 	if(cBlockRank == gridRank)
 	{
-      SPtr<Block3DConnector> connector(new CoarseToFineVectorConnector< TbTransmitter< CbVector< LBMReal > > >(cBlock,
+      SPtr<Block3DConnector> connector(new CoarseToFineVectorConnector< TbTransmitter< CbVector< real > > >(cBlock,
 			senderCFevenEvenSW, receiverCFevenEvenSW, senderCFevenOddNW,  receiverCFevenOddNW, 
 			senderCFoddEvenSE,  receiverCFoddEvenSE,  senderCFoddOddNE,   receiverCFoddOddNE, 
 			dir, cIProcessor) );
@@ -381,25 +383,25 @@ void SetInterpolationConnectorsBlockVisitor::setInterpolationConnectors(SPtr<Blo
 	}
 	if(fBlockSW && fBlockSWRank == gridRank)
 	{
-		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< LBMReal > > >(fBlockSW, 
+		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< real > > >(fBlockSW, 
 			senderFCevenEvenSW, receiverFCevenEvenSW, dir, fIProcessorSW, EvenEvenSW) );
 		fBlockSW->setConnector(connector);
 	}
 	if(fBlockNW && fBlockNWRank == gridRank)
 	{
-		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< LBMReal > > >(fBlockNW, 
+		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< real > > >(fBlockNW, 
 			senderFCevenOddNW, receiverFCevenOddNW, dir, fIProcessorNW, EvenOddNW) );
 		fBlockNW->setConnector(connector);
 	}
 	if(fBlockSE && fBlockSERank == gridRank)
 	{
-		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< LBMReal > > >(fBlockSE, 
+		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< real > > >(fBlockSE, 
 			senderFCoddEvenSE, receiverFCoddEvenSE, dir, fIProcessorSE, OddEvenSE) );
 		fBlockSE->setConnector(connector);
 	}
 	if(fBlockNE && fBlockNERank == gridRank)
 	{
-		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< LBMReal > > >(fBlockNE, 
+		SPtr<Block3DConnector> connector( new FineToCoarseVectorConnector< TbTransmitter< CbVector< real > > >(fBlockNE, 
 			senderFCoddOddNE, receiverFCoddOddNE, dir, fIProcessorNE, OddOddNE) );
 		fBlockNE->setConnector(connector);
 	}
@@ -419,8 +421,8 @@ void SetInterpolationConnectorsBlockVisitor::createTransmitters(SPtr<Block3D> cB
 	int cBlockRank = cBlock->getRank();
 	if(fBlockRank == cBlockRank && fBlockRank == gridRank)
 	{
-		senderCF = receiverFC = CreateTransmittersHelper::TransmitterPtr( new TbLocalTransmitter< CbVector< LBMReal > >());
-		senderFC = receiverCF = CreateTransmittersHelper::TransmitterPtr( new TbLocalTransmitter< CbVector< LBMReal > >());
+		senderCF = receiverFC = CreateTransmittersHelper::TransmitterPtr( new TbLocalTransmitter< CbVector< real > >());
+		senderFC = receiverCF = CreateTransmittersHelper::TransmitterPtr( new TbLocalTransmitter< CbVector< real > >());
 	}
 	else if(cBlockRank == gridRank)
 	{
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
index 7ae54b0b62cadbc58eb5b0cc804f00a977d47615..c30d87ecaa042a30d931e29ba185fdd5230e2a68 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
@@ -50,7 +50,7 @@ class InterpolationProcessor;
 class SetInterpolationConnectorsBlockVisitor : public Block3DVisitor
 {
 public:
-    SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, LBMReal nue, SPtr<InterpolationProcessor> iProcessor);
+    SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, real nue, SPtr<InterpolationProcessor> iProcessor);
     ~SetInterpolationConnectorsBlockVisitor() override;
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
     //////////////////////////////////////////////////////////////////////////
@@ -65,7 +65,7 @@ protected:
                             CreateTransmittersHelper::TransmitterPtr &receiverFC);
     std::shared_ptr<vf::mpi::Communicator> comm;
     int gridRank;
-    LBMReal nue;
+    real nue;
     SPtr<InterpolationProcessor> iProcessor;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationDirsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationDirsBlockVisitor.cpp
index 689d84d0754f74c2f680fd2b7aa22ec0c54008c1..dbb85c0b848d2c5dc89e99f72d4091476eb31790 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationDirsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationDirsBlockVisitor.cpp
@@ -11,6 +11,8 @@ SetInterpolationDirsBlockVisitor::SetInterpolationDirsBlockVisitor(std::vector<i
 //////////////////////////////////////////////////////////////////////////
 void SetInterpolationDirsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     int ix1, ix2, ix3, level;
     ix1   = block->getX1();
     ix2   = block->getX2();
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp
index 354a577e701f9d017181e6006833ad40749eef60..54271370c11700886f969eeef75a2389ef062828 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.cpp
@@ -44,7 +44,7 @@
 #include <utility>
 
 //////////////////////////////////////////////////////////////////////////
-SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, double availMem, double needMem,
+SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nue, real availMem, real needMem,
                                              SetKernelBlockVisitor::Action action)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), kernel(std::move(kernel)), nue(nue), action(action), dataSetFlag(true)
 {
@@ -53,7 +53,7 @@ SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue
     }
 }
 
-SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, int numberOfProcesses,
+SetKernelBlockVisitor::SetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nue, int numberOfProcesses,
                                              SetKernelBlockVisitor::Action action)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), kernel(std::move(kernel)), nue(nue), action(action), dataSetFlag(true),
       numberOfProcesses(numberOfProcesses)
@@ -66,7 +66,7 @@ void SetKernelBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
     throwExceptionIfNotEnoughMemory(grid);
 
     if (kernel && (block->getRank() == grid->getRank())) {
-        LBMReal collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
+        real collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
         kernel->setCollisionFactor(collFactor);
         kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
         kernel->setDeltaT(LBMSystem::getDeltaT(block->getLevel()));
@@ -122,7 +122,7 @@ void SetKernelBlockVisitor::throwExceptionIfNotEnoughMemory(const SPtr<Grid3D> &
         throw UbException(UB_EXARGS, "SetKernelBlockVisitor: Not enough memory!!!");
 }
 
-double SetKernelBlockVisitor::getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const
+real SetKernelBlockVisitor::getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const
 {
     unsigned long long numberOfNodesPerBlockWithGhostLayer;
     auto numberOfBlocks = (unsigned long long)grid->getNumberOfBlocks();
@@ -133,7 +133,7 @@ double SetKernelBlockVisitor::getRequiredPhysicalMemory(const SPtr<Grid3D> &grid
                                           (val<2>(blockNx) + ghostLayer) * (val<3>(blockNx) + ghostLayer);
 
     auto needMemAll =
-        double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
+        real(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(real) + sizeof(int) + sizeof(float) * 4));
 
-    return needMemAll / double(numberOfProcesses);
+    return needMemAll / real(numberOfProcesses);
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h
index 1e0621f22379e52701aafa4ab06f858cb1247d7e..29685f49908e5266dc5307f17da42c9e8c874491 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetKernelBlockVisitor.h
@@ -49,10 +49,10 @@ class SetKernelBlockVisitor : public Block3DVisitor
 public:
     enum Action { NewKernel, ChangeKernel, ChangeKernelWithData };
 
-    SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, double availMem, double needMem,
+    SetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nue, real availMem, real needMem,
                           SetKernelBlockVisitor::Action action = SetKernelBlockVisitor::NewKernel);
 
-    SetKernelBlockVisitor(SPtr<LBMKernel> kernel, LBMReal nue, int numberOfProcesses,
+    SetKernelBlockVisitor(SPtr<LBMKernel> kernel, real nue, int numberOfProcesses,
                           SetKernelBlockVisitor::Action action = SetKernelBlockVisitor::NewKernel);
 
     ~SetKernelBlockVisitor() override = default;
@@ -63,13 +63,13 @@ public:
 
 private:
     SPtr<LBMKernel> kernel;
-    LBMReal nue;
+    real nue;
     Action action;
     bool dataSetFlag;
 
     int numberOfProcesses{ 1 };
 
-    double getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const;
+    real getRequiredPhysicalMemory(const SPtr<Grid3D> &grid) const;
 
     void throwExceptionIfNotEnoughMemory(const SPtr<Grid3D> &grid);
 };
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetUndefinedNodesBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetUndefinedNodesBlockVisitor.cpp
index 3b9eb9493fe0fa66f05fdd3ea42505604836d218..bb6cc5dc6fe06b1a63647d83897bc1fe83066a1a 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetUndefinedNodesBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetUndefinedNodesBlockVisitor.cpp
@@ -15,6 +15,8 @@ SetUndefinedNodesBlockVisitor::SetUndefinedNodesBlockVisitor(bool twoTypeOfConne
 //////////////////////////////////////////////////////////////////////////
 void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     if (!block->hasInterpolationFlag())
         return;
 
@@ -40,7 +42,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
     // int offset = 2;
     int offset = 3;
 
-    if (block->hasInterpolationFlag(D3Q27System::DIR_P00)) {
+    if (block->hasInterpolationFlag(DIR_P00)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -51,7 +53,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_M00)) {
+    if (block->hasInterpolationFlag(DIR_M00)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -62,7 +64,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0P0)) {
+    if (block->hasInterpolationFlag(DIR_0P0)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2;
@@ -73,7 +75,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0M0)) {
+    if (block->hasInterpolationFlag(DIR_0M0)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -84,7 +86,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_00P)) {
+    if (block->hasInterpolationFlag(DIR_00P)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -95,7 +97,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_00M)) {
+    if (block->hasInterpolationFlag(DIR_00M)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -106,7 +108,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PP0)) {
+    if (block->hasInterpolationFlag(DIR_PP0)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -119,7 +121,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MM0)) {
+    if (block->hasInterpolationFlag(DIR_MM0)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -132,7 +134,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PM0)) {
+    if (block->hasInterpolationFlag(DIR_PM0)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -145,7 +147,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MP0)) {
+    if (block->hasInterpolationFlag(DIR_MP0)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -158,7 +160,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_P0P)) {
+    if (block->hasInterpolationFlag(DIR_P0P)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -171,7 +173,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_M0M)) {
+    if (block->hasInterpolationFlag(DIR_M0M)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -184,7 +186,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_P0M)) {
+    if (block->hasInterpolationFlag(DIR_P0M)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -197,7 +199,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_M0P)) {
+    if (block->hasInterpolationFlag(DIR_M0P)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -210,7 +212,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0PP)) {
+    if (block->hasInterpolationFlag(DIR_0PP)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2;
@@ -223,7 +225,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0MM)) {
+    if (block->hasInterpolationFlag(DIR_0MM)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -236,7 +238,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0PM)) {
+    if (block->hasInterpolationFlag(DIR_0PM)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2;
@@ -249,7 +251,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_0MP)) {
+    if (block->hasInterpolationFlag(DIR_0MP)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -262,7 +264,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PPP)) {
+    if (block->hasInterpolationFlag(DIR_PPP)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -277,7 +279,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MPP)) {
+    if (block->hasInterpolationFlag(DIR_MPP)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -292,7 +294,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PMP)) {
+    if (block->hasInterpolationFlag(DIR_PMP)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -307,7 +309,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MMP)) {
+    if (block->hasInterpolationFlag(DIR_MMP)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -322,7 +324,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             startix3 = startix3 - offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PPM)) {
+    if (block->hasInterpolationFlag(DIR_PPM)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -337,7 +339,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MPM)) {
+    if (block->hasInterpolationFlag(DIR_MPM)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -352,7 +354,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_PMM)) {
+    if (block->hasInterpolationFlag(DIR_PMM)) {
         int startix1 = maxX1;
         int endix1   = maxX1;
         if (block->hasInterpolationFlagCF())
@@ -367,7 +369,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
             endix3 = endix3 + offset;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlag(D3Q27System::DIR_MMM)) {
+    if (block->hasInterpolationFlag(DIR_MMM)) {
         int startix1 = minX1;
         int endix1   = minX1;
         if (block->hasInterpolationFlagCF())
@@ -395,7 +397,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
     maxX2 = static_cast<int>(bcMatrix->getNX2()) - 1 - ll;
     maxX3 = static_cast<int>(bcMatrix->getNX3()) - 1 - ll;
 
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_P00)) {
+    if (block->hasInterpolationFlagFC(DIR_P00)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -404,7 +406,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_M00)) {
+    if (block->hasInterpolationFlagFC(DIR_M00)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -413,7 +415,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0P0)) {
+    if (block->hasInterpolationFlagFC(DIR_0P0)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -422,7 +424,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0M0)) {
+    if (block->hasInterpolationFlagFC(DIR_0M0)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -431,7 +433,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_00P)) {
+    if (block->hasInterpolationFlagFC(DIR_00P)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -440,7 +442,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_00M)) {
+    if (block->hasInterpolationFlagFC(DIR_00M)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -449,7 +451,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PP0)) {
+    if (block->hasInterpolationFlagFC(DIR_PP0)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -458,7 +460,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MM0)) {
+    if (block->hasInterpolationFlagFC(DIR_MM0)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -467,7 +469,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PM0)) {
+    if (block->hasInterpolationFlagFC(DIR_PM0)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -476,7 +478,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MP0)) {
+    if (block->hasInterpolationFlagFC(DIR_MP0)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = maxX2 - offset2;
@@ -485,7 +487,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_P0P)) {
+    if (block->hasInterpolationFlagFC(DIR_P0P)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -494,7 +496,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_M0M)) {
+    if (block->hasInterpolationFlagFC(DIR_M0M)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -503,7 +505,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_P0M)) {
+    if (block->hasInterpolationFlagFC(DIR_P0M)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -512,7 +514,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_M0P)) {
+    if (block->hasInterpolationFlagFC(DIR_M0P)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -521,7 +523,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0PP)) {
+    if (block->hasInterpolationFlagFC(DIR_0PP)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -530,7 +532,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0MM)) {
+    if (block->hasInterpolationFlagFC(DIR_0MM)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -539,7 +541,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0PM)) {
+    if (block->hasInterpolationFlagFC(DIR_0PM)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -548,7 +550,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_0MP)) {
+    if (block->hasInterpolationFlagFC(DIR_0MP)) {
         int startix1 = minX1;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -557,7 +559,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PPP)) {
+    if (block->hasInterpolationFlagFC(DIR_PPP)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -566,7 +568,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MPP)) {
+    if (block->hasInterpolationFlagFC(DIR_MPP)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = maxX2 - offset2;
@@ -575,7 +577,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PMP)) {
+    if (block->hasInterpolationFlagFC(DIR_PMP)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -584,7 +586,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MMP)) {
+    if (block->hasInterpolationFlagFC(DIR_MMP)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -593,7 +595,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = maxX3;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PPM)) {
+    if (block->hasInterpolationFlagFC(DIR_PPM)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = maxX2 - offset2;
@@ -602,7 +604,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MPM)) {
+    if (block->hasInterpolationFlagFC(DIR_MPM)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = maxX2 - offset2;
@@ -611,7 +613,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_PMM)) {
+    if (block->hasInterpolationFlagFC(DIR_PMM)) {
         int startix1 = maxX1 - offset2;
         int endix1   = maxX1;
         int startix2 = minX2;
@@ -620,7 +622,7 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
         int endix3   = minX3 + offset2;
         this->setNodesUndefined(startix1, endix1, startix2, endix2, startix3, endix3, bcMatrix);
     }
-    if (block->hasInterpolationFlagFC(D3Q27System::DIR_MMM)) {
+    if (block->hasInterpolationFlagFC(DIR_MMM)) {
         int startix1 = minX1;
         int endix1   = minX1 + offset2;
         int startix2 = minX2;
@@ -633,10 +635,10 @@ void SetUndefinedNodesBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block
     // invert scaleCF blocks
     if (block->hasInterpolationFlagCF()) {
         if (block->hasInterpolationFlagFC() && twoTypeOfConnectorsCheck) {
-            for (int i = D3Q27System::DIR_P00; i <= D3Q27System::DIR_MMM; i++) {
+            for (int i = (int)DIR_P00; i <= (int)DIR_MMM; i++) {
                 UBLOG(logINFO, "FC in dir=" << i << " " << block->hasInterpolationFlagFC(i));
             }
-            for (int i = D3Q27System::DIR_P00; i <= D3Q27System::DIR_MMM; i++) {
+            for (int i = (int)DIR_P00; i <= (int)DIR_MMM; i++) {
                 UBLOG(logINFO, "CF in dir=" << i << " " << block->hasInterpolationFlagCF(i));
             }
             throw UbException(UB_EXARGS, "block " + block->toString() + " has CF and FC");
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
index 1ef34e0e8bbf54625efbc946b141cf16f24c213d..6183024279ce1753f2fd78bf20b72313b84662f1 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
@@ -13,7 +13,7 @@
 
 using namespace std;
 
-SpongeLayerBlockVisitor::SpongeLayerBlockVisitor(SPtr<GbCuboid3D> boundingBox, SPtr<LBMKernel> kernel, double nue,
+SpongeLayerBlockVisitor::SpongeLayerBlockVisitor(SPtr<GbCuboid3D> boundingBox, SPtr<LBMKernel> kernel, real nue,
                                                  int dir)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), boundingBox(boundingBox), kernel(kernel), nue(nue), dir(dir)
 {
@@ -23,6 +23,8 @@ SpongeLayerBlockVisitor::~SpongeLayerBlockVisitor() = default;
 //////////////////////////////////////////////////////////////////////////
 void SpongeLayerBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
+    using namespace vf::lbm::dir;
+
     if (!boundingBox) {
         UB_THROW(UbException(UB_EXARGS, "The bounding box isn't set!"));
     }
@@ -33,15 +35,15 @@ void SpongeLayerBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
         UbTupleDouble3 org          = grid->getBlockWorldCoordinates(block);
         UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
 
-        double minX1 = val<1>(org);
-        double minX2 = val<2>(org);
-        double minX3 = val<3>(org);
-        double maxX1 = val<1>(org) + val<1>(blockLengths);
-        double maxX2 = val<2>(org) + val<2>(blockLengths);
-        double maxX3 = val<3>(org) + val<3>(blockLengths);
+        real minX1 = val<1>(org);
+        real minX2 = val<2>(org);
+        real minX3 = val<3>(org);
+        real maxX1 = val<1>(org) + val<1>(blockLengths);
+        real maxX2 = val<2>(org) + val<2>(blockLengths);
+        real maxX3 = val<3>(org) + val<3>(blockLengths);
 
         if (boundingBox->isCellInsideGbObject3D(minX1, minX2, minX3, maxX1, maxX2, maxX3)) {
-            LBMReal collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
+            real collFactor = LBMSystem::calcCollisionFactor(nue, block->getLevel());
             kernel->setCollisionFactor(collFactor);
             kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
             kernel->setDeltaT(LBMSystem::getDeltaT(block->getLevel()));
@@ -63,35 +65,35 @@ void SpongeLayerBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
             }
             newKernel->setBCProcessor(bcProc);
 
-            double oldCollFactor = newKernel->getCollisionFactor();
+            real oldCollFactor = newKernel->getCollisionFactor();
 
             UbTupleInt3 ixMin = grid->getBlockIndexes(boundingBox->getX1Minimum(), boundingBox->getX2Minimum(),
                                                       boundingBox->getX3Minimum());
             UbTupleInt3 ixMax = grid->getBlockIndexes(boundingBox->getX1Maximum(), boundingBox->getX2Maximum(),
                                                       boundingBox->getX3Maximum());
 
-            double newCollFactor;
+            real newCollFactor;
 
-            if (dir == D3Q27System::DIR_P00) {
+            if (dir == DIR_P00) {
                 int ibX1      = block->getX1();
                 int ibMax     = val<1>(ixMax) - val<1>(ixMin) + 1;
-                double index  = (double)(ibX1 - val<1>(ixMin) + 1);
-                newCollFactor = oldCollFactor - (oldCollFactor - 1.0) / (double)(ibMax)*index;
-            } else if (dir == D3Q27System::DIR_M00) {
+                real index  = (real)(ibX1 - val<1>(ixMin) + 1);
+                newCollFactor = oldCollFactor - (oldCollFactor - 1.0) / (real)(ibMax)*index;
+            } else if (dir == DIR_M00) {
                 int ibX1      = block->getX1();
                 int ibMax     = val<1>(ixMax) - val<1>(ixMin) + 1;
-                double index  = (double)(ibX1 - val<1>(ixMin) + 1);
-                newCollFactor = (oldCollFactor - 1.0) / (double)(ibMax)*index;
-            } else if (dir == D3Q27System::DIR_00P) {
+                real index  = (real)(ibX1 - val<1>(ixMin) + 1);
+                newCollFactor = (oldCollFactor - 1.0) / (real)(ibMax)*index;
+            } else if (dir == DIR_00P) {
                 int ibX3      = block->getX3();
                 int ibMax     = val<3>(ixMax) - val<3>(ixMin) + 1;
-                double index  = (double)(ibX3 - val<3>(ixMin) + 1);
-                newCollFactor = oldCollFactor - (oldCollFactor - 1.0) / (double)(ibMax)*index;
-            } else if (dir == D3Q27System::DIR_00M) {
+                real index  = (real)(ibX3 - val<3>(ixMin) + 1);
+                newCollFactor = oldCollFactor - (oldCollFactor - 1.0) / (real)(ibMax)*index;
+            } else if (dir == DIR_00M) {
                 int ibX3      = block->getX3();
                 int ibMax     = val<3>(ixMax) - val<3>(ixMin) + 1;
-                double index  = (double)(ibX3 - val<3>(ixMin) + 1);
-                newCollFactor = (oldCollFactor - 1.0) / (double)(ibMax)*index;
+                real index  = (real)(ibX3 - val<3>(ixMin) + 1);
+                newCollFactor = (oldCollFactor - 1.0) / (real)(ibMax)*index;
             } else
                 UB_THROW(UbException(UB_EXARGS, "Problem: no orthogonal sponge layer!"));
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.h
index 0896db06577fa57f3ae3a137430c69eac214e24f..184a89eba969f3a6506c83758b79d11cfb8d3d60 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.h
@@ -15,7 +15,7 @@ class LBMKernel;
 class SpongeLayerBlockVisitor : public Block3DVisitor
 {
 public:
-    SpongeLayerBlockVisitor(SPtr<GbCuboid3D> boundingBox, SPtr<LBMKernel> kernel, double nue, int dir);
+    SpongeLayerBlockVisitor(SPtr<GbCuboid3D> boundingBox, SPtr<LBMKernel> kernel, real nue, int dir);
     ~SpongeLayerBlockVisitor() override;
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
@@ -23,7 +23,7 @@ public:
 private:
     SPtr<GbCuboid3D> boundingBox;
     SPtr<LBMKernel> kernel;
-    double nue;
+    real nue;
     int dir;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.cpp
index 311a8bf19786198e85b00eb500f6e7c90d2d5106..3dbe4d9c7c01a11f33d0f1e04a563ba1016a748b 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.cpp
@@ -5,12 +5,12 @@
 #include "ILBMKernel.h"
 #include "LBMSystem.h"
 
-ViscosityBlockVisitor::ViscosityBlockVisitor(LBMReal nu) : Block3DVisitor(0, D3Q27System::MAXLEVEL), nu(nu) {}
+ViscosityBlockVisitor::ViscosityBlockVisitor(real nu) : Block3DVisitor(0, D3Q27System::MAXLEVEL), nu(nu) {}
 //////////////////////////////////////////////////////////////////////////
 void ViscosityBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> block)
 {
     if (block->getRank() == grid->getRank()) {
-        LBMReal collFactor = LBMSystem::calcCollisionFactor(nu, block->getLevel());
+        real collFactor = LBMSystem::calcCollisionFactor(nu, block->getLevel());
         block->getKernel()->setCollisionFactor(collFactor);
     }
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.h
index 003bd41e1a02a547386cbde878564cf0b2bf8209..cb665f41c47f62d61584943bdf6f3ce64f84cc4f 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/ViscosityBlockVisitor.h
@@ -12,14 +12,14 @@ class Block3D;
 class ViscosityBlockVisitor : public Block3DVisitor
 {
 public:
-    ViscosityBlockVisitor(LBMReal nu);
+    ViscosityBlockVisitor(real nu);
 
     ~ViscosityBlockVisitor() override = default;
 
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
 
 private:
-    LBMReal nu;
+    real nu;
 };
 
 #endif
diff --git a/src/cpu/pythonbindings/CMakeLists.txt b/src/cpu/pythonbindings/CMakeLists.txt
deleted file mode 100644
index 3b4e7e5a1506899710dc03ea275c0d4ac1cff66d..0000000000000000000000000000000000000000
--- a/src/cpu/pythonbindings/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-project(VirtualFluidsPython)
-
-pybind11_add_module(pyfluids src/VirtualfluidsModule.cpp)
-pybind11_add_module(pymuparser src/muParser.cpp)
-
-# TODO: Move this to MuParser CMakeLists.txt
-set_target_properties(muparser PROPERTIES POSITION_INDEPENDENT_CODE ON)
-
-target_compile_definitions(pyfluids PRIVATE VF_METIS VF_MPI)
-target_compile_definitions(pymuparser PRIVATE VF_METIS VF_MPI)
-
-target_link_libraries(pyfluids PRIVATE simulationconfig VirtualFluidsCore muparser basics)
-target_link_libraries(pymuparser PRIVATE muparser)
-
-target_include_directories(pyfluids PRIVATE ${CMAKE_SOURCE_DIR}/src/)
-target_include_directories(pyfluids PRIVATE ${CMAKE_BINARY_DIR})
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/VirtualfluidsModule.cpp b/src/cpu/pythonbindings/src/VirtualfluidsModule.cpp
deleted file mode 100644
index 564dc1838d48a92340fa5491779177b299bcb270..0000000000000000000000000000000000000000
--- a/src/cpu/pythonbindings/src/VirtualfluidsModule.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <pybind11/pybind11.h>
-#include "submodules/boundaryconditions.cpp"
-#include "submodules/simulationconfig.cpp"
-#include "submodules/geometry.cpp"
-#include "submodules/kernel.cpp"
-#include "submodules/simulationparameters.cpp"
-#include "submodules/writer.cpp"
-
-namespace py_bindings
-{
-    namespace py = pybind11;
-
-    PYBIND11_MODULE(pyfluids, m)
-    {
-        boundaryconditions::makeModule(m);
-        simulation::makeModule(m);
-        geometry::makeModule(m);
-        kernel::makeModule(m);
-        parameters::makeModule(m);
-        writer::makeModule(m);
-    }
-}
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/boundaryconditions.cpp b/src/cpu/pythonbindings/src/submodules/boundaryconditions.cpp
deleted file mode 100644
index 3bff7bc069ca20fe1c0cf3d1847b9714e0381505..0000000000000000000000000000000000000000
--- a/src/cpu/pythonbindings/src/submodules/boundaryconditions.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <BoundaryConditions/DensityBCAdapter.h>
-#include <BoundaryConditions/NonReflectingOutflowBCAlgorithm.h>
-#include <BoundaryConditions/BCAdapter.h>
-#include <BoundaryConditions/NoSlipBCAdapter.h>
-#include <BoundaryConditions/VelocityBCAdapter.h>
-#include <BoundaryConditions/NoSlipBCAlgorithm.h>
-#include <BoundaryConditions/VelocityBCAlgorithm.h>
-#include <BoundaryConditions/HighViscosityNoSlipBCAlgorithm.h>
-
-namespace boundaryconditions
-{
-    namespace py = pybind11;
-    using namespace py::literals;
-
-    template<class adapter, class algorithm,
-            class = std::enable_if_t<std::is_base_of<BCAdapter, adapter>::value>,
-            class = std::enable_if_t<std::is_base_of<BCAlgorithm, algorithm>::value>>
-    class PyBoundaryCondition : public adapter
-    {
-    public:
-        template<typename ...Args>
-        PyBoundaryCondition(Args &&... args) : adapter(std::forward<Args>(args)...)
-        {
-            this->setBcAlgorithm(std::make_shared<algorithm>());
-        }
-    };
-
-    template<class adapter, class algorithm>
-    using bc_class = py::class_<PyBoundaryCondition<adapter, algorithm>, BCAdapter,
-            std::shared_ptr<PyBoundaryCondition<adapter, algorithm>>>;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::module_ bcModule = parentModule.def_submodule("boundaryconditions");
-
-        auto _ = py::class_<BCAdapter, std::shared_ptr<BCAdapter>>(bcModule, "BCAdapter");
-
-        bc_class<NoSlipBCAdapter, NoSlipBCAlgorithm>(bcModule, "NoSlipBoundaryCondition")
-                .def(py::init());
-
-        bc_class<NoSlipBCAdapter, HighViscosityNoSlipBCAlgorithm>(bcModule, "HighViscosityNoSlipBoundaryCondition")
-                .def(py::init());
-
-        bc_class<VelocityBCAdapter, VelocityBCAlgorithm>(bcModule, "VelocityBoundaryCondition")
-                .def(py::init())
-                .def(py::init<bool &, bool &, bool &, mu::Parser &, double &, double &>(),
-                     "vx1"_a, "vx2"_a, "vx3"_a,
-                     "function"_a, "start_time"_a, "end_time"_a)
-                .def(py::init<bool &, bool &, bool &, mu::Parser &, mu::Parser &, mu::Parser &, double &, double &>(),
-                     "vx1"_a, "vx2"_a, "vx3"_a,
-                     "function_vx1"_a, "function_vx2"_a, "function_vx2"_a,
-                     "start_time"_a, "end_time"_a)
-                .def(py::init<double &, double &, double &, double &, double &, double &, double &, double &, double &>(),
-                     "vx1"_a, "vx1_start_time"_a, "vx1_end_time"_a,
-                     "vx2"_a, "vx2_start_time"_a, "vx2_end_time"_a,
-                     "vx3"_a, "vx3_start_time"_a, "vx3_end_time"_a);
-
-        bc_class<DensityBCAdapter, NonReflectingOutflowBCAlgorithm>(bcModule, "NonReflectingOutflow")
-                .def(py::init());
-    }
-
-}
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/geometry.cpp b/src/cpu/pythonbindings/src/submodules/geometry.cpp
deleted file mode 100644
index b7ff4dd761258d41687589d2dd89c3479093753e..0000000000000000000000000000000000000000
--- a/src/cpu/pythonbindings/src/submodules/geometry.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <geometry3d/GbPoint3D.h>
-#include <geometry3d/GbObject3D.h>
-#include <geometry3d/GbCuboid3D.h>
-#include <geometry3d/GbLine3D.h>
-#include <Interactors/Interactor3D.h>
-
-
-namespace geometry
-{
-    namespace py = pybind11;
-
-    template<class GeoObject>
-    using py_geometry = py::class_<GeoObject, GbObject3D, std::shared_ptr<GeoObject>>;
-
-    std::string GbPoint3D_repr_(const GbPoint3D &instance)
-    {
-        std::ostringstream stream;
-        stream << "<GbPoint3D"
-               << " x1: " << instance.getX1Coordinate()
-               << " x2: " << instance.getX2Coordinate()
-               << " x3: " << instance.getX3Coordinate() << ">";
-
-        return stream.str();
-    }
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::module geometry = parentModule.def_submodule("geometry");
-
-        py::class_<GbObject3D, std::shared_ptr<GbObject3D>>(geometry, "GbObject3D");
-
-        py_geometry<GbPoint3D>(geometry, "GbPoint3D")
-                .def(py::init())
-                .def(py::init<double &, double &, double &>())
-                .def(py::init<GbPoint3D *>())
-                .def_property("x1", &GbPoint3D::getX1Coordinate, &GbPoint3D::setX1)
-                .def_property("x2", &GbPoint3D::getX2Coordinate, &GbPoint3D::setX2)
-                .def_property("x3", &GbPoint3D::getX3Coordinate, &GbPoint3D::setX3)
-                .def("get_distance", &GbPoint3D::getDistance)
-                .def("__repr__", &GbPoint3D_repr_);
-
-        py_geometry<GbCuboid3D>(geometry, "GbCuboid3D")
-                .def(py::init())
-                .def(py::init<double &, double &, double &, double &, double &, double &>())
-                .def(py::init<GbPoint3D *, GbPoint3D *>())
-                .def(py::init<GbCuboid3D *>())
-                .def_property("point1", &GbCuboid3D::getPoint1, &GbCuboid3D::setPoint1)
-                .def_property("point2", &GbCuboid3D::getPoint2, &GbCuboid3D::setPoint2)
-                .def("__repr__", [&](GbCuboid3D &instance)
-                {
-                    std::ostringstream stream;
-                    stream << "<GbCuboid3D" << std::endl
-                           << "point1: " << GbPoint3D_repr_(instance.getPoint1()) << std::endl
-                           << "point2: " << GbPoint3D_repr_(instance.getPoint2()) << ">";
-                    return stream.str();
-                });
-
-        py_geometry<GbLine3D>(geometry, "GbLine3D")
-                .def(py::init())
-                .def(py::init<GbPoint3D *, GbPoint3D *>())
-                .def(py::init<GbLine3D>())
-                .def_property("point1", &GbLine3D::getPoint1, &GbLine3D::setPoint1)
-                .def_property("point2", &GbLine3D::getPoint2, &GbLine3D::setPoint2)
-                .def("__repr__", [&](GbLine3D &instance)
-                {
-                    std::ostringstream stream;
-                    stream << "<GbLine3D" << std::endl
-                           << "point1: " << GbPoint3D_repr_(instance.getPoint1()) << std::endl
-                           << "point2: " << GbPoint3D_repr_(instance.getPoint2()) << ">";
-                    return stream.str();
-                });
-
-
-        py::class_<Interactor3D, std::shared_ptr<Interactor3D>>(geometry, "State")
-                .def_readonly_static("SOLID", &Interactor3D::SOLID)
-                .def_readonly_static("INVERSESOLID", &Interactor3D::INVERSESOLID)
-                .def_readonly_static("TIMEDEPENDENT", &Interactor3D::TIMEDEPENDENT)
-                .def_readonly_static("FLUID", &Interactor3D::FLUID)
-                .def_readonly_static("MOVEABLE", &Interactor3D::MOVEABLE)
-                .def_readonly_static("CHANGENOTNECESSARY", &Interactor3D::CHANGENOTNECESSARY);
-    }
-
-}
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/kernel.cpp b/src/cpu/pythonbindings/src/submodules/kernel.cpp
deleted file mode 100644
index fb291790632cc2041410f60a14fca8d966283343..0000000000000000000000000000000000000000
--- a/src/cpu/pythonbindings/src/submodules/kernel.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#include <memory>
-#include <pybind11/pybind11.h>
-#include <simulationconfig/KernelFactory.h>
-#include <simulationconfig/KernelConfigStructs.h>
-
-namespace kernel
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::module kernelModule = parentModule.def_submodule("kernel");
-
-        py::enum_<KernelFactory::KernelType>(kernelModule, "KernelType")
-                .value("BGK", KernelFactory::BGK)
-                .value("CompressibleCumulantFourthOrderViscosity",
-                       KernelFactory::COMPRESSIBLE_CUMULANT_4TH_ORDER_VISCOSITY);
-
-        py::class_<LBMKernelConfiguration, std::shared_ptr<LBMKernelConfiguration>>(kernelModule, "LBMKernel")
-                .def(py::init<KernelFactory::KernelType>())
-                .def_readwrite("type", &LBMKernelConfiguration::kernelType)
-                .def_readwrite("use_forcing", &LBMKernelConfiguration::useForcing)
-                .def_readwrite("forcing_in_x1", &LBMKernelConfiguration::forcingX1)
-                .def_readwrite("forcing_in_x2", &LBMKernelConfiguration::forcingX2)
-                .def_readwrite("forcing_in_x3", &LBMKernelConfiguration::forcingX3)
-                .def("set_forcing", [](LBMKernelConfiguration &kernelConfig, double x1, double x2, double x3)
-                {
-                    kernelConfig.forcingX1 = x1;
-                    kernelConfig.forcingX2 = x2;
-                    kernelConfig.forcingX3 = x3;
-                })
-                .def("__repr__", [](LBMKernelConfiguration &kernelConfig)
-                {
-                    std::ostringstream stream;
-                    stream << "<" << kernelConfig.kernelType << std::endl
-                           << "Use forcing: " << kernelConfig.useForcing << std::endl
-                           << "Forcing in x1: " << kernelConfig.forcingX1 << std::endl
-                           << "Forcing in x2: " << kernelConfig.forcingX2 << std::endl
-                           << "Forcing in x3: " << kernelConfig.forcingX3 << ">" << std::endl;
-
-                    return stream.str();
-                });
-    }
-
-}
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/simulationconfig.cpp b/src/cpu/pythonbindings/src/submodules/simulationconfig.cpp
deleted file mode 100644
index 60af4e36af4dca67e9262dd9f5ee1f46d5b7bb58..0000000000000000000000000000000000000000
--- a/src/cpu/pythonbindings/src/submodules/simulationconfig.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <simulationconfig/Simulation.h>
-
-namespace simulation
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::class_<Simulation, std::shared_ptr<Simulation>>(parentModule, "Simulation")
-                .def(py::init())
-                .def("set_writer", &Simulation::setWriterConfiguration)
-                .def("set_grid_parameters", &Simulation::setGridParameters)
-                .def("set_physical_parameters", &Simulation::setPhysicalParameters)
-                .def("set_runtime_parameters", &Simulation::setRuntimeParameters)
-                .def("set_kernel_config", &Simulation::setKernelConfiguration)
-                .def("add_object", &Simulation::addObject)
-                .def("add_bc_adapter", &Simulation::addBCAdapter)
-                .def("run_simulation", &Simulation::run);
-    }
-
-}
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/simulationparameters.cpp b/src/cpu/pythonbindings/src/submodules/simulationparameters.cpp
deleted file mode 100644
index acc272f2ee412cfbafd9007b4b18610cfd0a1e9b..0000000000000000000000000000000000000000
--- a/src/cpu/pythonbindings/src/submodules/simulationparameters.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <complex>
-#include <simulationconfig/SimulationParameters.h>
-
-namespace parameters
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::module parametersModule = parentModule.def_submodule("parameters");
-
-        py::class_<PhysicalParameters, std::shared_ptr<PhysicalParameters>>(parametersModule, "PhysicalParameters")
-                .def(py::init())
-                .def_readwrite("bulk_viscosity_factor", &PhysicalParameters::bulkViscosityFactor,
-                               "The viscosity of the fluid will be multiplied with this factor to calculate its bulk viscosity. Default is 1.0")
-                .def_readwrite("lattice_viscosity", &PhysicalParameters::latticeViscosity, "Lattice viscosity");
-
-        py::class_<GridParameters, std::shared_ptr<GridParameters>>(parametersModule, "GridParameters")
-                .def(py::init())
-                .def_readwrite("node_distance", &GridParameters::nodeDistance)
-                .def_readwrite("reference_direction_index", &GridParameters::referenceDirectionIndex)
-                .def_readwrite("number_of_nodes_per_direction", &GridParameters::numberOfNodesPerDirection)
-                .def_readwrite("blocks_per_direction", &GridParameters::blocksPerDirection)
-                .def_readwrite("periodic_boundary_in_x1", &GridParameters::periodicBoundaryInX1)
-                .def_readwrite("periodic_boundary_in_x2", &GridParameters::periodicBoundaryInX2)
-                .def_readwrite("periodic_boundary_in_x3", &GridParameters::periodicBoundaryInX3)
-                .def_property_readonly("bounding_box", &GridParameters::boundingBox);
-
-        py::class_<BoundingBox, std::shared_ptr<BoundingBox>>(parametersModule, "BoundingBox")
-                .def_readonly("min_x1", &BoundingBox::minX1)
-                .def_readonly("min_x2", &BoundingBox::minX2)
-                .def_readonly("min_x3", &BoundingBox::minX3)
-                .def_readonly("max_x1", &BoundingBox::maxX1)
-                .def_readonly("max_x2", &BoundingBox::maxX2)
-                .def_readonly("max_x3", &BoundingBox::maxX3)
-                .def("__repr__", [](BoundingBox &self)
-                {
-                    std::ostringstream stream;
-                    stream << "<BoundingBox" << std::endl
-                           << "min x1: " << self.minX1 << std::endl
-                           << "min x2: " << self.minX2 << std::endl
-                           << "min x3: " << self.minX3 << std::endl
-                           << "max x1: " << self.maxX1 << std::endl
-                           << "max x2: " << self.maxX2 << std::endl
-                           << "max x3: " << self.maxX3 << std::endl << ">";
-
-                    return stream.str();
-                });
-
-        py::class_<RuntimeParameters, std::shared_ptr<RuntimeParameters>>(parametersModule, "RuntimeParameters")
-                .def(py::init())
-                .def_readwrite("number_of_timesteps", &RuntimeParameters::numberOfTimeSteps)
-                .def_readwrite("timestep_log_interval", &RuntimeParameters::timeStepLogInterval)
-                .def_readwrite("number_of_threads", &RuntimeParameters::numberOfThreads);
-
-    }
-}
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/writer.cpp b/src/cpu/pythonbindings/src/submodules/writer.cpp
deleted file mode 100644
index d5ec527a27caf63d9a3066c51e1f675b307fe0b2..0000000000000000000000000000000000000000
--- a/src/cpu/pythonbindings/src/submodules/writer.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <simulationconfig/WriterConfiguration.h>
-
-namespace writer
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::module writerModule = parentModule.def_submodule("writer");
-
-        py::enum_<OutputFormat>(writerModule, "OutputFormat")
-                .value("ASCII", OutputFormat::ASCII)
-                .value("BINARY", OutputFormat::BINARY);
-
-        py::class_<WriterConfiguration>(writerModule, "Writer")
-                .def(py::init())
-                .def_readwrite("output_path", &WriterConfiguration::outputPath)
-                .def_readwrite("output_format", &WriterConfiguration::outputFormat);
-    }
-}
\ No newline at end of file
diff --git a/src/cpu/simulationconfig/CMakeLists.txt b/src/cpu/simulationconfig/CMakeLists.txt
index 95ee969a049fd65cfadc6cc95d814e788a02aa8e..f0659f67a2af8d40a20991be42b4b49e1cab8ff1 100644
--- a/src/cpu/simulationconfig/CMakeLists.txt
+++ b/src/cpu/simulationconfig/CMakeLists.txt
@@ -1,7 +1,7 @@
 project(simulationconfig)
 
 
-vf_add_library(NAME simulationconfig PUBLIC_LINK VirtualFluidsCore basics muparser)
+vf_add_library(NAME simulationconfig PUBLIC_LINK VirtualFluidsCore basics muparser lbm)
 
 set_target_properties(simulationconfig PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
diff --git a/src/cpu/simulationconfig/include/simulationconfig/KernelConfigStructs.h b/src/cpu/simulationconfig/include/simulationconfig/KernelConfigStructs.h
index 88e621a3fe00a522b23fda4101e84d39305e80a2..53ea667c9da3a9c57d5aeeac67588cab0afbff3f 100644
--- a/src/cpu/simulationconfig/include/simulationconfig/KernelConfigStructs.h
+++ b/src/cpu/simulationconfig/include/simulationconfig/KernelConfigStructs.h
@@ -2,14 +2,14 @@
 #define VIRTUALFLUIDSPYTHONBINDINGS_KERNELCONFIGSTRUCTS_H
 
 #include <string>
-#include <LBM/LBMSystem.h>
+#include <basics/DataTypes.h>
 
 struct LBMKernelConfiguration {
     KernelFactory::KernelType kernelType;
     bool useForcing = false;
-    LBMReal forcingX1{};
-    LBMReal forcingX2{};
-    LBMReal forcingX3{};
+    real forcingX1{};
+    real forcingX2{};
+    real forcingX3{};
 
     explicit LBMKernelConfiguration(KernelFactory::KernelType kernelType) : kernelType(kernelType)
     {
diff --git a/src/cpu/simulationconfig/src/Simulation.cpp b/src/cpu/simulationconfig/src/Simulation.cpp
index 1258df75b8440b468c942688c9dc3366e3e2a833..098f913d61a87b0dd2692faad07de691ca7e04a1 100644
--- a/src/cpu/simulationconfig/src/Simulation.cpp
+++ b/src/cpu/simulationconfig/src/Simulation.cpp
@@ -33,6 +33,8 @@
 #include <simulationconfig/SimulationParameters.h>
 #include <simulationconfig/Simulation.h>
 
+#include <lbm/constants/D3Q27.h>
+
 
 Simulation::Simulation()
 {
@@ -120,7 +122,7 @@ void Simulation::run()
 
     auto metisVisitor = std::make_shared<MetisPartitioningGridVisitor>(communicator,
                                                                        MetisPartitioningGridVisitor::LevelBased,
-                                                                       D3Q27System::DIR_00M, MetisPartitioner::RECURSIVE);
+                                                                       vf::lbm::dir::DIR_00M, MetisPartitioner::RECURSIVE);
 
     InteractorsHelper intHelper(grid, metisVisitor);
     for (auto const &interactor : interactors)
diff --git a/src/gpu/GksGpu/Analyzer/ConvergenceAnalyzer.cpp b/src/gpu/GksGpu/Analyzer/ConvergenceAnalyzer.cpp
deleted file mode 100644
index 2125ceb6c10d2074c6e5f804e3944cd031601f1c..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/ConvergenceAnalyzer.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-#include "ConvergenceAnalyzer.h"
-
-#include <cmath>
-#include <sstream>
-#include <iomanip>
-
-#include "Core/Logger/Logger.h"
-
-#include "DataBase/DataBase.h"
-
-#include "FlowStateData/AccessDeviceData.cuh"
-
-namespace GksGpu {
-
-ConvergenceAnalyzer::ConvergenceAnalyzer(SPtr<DataBase> dataBase, uint outputIter, real convergenceThreshold)
-{
-    this->dataBase = dataBase;
-
-    this->outputIter = outputIter;
-
-    this->setConvergenceThreshold( convergenceThreshold );
-
-    this->dataHostOld = dataBase->dataHost;
-    this->dataHostNew = dataBase->dataHost;
-}
-
-void ConvergenceAnalyzer::setConvergenceThreshold(real convergenceThreshold)
-{
-    this->convergenceThreshold.rho  = convergenceThreshold;
-    this->convergenceThreshold.rhoU = convergenceThreshold;
-    this->convergenceThreshold.rhoV = convergenceThreshold;
-    this->convergenceThreshold.rhoW = convergenceThreshold;
-    this->convergenceThreshold.rhoE = convergenceThreshold;
-#ifdef USE_PASSIVE_SCALAR
-    this->convergenceThreshold.rhoS_1 = convergenceThreshold;
-    this->convergenceThreshold.rhoS_2 = convergenceThreshold;
-#endif //USE_PASSIVE_SCALAR
-}
-
-void ConvergenceAnalyzer::setConvergenceThreshold(ConservedVariables convergenceThreshold)
-{
-    this->convergenceThreshold.rho  = convergenceThreshold.rho ;
-    this->convergenceThreshold.rhoU = convergenceThreshold.rhoU;
-    this->convergenceThreshold.rhoV = convergenceThreshold.rhoV;
-    this->convergenceThreshold.rhoW = convergenceThreshold.rhoW;
-    this->convergenceThreshold.rhoE = convergenceThreshold.rhoE;
-#ifdef USE_PASSIVE_SCALAR
-    this->convergenceThreshold.rhoS_1 = convergenceThreshold.rhoS_1;
-    this->convergenceThreshold.rhoS_2 = convergenceThreshold.rhoS_2;
-#endif //USE_PASSIVE_SCALAR
-}
-
-bool ConvergenceAnalyzer::run(uint iter)
-{
-    if( iter % outputIter != 0 ) return false;
-
-    this->dataBase->copyDataDeviceToHost( this->dataHostNew.data() );
-
-    ConservedVariables changeSquareSum, consSquareSum;
-
-    for( uint cellIdx = 0; cellIdx < this->dataBase->numberOfCells; cellIdx++  ){
-
-        ConservedVariables change, cons;
-
-        cons.rho  = this->dataHostNew[ RHO__(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoU = this->dataHostNew[ RHO_U(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoV = this->dataHostNew[ RHO_V(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoW = this->dataHostNew[ RHO_W(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoE = this->dataHostNew[ RHO_E(cellIdx, dataBase->numberOfCells) ];
-
-        change.rho  = cons.rho  - this->dataHostOld[ RHO__(cellIdx, dataBase->numberOfCells) ];
-        change.rhoU = cons.rhoU - this->dataHostOld[ RHO_U(cellIdx, dataBase->numberOfCells) ];
-        change.rhoV = cons.rhoV - this->dataHostOld[ RHO_V(cellIdx, dataBase->numberOfCells) ];
-        change.rhoW = cons.rhoW - this->dataHostOld[ RHO_W(cellIdx, dataBase->numberOfCells) ];
-        change.rhoE = cons.rhoE - this->dataHostOld[ RHO_E(cellIdx, dataBase->numberOfCells) ];
-    
-        changeSquareSum.rho  += change.rho  * change.rho ;
-        changeSquareSum.rhoU += change.rhoU * change.rhoU;
-        changeSquareSum.rhoV += change.rhoV * change.rhoV;
-        changeSquareSum.rhoW += change.rhoW * change.rhoW;
-        changeSquareSum.rhoE += change.rhoE * change.rhoE;
-    
-        consSquareSum.rho  += cons.rho  * cons.rho ;
-        consSquareSum.rhoU += cons.rhoU * cons.rhoU;
-        consSquareSum.rhoV += cons.rhoV * cons.rhoV;
-        consSquareSum.rhoW += cons.rhoW * cons.rhoW;
-        consSquareSum.rhoE += cons.rhoE * cons.rhoE;
-    }
-
-    ConservedVariables L2Change;
-
-    L2Change.rho  = std::sqrt( changeSquareSum.rho  / consSquareSum.rho  );
-    L2Change.rhoU = std::sqrt( changeSquareSum.rhoU / consSquareSum.rhoU );
-    L2Change.rhoV = std::sqrt( changeSquareSum.rhoV / consSquareSum.rhoV );
-    L2Change.rhoW = std::sqrt( changeSquareSum.rhoW / consSquareSum.rhoW );
-    L2Change.rhoE = std::sqrt( changeSquareSum.rhoE / consSquareSum.rhoE );
-
-    this->dataHostOld = this->dataHostNew;
-
-    this->printL2Change( L2Change );
-
-    if( L2Change.rho  < this->convergenceThreshold.rho  &&
-        L2Change.rhoU < this->convergenceThreshold.rhoU &&
-        L2Change.rhoV < this->convergenceThreshold.rhoV &&
-        L2Change.rhoW < this->convergenceThreshold.rhoW &&
-        L2Change.rhoE < this->convergenceThreshold.rhoE )
-    {
-        return true;
-    }
-
-    return false;
-}
-
-void ConvergenceAnalyzer::printL2Change(ConservedVariables L2Change)
-{
-    std::stringstream header;
-    std::stringstream body;
-
-    header << "| ";
-    header << "       rho" << " | "; 
-    header << "      rhoU" << " | "; 
-    header << "      rhoV" << " | "; 
-    header << "      rhoW" << " | "; 
-    header << "      rhoE" << " | ";
-
-    body   << "| ";
-    body   << std::setw(10) << std::setprecision(4) << L2Change.rho  << " | ";
-    body   << std::setw(10) << std::setprecision(4) << L2Change.rhoU << " | ";
-    body   << std::setw(10) << std::setprecision(4) << L2Change.rhoV << " | ";
-    body   << std::setw(10) << std::setprecision(4) << L2Change.rhoW << " | ";
-    body   << std::setw(10) << std::setprecision(4) << L2Change.rhoE << " | ";
-
-    *logging::out << logging::Logger::INFO_HIGH << "Residual L2-Change:" << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << header.str() << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << body.str()   << "\n";
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/Analyzer/ConvergenceAnalyzer.h b/src/gpu/GksGpu/Analyzer/ConvergenceAnalyzer.h
deleted file mode 100644
index 52b1896a20430ea45c6ddf3f513c0902e521a825..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/ConvergenceAnalyzer.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef  ConvergenceAnalyzer_H
-#define  ConvergenceAnalyzer_H
-
-#include <vector>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/Timer/Timer.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "GksGpu_export.h"
-
-namespace GksGpu {
-
-struct DataBase;
-
-class GKSGPU_EXPORT ConvergenceAnalyzer
-{
-private:
-
-    SPtr<DataBase> dataBase;
-
-    std::vector<real> dataHostOld;
-    std::vector<real> dataHostNew;
-
-    uint outputIter;
-
-    ConservedVariables convergenceThreshold;
-
-public:
-
-    ConvergenceAnalyzer( SPtr<DataBase> dataBase, uint outputIter = 10000, real convergenceThreshold = 1.0e-6 );
-
-    void setConvergenceThreshold( real convergenceThreshold );
-    void setConvergenceThreshold( ConservedVariables convergenceThreshold );
-
-    bool run( uint iter );
-
-private:
-
-    void printL2Change( ConservedVariables L2Change );
-
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Analyzer/CupsAnalyzer.cpp b/src/gpu/GksGpu/Analyzer/CupsAnalyzer.cpp
deleted file mode 100644
index 1d23a82bef8ea9c3e22eef18db57a4365f603eb3..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/CupsAnalyzer.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-#include "CupsAnalyzer.h"
-
-#include <cmath>
-#include <sstream>
-#include <iomanip>
-
-#include "Core/Logger/Logger.h"
-
-
-#include "GksGpu_export.h"
-
-#include "DataBase/DataBase.h"
-
-namespace GksGpu {
-
-CupsAnalyzer::CupsAnalyzer(SPtr<DataBase> dataBase, 
-                           bool outputPerTime, real outputTime, 
-                           bool outputPerIter, uint outputIter)
-    : outputPerTime(outputPerTime),
-      outputTime(outputTime),
-      outputPerIter(outputPerIter),
-      outputIter(outputIter),
-      outputPerTimeCounter(1),
-      counter(0)
-{
-    this->timer        = Timer::makeStart();
-    this->timerRestart = Timer::makeStart();
-
-    this->numberOfCellUpdatesPerTimeStep = 0;
-
-    for( uint level = 0; level < dataBase->numberOfLevels; level++ )
-    {
-        numberOfCellUpdatesPerTimeStep += std::pow( 2, level ) * dataBase->perLevelCount[level].numberOfBulkCells;
-    }
-}
-
-void CupsAnalyzer::start()
-{
-    this->counter = 0;
-    this->timer->start();
-    this->timerRestart->start();
-}
-
-void CupsAnalyzer::restart()
-{
-    this->counter = 0;
-    this->timerRestart->start();
-}
-
-real CupsAnalyzer::run( uint iter, real dt )
-{
-    real currentRuntime             = this->timer->getCurrentRuntimeInSeconds();
-    real currentRuntimeSinceRestart = this->timerRestart->getCurrentRuntimeInSeconds();
-
-    real CUPS = -1.0;
-
-    this->counter++;
-
-    if( checkOutputPerTime(currentRuntime) || checkOutputPerIter(iter) )
-    {
-        unsigned long long numberOfCellUpdates = this->numberOfCellUpdatesPerTimeStep * (unsigned long long)counter;
-
-        CUPS = real(numberOfCellUpdates) / currentRuntimeSinceRestart;
-
-        this->printCups( iter, iter * dt, currentRuntime, CUPS );
-
-        this->restart();
-    }
-
-    if( checkOutputPerTime(currentRuntime) )
-    {
-        outputPerTimeCounter++;
-    }
-
-    return CUPS;
-}
-
-bool CupsAnalyzer::checkOutputPerTime(real currentRuntime)
-{
-    return outputPerTime && ( ( currentRuntime - outputPerTimeCounter * outputTime ) > 0 );
-}
-
-bool CupsAnalyzer::checkOutputPerIter(uint iter)
-{
-    return outputPerIter && (iter % outputIter == 0);
-}
-
-void CupsAnalyzer::printCups(uint iter, real simTime, real currentRunTime, real cups)
-{
-    std::stringstream header;
-    std::stringstream body;
-
-    header << "| ";
-    header << "           Iter" << " | "; 
-    header << "      sim. time" << " | "; 
-    header << "      wall time" << " | "; 
-    header << "          MCUPS" << " | ";
-
-    body   << "| ";
-    body   << std::setw(15) << std::setprecision(4) << iter                                        << " | ";
-    body   << std::setw(15) << std::setprecision(4) << this->getTimeString(simTime).c_str()        << " | ";
-    body   << std::setw(15) << std::setprecision(4) << this->getTimeString(currentRunTime).c_str() << " | ";
-    body   << std::setw(15) << std::setprecision(4) << cups / 1.0e6                                << " | ";
-
-    *logging::out << logging::Logger::INFO_HIGH << "Performance:" << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << header.str() << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << body.str()   << "\n";
-}
-
-std::string CupsAnalyzer::getTimeString(real time)
-{
-    int seconds = int(time);
-    int minutes = seconds / 60;
-    int hours   = minutes / 60;
-    int days    = hours   / 24;
-
-    int milliseconds = int( 1000.0 * ( time - real(seconds)) );
-
-    hours   -=     days * 24;
-    minutes -=   ( days * 24 + hours ) * 60;
-    seconds -= ( ( days * 24 + hours ) * 60 + minutes ) * 60;
-
-    std::stringstream timeString;
-    timeString << std::setw(2) << std::setfill('0') << days    << ":";
-    timeString << std::setw(2) << std::setfill('0') << hours   << ":";
-    timeString << std::setw(2) << std::setfill('0') << minutes << ":";
-    timeString << std::setw(2) << std::setfill('0') << seconds << ".";
-    timeString << std::setw(3) << std::setfill('0') << milliseconds;
-
-    return timeString.str();
-}
-
-} // namespace GksGpu
-
-
diff --git a/src/gpu/GksGpu/Analyzer/CupsAnalyzer.h b/src/gpu/GksGpu/Analyzer/CupsAnalyzer.h
deleted file mode 100644
index 095e795501adf019bb9eb1e54dda3329b98bb9d8..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/CupsAnalyzer.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef  CupsAnalyzer_H
-#define  CupsAnalyzer_H
-
-#include <string>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/Timer/Timer.h"
-#include "GksGpu_export.h"
-
-namespace GksGpu {
-
-struct DataBase;
-
-class GKSGPU_EXPORT CupsAnalyzer
-{
-private:
-    SPtr<Timer> timer;
-    SPtr<Timer> timerRestart;
-
-    bool outputPerTime;
-
-    bool outputPerIter;
-
-    real outputTime;
-    uint outputPerTimeCounter;
-
-    uint outputIter;
-
-    unsigned long long numberOfCellUpdatesPerTimeStep;
-
-    uint counter;
-
-public:
-
-    CupsAnalyzer( SPtr<DataBase> dataBase, 
-                  bool outputPerTime = true, real outputTime = 600.0,
-                  bool outputPerIter = true, uint outputIter = 10000 );
-
-    void start();
-
-    void restart();
-
-    real run( uint iter, real dt );
-
-private:
-
-    bool checkOutputPerTime( real currentRuntime );
-    bool checkOutputPerIter( uint iter );
-
-    void printCups(uint iter, real simTime, real currentRunTime, real cups);
-
-    std::string getTimeString( real time );
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu b/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu
deleted file mode 100644
index 346692bfdf8c8daf9a659a3a0ef04aa57f487545..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu
+++ /dev/null
@@ -1,346 +0,0 @@
-#include "EnstrophyAnalyzer.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include <cmath>
-#include <sstream>
-
-#include <thrust/device_vector.h>
-#include <thrust/reduce.h>
-#include <thrust/device_ptr.h>
-
-#include <iomanip>
-
-#include "Core/Logger/Logger.h"
-
-#include "DataBase/DataBase.h"
-
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-__global__                 void enstrophyKernel  ( DataBaseStruct dataBase, Parameters parameters, real* enstrophy, uint nx, uint startIndex, uint numberOfEntities );
-
-__host__ __device__ inline void enstrophyFunction( DataBaseStruct dataBase, Parameters parameters, real* enstrophy, uint nx, uint startIndex, uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-bool EnstrophyAnalyzer::run(uint iter)
-{
-    if( iter % this->analyzeIter != 0 ) return false;
-
-    thrust::device_vector<real> enstrophy( this->dataBase->perLevelCount[ 0 ].numberOfBulkCells );
-
-    CudaUtility::CudaGrid grid( dataBase->perLevelCount[ 0 ].numberOfBulkCells, 32 );
-
-    uint nx;
-    if     ( dataBase->perLevelCount[ 0 ].numberOfBulkCells ==  64* 64* 64 ) nx =  64;
-    else if( dataBase->perLevelCount[ 0 ].numberOfBulkCells == 128*128*128 ) nx = 128;
-    else if( dataBase->perLevelCount[ 0 ].numberOfBulkCells == 256*256*256 ) nx = 256;
-
-    runKernel( enstrophyKernel,
-               enstrophyFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               parameters,
-               enstrophy.data().get(),
-               nx,
-               dataBase->perLevelCount[ 0 ].startOfCells );
-
-    getLastCudaError("KineticEnergyAnalyzer::run(uint iter)");
-
-    real EnstrophyTmp = thrust::reduce( enstrophy.begin(), enstrophy.end(), c0o1, thrust::plus<real>() )
-                      / real(dataBase->perLevelCount[ 0 ].numberOfBulkCells);
-
-    this->enstrophyTimeSeries.push_back( EnstrophyTmp );
-
-    //*logging::out << logging::Logger::INFO_HIGH << "EKin = " << EKin << "\n";
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void enstrophyKernel(DataBaseStruct dataBase, Parameters parameters, real* enstrophy, uint nx, uint startIndex, uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    enstrophyFunction( dataBase, parameters, enstrophy, nx, startIndex, index );
-}
-
-__host__ __device__ void enstrophyFunction(DataBaseStruct dataBase, Parameters parameters, real* enstrophy, uint nx, uint startIndex, uint index)
-{
-    uint cellIndex = startIndex + index;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    uint xIndex = ( cellIndex % ( nx*nx ) ) % nx;
-    uint yIndex = ( cellIndex % ( nx*nx ) ) / nx;
-    uint zIndex = ( cellIndex / ( nx*nx ) );
-
-    uint xP1 = (( xIndex + 1 )%nx) + (( yIndex     )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint xP2 = (( xIndex + 2 )%nx) + (( yIndex     )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint xP3 = (( xIndex + 3 )%nx) + (( yIndex     )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint xP4 = (( xIndex + 4 )%nx) + (( yIndex     )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint xM1 = (( xIndex - 1 )%nx) + (( yIndex     )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint xM2 = (( xIndex - 2 )%nx) + (( yIndex     )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint xM3 = (( xIndex - 3 )%nx) + (( yIndex     )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint xM4 = (( xIndex - 4 )%nx) + (( yIndex     )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-
-    uint yP1 = (( xIndex     )%nx) + (( yIndex + 1 )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint yP2 = (( xIndex     )%nx) + (( yIndex + 2 )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint yP3 = (( xIndex     )%nx) + (( yIndex + 3 )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint yP4 = (( xIndex     )%nx) + (( yIndex + 4 )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint yM1 = (( xIndex     )%nx) + (( yIndex - 1 )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint yM2 = (( xIndex     )%nx) + (( yIndex - 2 )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint yM3 = (( xIndex     )%nx) + (( yIndex - 3 )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-    uint yM4 = (( xIndex     )%nx) + (( yIndex - 4 )%nx)*nx + (( zIndex     )%nx)*nx*nx;
-
-    uint zP1 = (( xIndex     )%nx) + (( yIndex     )%nx)*nx + (( zIndex + 1 )%nx)*nx*nx;
-    uint zP2 = (( xIndex     )%nx) + (( yIndex     )%nx)*nx + (( zIndex + 2 )%nx)*nx*nx;
-    uint zP3 = (( xIndex     )%nx) + (( yIndex     )%nx)*nx + (( zIndex + 3 )%nx)*nx*nx;
-    uint zP4 = (( xIndex     )%nx) + (( yIndex     )%nx)*nx + (( zIndex + 4 )%nx)*nx*nx;
-    uint zM1 = (( xIndex     )%nx) + (( yIndex     )%nx)*nx + (( zIndex - 1 )%nx)*nx*nx;
-    uint zM2 = (( xIndex     )%nx) + (( yIndex     )%nx)*nx + (( zIndex - 2 )%nx)*nx*nx;
-    uint zM3 = (( xIndex     )%nx) + (( yIndex     )%nx)*nx + (( zIndex - 3 )%nx)*nx*nx;
-    uint zM4 = (( xIndex     )%nx) + (( yIndex     )%nx)*nx + (( zIndex - 4 )%nx)*nx*nx;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real rho_xP1 = dataBase.data[ RHO__( xP1, dataBase.numberOfCells ) ];
-    real rho_xP2 = dataBase.data[ RHO__( xP2, dataBase.numberOfCells ) ];
-    real rho_xP3 = dataBase.data[ RHO__( xP3, dataBase.numberOfCells ) ];
-    real rho_xP4 = dataBase.data[ RHO__( xP4, dataBase.numberOfCells ) ];
-    real rho_xM1 = dataBase.data[ RHO__( xM1, dataBase.numberOfCells ) ];
-    real rho_xM2 = dataBase.data[ RHO__( xM2, dataBase.numberOfCells ) ];
-    real rho_xM3 = dataBase.data[ RHO__( xM3, dataBase.numberOfCells ) ];
-    real rho_xM4 = dataBase.data[ RHO__( xM4, dataBase.numberOfCells ) ];
-
-    real rho_yP1 = dataBase.data[ RHO__( yP1, dataBase.numberOfCells ) ];
-    real rho_yP2 = dataBase.data[ RHO__( yP2, dataBase.numberOfCells ) ];
-    real rho_yP3 = dataBase.data[ RHO__( yP3, dataBase.numberOfCells ) ];
-    real rho_yP4 = dataBase.data[ RHO__( yP4, dataBase.numberOfCells ) ];
-    real rho_yM1 = dataBase.data[ RHO__( yM1, dataBase.numberOfCells ) ];
-    real rho_yM2 = dataBase.data[ RHO__( yM2, dataBase.numberOfCells ) ];
-    real rho_yM3 = dataBase.data[ RHO__( yM3, dataBase.numberOfCells ) ];
-    real rho_yM4 = dataBase.data[ RHO__( yM4, dataBase.numberOfCells ) ];
-
-    real rho_zP1 = dataBase.data[ RHO__( zP1, dataBase.numberOfCells ) ];
-    real rho_zP2 = dataBase.data[ RHO__( zP2, dataBase.numberOfCells ) ];
-    real rho_zP3 = dataBase.data[ RHO__( zP3, dataBase.numberOfCells ) ];
-    real rho_zP4 = dataBase.data[ RHO__( zP4, dataBase.numberOfCells ) ];
-    real rho_zM1 = dataBase.data[ RHO__( zM1, dataBase.numberOfCells ) ];
-    real rho_zM2 = dataBase.data[ RHO__( zM2, dataBase.numberOfCells ) ];
-    real rho_zM3 = dataBase.data[ RHO__( zM3, dataBase.numberOfCells ) ];
-    real rho_zM4 = dataBase.data[ RHO__( zM4, dataBase.numberOfCells ) ];
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real U_xP1   = dataBase.data[ RHO_U( xP1, dataBase.numberOfCells ) ] / rho_xP1;
-    real U_xP2   = dataBase.data[ RHO_U( xP2, dataBase.numberOfCells ) ] / rho_xP2;
-    real U_xP3   = dataBase.data[ RHO_U( xP3, dataBase.numberOfCells ) ] / rho_xP3;
-    real U_xP4   = dataBase.data[ RHO_U( xP4, dataBase.numberOfCells ) ] / rho_xP4;
-    real U_xM1   = dataBase.data[ RHO_U( xM1, dataBase.numberOfCells ) ] / rho_xM1;
-    real U_xM2   = dataBase.data[ RHO_U( xM2, dataBase.numberOfCells ) ] / rho_xM2;
-    real U_xM3   = dataBase.data[ RHO_U( xM3, dataBase.numberOfCells ) ] / rho_xM3;
-    real U_xM4   = dataBase.data[ RHO_U( xM4, dataBase.numberOfCells ) ] / rho_xM4;
-
-    real U_yP1   = dataBase.data[ RHO_U( yP1, dataBase.numberOfCells ) ] / rho_yP1;
-    real U_yP2   = dataBase.data[ RHO_U( yP2, dataBase.numberOfCells ) ] / rho_yP2;
-    real U_yP3   = dataBase.data[ RHO_U( yP3, dataBase.numberOfCells ) ] / rho_yP3;
-    real U_yP4   = dataBase.data[ RHO_U( yP4, dataBase.numberOfCells ) ] / rho_yP4;
-    real U_yM1   = dataBase.data[ RHO_U( yM1, dataBase.numberOfCells ) ] / rho_yM1;
-    real U_yM2   = dataBase.data[ RHO_U( yM2, dataBase.numberOfCells ) ] / rho_yM2;
-    real U_yM3   = dataBase.data[ RHO_U( yM3, dataBase.numberOfCells ) ] / rho_yM3;
-    real U_yM4   = dataBase.data[ RHO_U( yM4, dataBase.numberOfCells ) ] / rho_yM4;
-
-    real U_zP1   = dataBase.data[ RHO_U( zP1, dataBase.numberOfCells ) ] / rho_zP1;
-    real U_zP2   = dataBase.data[ RHO_U( zP2, dataBase.numberOfCells ) ] / rho_zP2;
-    real U_zP3   = dataBase.data[ RHO_U( zP3, dataBase.numberOfCells ) ] / rho_zP3;
-    real U_zP4   = dataBase.data[ RHO_U( zP4, dataBase.numberOfCells ) ] / rho_zP4;
-    real U_zM1   = dataBase.data[ RHO_U( zM1, dataBase.numberOfCells ) ] / rho_zM1;
-    real U_zM2   = dataBase.data[ RHO_U( zM2, dataBase.numberOfCells ) ] / rho_zM2;
-    real U_zM3   = dataBase.data[ RHO_U( zM3, dataBase.numberOfCells ) ] / rho_zM3;
-    real U_zM4   = dataBase.data[ RHO_U( zM4, dataBase.numberOfCells ) ] / rho_zM4;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real V_xP1   = dataBase.data[ RHO_V( xP1, dataBase.numberOfCells ) ] / rho_xP1;
-    real V_xP2   = dataBase.data[ RHO_V( xP2, dataBase.numberOfCells ) ] / rho_xP2;
-    real V_xP3   = dataBase.data[ RHO_V( xP3, dataBase.numberOfCells ) ] / rho_xP3;
-    real V_xP4   = dataBase.data[ RHO_V( xP4, dataBase.numberOfCells ) ] / rho_xP4;
-    real V_xM1   = dataBase.data[ RHO_V( xM1, dataBase.numberOfCells ) ] / rho_xM1;
-    real V_xM2   = dataBase.data[ RHO_V( xM2, dataBase.numberOfCells ) ] / rho_xM2;
-    real V_xM3   = dataBase.data[ RHO_V( xM3, dataBase.numberOfCells ) ] / rho_xM3;
-    real V_xM4   = dataBase.data[ RHO_V( xM4, dataBase.numberOfCells ) ] / rho_xM4;
-
-    real V_yP1   = dataBase.data[ RHO_V( yP1, dataBase.numberOfCells ) ] / rho_yP1;
-    real V_yP2   = dataBase.data[ RHO_V( yP2, dataBase.numberOfCells ) ] / rho_yP2;
-    real V_yP3   = dataBase.data[ RHO_V( yP3, dataBase.numberOfCells ) ] / rho_yP3;
-    real V_yP4   = dataBase.data[ RHO_V( yP4, dataBase.numberOfCells ) ] / rho_yP4;
-    real V_yM1   = dataBase.data[ RHO_V( yM1, dataBase.numberOfCells ) ] / rho_yM1;
-    real V_yM2   = dataBase.data[ RHO_V( yM2, dataBase.numberOfCells ) ] / rho_yM2;
-    real V_yM3   = dataBase.data[ RHO_V( yM3, dataBase.numberOfCells ) ] / rho_yM3;
-    real V_yM4   = dataBase.data[ RHO_V( yM4, dataBase.numberOfCells ) ] / rho_yM4;
-
-    real V_zP1   = dataBase.data[ RHO_V( zP1, dataBase.numberOfCells ) ] / rho_zP1;
-    real V_zP2   = dataBase.data[ RHO_V( zP2, dataBase.numberOfCells ) ] / rho_zP2;
-    real V_zP3   = dataBase.data[ RHO_V( zP3, dataBase.numberOfCells ) ] / rho_zP3;
-    real V_zP4   = dataBase.data[ RHO_V( zP4, dataBase.numberOfCells ) ] / rho_zP4;
-    real V_zM1   = dataBase.data[ RHO_V( zM1, dataBase.numberOfCells ) ] / rho_zM1;
-    real V_zM2   = dataBase.data[ RHO_V( zM2, dataBase.numberOfCells ) ] / rho_zM2;
-    real V_zM3   = dataBase.data[ RHO_V( zM3, dataBase.numberOfCells ) ] / rho_zM3;
-    real V_zM4   = dataBase.data[ RHO_V( zM4, dataBase.numberOfCells ) ] / rho_zM4;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real W_xP1   = dataBase.data[ RHO_W( xP1, dataBase.numberOfCells ) ] / rho_xP1;
-    real W_xP2   = dataBase.data[ RHO_W( xP2, dataBase.numberOfCells ) ] / rho_xP2;
-    real W_xP3   = dataBase.data[ RHO_W( xP3, dataBase.numberOfCells ) ] / rho_xP3;
-    real W_xP4   = dataBase.data[ RHO_W( xP4, dataBase.numberOfCells ) ] / rho_xP4;
-    real W_xM1   = dataBase.data[ RHO_W( xM1, dataBase.numberOfCells ) ] / rho_xM1;
-    real W_xM2   = dataBase.data[ RHO_W( xM2, dataBase.numberOfCells ) ] / rho_xM2;
-    real W_xM3   = dataBase.data[ RHO_W( xM3, dataBase.numberOfCells ) ] / rho_xM3;
-    real W_xM4   = dataBase.data[ RHO_W( xM4, dataBase.numberOfCells ) ] / rho_xM4;
-
-    real W_yP1   = dataBase.data[ RHO_W( yP1, dataBase.numberOfCells ) ] / rho_yP1;
-    real W_yP2   = dataBase.data[ RHO_W( yP2, dataBase.numberOfCells ) ] / rho_yP2;
-    real W_yP3   = dataBase.data[ RHO_W( yP3, dataBase.numberOfCells ) ] / rho_yP3;
-    real W_yP4   = dataBase.data[ RHO_W( yP4, dataBase.numberOfCells ) ] / rho_yP4;
-    real W_yM1   = dataBase.data[ RHO_W( yM1, dataBase.numberOfCells ) ] / rho_yM1;
-    real W_yM2   = dataBase.data[ RHO_W( yM2, dataBase.numberOfCells ) ] / rho_yM2;
-    real W_yM3   = dataBase.data[ RHO_W( yM3, dataBase.numberOfCells ) ] / rho_yM3;
-    real W_yM4   = dataBase.data[ RHO_W( yM4, dataBase.numberOfCells ) ] / rho_yM4;
-
-    real W_zP1   = dataBase.data[ RHO_W( zP1, dataBase.numberOfCells ) ] / rho_zP1;
-    real W_zP2   = dataBase.data[ RHO_W( zP2, dataBase.numberOfCells ) ] / rho_zP2;
-    real W_zP3   = dataBase.data[ RHO_W( zP3, dataBase.numberOfCells ) ] / rho_zP3;
-    real W_zP4   = dataBase.data[ RHO_W( zP4, dataBase.numberOfCells ) ] / rho_zP4;
-    real W_zM1   = dataBase.data[ RHO_W( zM1, dataBase.numberOfCells ) ] / rho_zM1;
-    real W_zM2   = dataBase.data[ RHO_W( zM2, dataBase.numberOfCells ) ] / rho_zM2;
-    real W_zM3   = dataBase.data[ RHO_W( zM3, dataBase.numberOfCells ) ] / rho_zM3;
-    real W_zM4   = dataBase.data[ RHO_W( zM4, dataBase.numberOfCells ) ] / rho_zM4;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real dVdx = ( (c28o1 * c8o1) * ( V_xP1 - V_xM1 ) - (c7o1 * c8o1) * ( V_xP2 - V_xM2 ) + (c8o1 * c4o1 * c1o3) * ( V_xP3 - V_xM3 ) - ( V_xP4 - V_xM4 ) ) / (c7o1 * c10o1 * c4o1 * parameters.dx);
-    real dWdx = ( (c28o1 * c8o1) * ( W_xP1 - W_xM1 ) - (c7o1 * c8o1) * ( W_xP2 - W_xM2 ) + (c8o1 * c4o1 * c1o3) * ( W_xP3 - W_xM3 ) - ( W_xP4 - W_xM4 ) ) / (c7o1 * c10o1 * c4o1 * parameters.dx);
-    real dUdy = ( (c28o1 * c8o1) * ( U_yP1 - U_yM1 ) - (c7o1 * c8o1) * ( U_yP2 - U_yM2 ) + (c8o1 * c4o1 * c1o3) * ( U_yP3 - U_yM3 ) - ( U_yP4 - U_yM4 ) ) / (c7o1 * c10o1 * c4o1 * parameters.dx);
-    real dWdy = ( (c28o1 * c8o1) * ( W_yP1 - W_yM1 ) - (c7o1 * c8o1) * ( W_yP2 - W_yM2 ) + (c8o1 * c4o1 * c1o3) * ( W_yP3 - W_yM3 ) - ( W_yP4 - W_yM4 ) ) / (c7o1 * c10o1 * c4o1 * parameters.dx);
-    real dUdz = ( (c28o1 * c8o1) * ( U_zP1 - U_zM1 ) - (c7o1 * c8o1) * ( U_zP2 - U_zM2 ) + (c8o1 * c4o1 * c1o3) * ( U_zP3 - U_zM3 ) - ( U_zP4 - U_zM4 ) ) / (c7o1 * c10o1 * c4o1 * parameters.dx);
-    real dVdz = ( (c28o1 * c8o1) * ( V_zP1 - V_zM1 ) - (c7o1 * c8o1) * ( V_zP2 - V_zM2 ) + (c8o1 * c4o1 * c1o3) * ( V_zP3 - V_zM3 ) - ( V_zP4 - V_zM4 ) ) / (c7o1 * c10o1 * c4o1 * parameters.dx);
-
-    real tmpX = dWdy - dVdz;
-    real tmpY = dUdz - dWdx;
-    real tmpZ = dVdx - dUdy;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real rho = dataBase.data[ RHO__( cellIndex, dataBase.numberOfCells ) ];
-
-    enstrophy[ cellIndex ] = c1o2 * rho * ( tmpX*tmpX + tmpY*tmpY + tmpZ*tmpZ );
-}
-
-//__host__ __device__ void enstrophyFunction(DataBaseStruct dataBase, Parameters parameters, real* enstrophy, uint startIndex, uint index)
-//{
-//    uint cellIndex = startIndex + index;
-//
-//    //////////////////////////////////////////////////////////////////////////
-//
-//    uint cellToCell [6];
-//
-//    cellToCell[0] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 0, dataBase.numberOfCells ) ];
-//    cellToCell[1] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 1, dataBase.numberOfCells ) ];
-//    cellToCell[2] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 2, dataBase.numberOfCells ) ];
-//    cellToCell[3] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 3, dataBase.numberOfCells ) ];
-//    cellToCell[4] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 4, dataBase.numberOfCells ) ];
-//    cellToCell[5] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 5, dataBase.numberOfCells ) ];
-//
-//    real rho [7];
-//    real U   [6];
-//    real V   [6];
-//    real W   [6];
-//
-//    rho[0] = dataBase.data[ RHO__( cellToCell[0], dataBase.numberOfCells ) ];
-//    rho[1] = dataBase.data[ RHO__( cellToCell[1], dataBase.numberOfCells ) ];
-//    rho[2] = dataBase.data[ RHO__( cellToCell[2], dataBase.numberOfCells ) ];
-//    rho[3] = dataBase.data[ RHO__( cellToCell[3], dataBase.numberOfCells ) ];
-//    rho[4] = dataBase.data[ RHO__( cellToCell[4], dataBase.numberOfCells ) ];
-//    rho[5] = dataBase.data[ RHO__( cellToCell[5], dataBase.numberOfCells ) ];
-//    rho[6] = dataBase.data[ RHO__( cellIndex    , dataBase.numberOfCells ) ];
-//
-//    U  [0] = dataBase.data[ RHO_U( cellToCell[0], dataBase.numberOfCells ) ] / rho[0];
-//    U  [1] = dataBase.data[ RHO_U( cellToCell[1], dataBase.numberOfCells ) ] / rho[1];
-//    U  [2] = dataBase.data[ RHO_U( cellToCell[2], dataBase.numberOfCells ) ] / rho[2];
-//    U  [3] = dataBase.data[ RHO_U( cellToCell[3], dataBase.numberOfCells ) ] / rho[3];
-//    U  [4] = dataBase.data[ RHO_U( cellToCell[4], dataBase.numberOfCells ) ] / rho[4];
-//    U  [5] = dataBase.data[ RHO_U( cellToCell[5], dataBase.numberOfCells ) ] / rho[5];
-//
-//    V  [0] = dataBase.data[ RHO_V( cellToCell[0], dataBase.numberOfCells ) ] / rho[0];
-//    V  [1] = dataBase.data[ RHO_V( cellToCell[1], dataBase.numberOfCells ) ] / rho[1];
-//    V  [2] = dataBase.data[ RHO_V( cellToCell[2], dataBase.numberOfCells ) ] / rho[2];
-//    V  [3] = dataBase.data[ RHO_V( cellToCell[3], dataBase.numberOfCells ) ] / rho[3];
-//    V  [4] = dataBase.data[ RHO_V( cellToCell[4], dataBase.numberOfCells ) ] / rho[4];
-//    V  [5] = dataBase.data[ RHO_V( cellToCell[5], dataBase.numberOfCells ) ] / rho[5];
-//
-//    W  [0] = dataBase.data[ RHO_W( cellToCell[0], dataBase.numberOfCells ) ] / rho[0];
-//    W  [1] = dataBase.data[ RHO_W( cellToCell[1], dataBase.numberOfCells ) ] / rho[1];
-//    W  [2] = dataBase.data[ RHO_W( cellToCell[2], dataBase.numberOfCells ) ] / rho[2];
-//    W  [3] = dataBase.data[ RHO_W( cellToCell[3], dataBase.numberOfCells ) ] / rho[3];
-//    W  [4] = dataBase.data[ RHO_W( cellToCell[4], dataBase.numberOfCells ) ] / rho[4];
-//    W  [5] = dataBase.data[ RHO_W( cellToCell[5], dataBase.numberOfCells ) ] / rho[5];
-//
-//    real dVdx = c1o2 * ( V[1] - V[0] ) / parameters.dx;
-//    real dWdx = c1o2 * ( W[1] - W[0] ) / parameters.dx;
-//
-//    real dUdy = c1o2 * ( U[3] - U[2] ) / parameters.dx;
-//    real dWdy = c1o2 * ( W[3] - W[2] ) / parameters.dx;
-//
-//    real dUdz = c1o2 * ( U[5] - U[4] ) / parameters.dx;
-//    real dVdz = c1o2 * ( V[5] - V[4] ) / parameters.dx;
-//
-//    real tmpX = dWdy - dVdz;
-//    real tmpY = dUdz - dWdx;
-//    real tmpZ = dVdx - dUdy;
-//
-//    //////////////////////////////////////////////////////////////////////////
-//
-//    enstrophy[ cellIndex ] = c1o2 * rho[6] * ( tmpX*tmpX + tmpY*tmpY + tmpZ*tmpZ );
-//}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-EnstrophyAnalyzer::EnstrophyAnalyzer(SPtr<DataBase> dataBase, Parameters parameters, uint analyzeIter, uint outputIter)
-{
-    this->dataBase   = dataBase;
-    this->parameters = parameters;
-
-    this->analyzeIter = analyzeIter;
-    this->outputIter  = outputIter;
-}
-
-void EnstrophyAnalyzer::writeToFile(std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "EnstrophyAnalyzer::writeToFile( " << filename << " )" << "\n";
-
-    std::ofstream file;
-
-    file.open(filename + ".dat" );
-
-    for( auto& EKin : this->enstrophyTimeSeries )
-        file << std::setprecision(15) << EKin << std::endl;
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-} // namespace GksGpu
-
-
diff --git a/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.h b/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.h
deleted file mode 100644
index 43acf5b51f06a7182d4c212811fcce56b3f7ae3d..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef  EnstrophyAnalyzer_H
-#define  EnstrophyAnalyzer_H
-
-#include <vector>
-#include <string>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-#include "Parameters/Parameters.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-namespace GksGpu {
-
-struct DataBase;
-
-class GKSGPU_EXPORT EnstrophyAnalyzer
-{
-private:
-
-    SPtr<DataBase> dataBase;
-
-    Parameters parameters;
-
-    uint outputIter;
-
-    uint analyzeIter;
-
-    std::vector<real> enstrophyTimeSeries;
-
-public:
-
-    EnstrophyAnalyzer( SPtr<DataBase> dataBase, Parameters parameters, uint analyzeIter, uint outputIter );
-
-    bool run( uint iter );
-
-    void writeToFile( std::string filename );
-
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu b/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu
deleted file mode 100644
index ed68f8d95a2a68c00ab53c2cd1037bbff43e0f5b..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu
+++ /dev/null
@@ -1,147 +0,0 @@
-#include "HeatFluxAnalyzer.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include <cmath>
-#include <sstream>
-
-#include <thrust/device_vector.h>
-#include <thrust/reduce.h>
-#include <thrust/device_ptr.h>
-
-#include <iomanip>
-
-#include "Core/Logger/Logger.h"
-
-#include "DataBase/DataBase.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-
-#include "FlowStateData/AccessDeviceData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-
-#include "FluxComputation/SutherlandsLaw.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-__global__                 void heatFluxKernel  ( DataBaseStruct  dataBase, GksGpu::BoundaryConditionStruct  boundaryCondition, Parameters  parameters, real* heatFlux, uint startIndex, uint numberOfEntities );
-__host__ __device__ inline void heatFluxFunction( DataBaseStruct& dataBase, GksGpu::BoundaryConditionStruct& boundaryCondition, Parameters& parameters, real* heatFlux, uint startIndex, uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-bool HeatFluxAnalyzer::run(uint iter, Parameters parameters)
-{
-    if( iter % this->analyzeIter != 0 ) return false;
-
-    uint numberOfCells = this->boundaryCondition->numberOfCellsPerLevel[ dataBase->numberOfLevels - 1 ];
-
-    thrust::device_vector<real> heatFlux( numberOfCells );
-
-    CudaUtility::CudaGrid grid( numberOfCells, 32 );
-
-    for( uint level = 0; level < dataBase->numberOfLevels - 1; level++ ) parameters.dx *= c1o2; 
-
-    runKernel( heatFluxKernel,
-               heatFluxFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               boundaryCondition->toStruct(),
-               parameters,
-               heatFlux.data().get(),
-               boundaryCondition->startOfCellsPerLevel[ dataBase->numberOfLevels - 1 ] );
-
-    getLastCudaError("HeatFluxAnalyzer::run(uint iter)");
-
-    real q = thrust::reduce( heatFlux.begin(), heatFlux.end(), c0o1, thrust::plus<real>() ) * parameters.dx * parameters.dx;
-
-    real qIdeal = c1o4 * (parameters.K + c5o1) * ( parameters.mu / parameters.Pr ) * ( c1o1 / lambdaHot - c1o1 / lambdaCold );
-
-    this->heatFluxTimeSeries.push_back( q / qIdeal );
-
-    if( iter % this->outputIter == 0 ) *logging::out << logging::Logger::INFO_HIGH << "q = " << q / qIdeal << "\n";
-
-    return true;
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void heatFluxKernel(DataBaseStruct  dataBase, GksGpu::BoundaryConditionStruct  boundaryCondition, Parameters  parameters, real* heatFlux, uint startIndex, uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    heatFluxFunction( dataBase, boundaryCondition, parameters, heatFlux, startIndex, index );
-}
-
-__host__ __device__ void heatFluxFunction(DataBaseStruct& dataBase, GksGpu::BoundaryConditionStruct& boundaryCondition, Parameters& parameters, real* heatFlux, uint startIndex, uint index)
-{
-    uint ghostCellIndex  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIndex = boundaryCondition.domainCells[ startIndex + index ];
-
-    if( isCellProperties( dataBase.cellProperties[ domainCellIndex ], CELL_PROPERTIES_GHOST ) )
-    {
-        heatFlux[ startIndex + index ] = c0o1;
-        return;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    ConservedVariables ghostCons;
-
-    readCellData(ghostCellIndex, dataBase, ghostCons);
-
-    ConservedVariables domainCons;
-
-    readCellData(domainCellIndex, dataBase, domainCons);
-
-    PrimitiveVariables ghostPrim  = toPrimitiveVariables(ghostCons,  parameters.K);
-    PrimitiveVariables domainPrim = toPrimitiveVariables(domainCons, parameters.K);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real lambda = c1o2 * (ghostPrim.lambda + domainPrim.lambda);
-
-    real r   = parameters.lambdaRef / lambda;
-
-    real mu = getViscosity(parameters, r);
-
-    heatFlux[ startIndex + index ] = c1o4 * (parameters.K + c5o1) * ( mu / parameters.Pr ) / parameters.dx * ( c1o1 / domainPrim.lambda - c1o1 / ghostPrim.lambda );
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-HeatFluxAnalyzer::HeatFluxAnalyzer( SPtr<DataBase> dataBase, SPtr<GksGpu::BoundaryCondition> boundaryCondition, uint analyzeIter, uint outputIter, real lambdaHot, real lambdaCold, real L )
-    : dataBase(dataBase), 
-      boundaryCondition(boundaryCondition), 
-      analyzeIter(analyzeIter), 
-      outputIter(outputIter), 
-      lambdaHot(lambdaHot), 
-      lambdaCold(lambdaCold), 
-      L(L)
-{
-}
-
-void HeatFluxAnalyzer::writeToFile(std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "HeatFluxAnalyzer::writeToFile( " << filename << " )" << "\n";
-
-    std::ofstream file;
-
-    file.open(filename + ".dat" );
-
-    for( auto& EKin : this->heatFluxTimeSeries )
-        file << std::setprecision(15) << EKin << std::endl;
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-} // namespace GksGpu
-
-
diff --git a/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.h b/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.h
deleted file mode 100644
index e64c24310411095fe09960b9d2ca0550fc7d6cbf..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef  HeatFluxAnalyzer_H
-#define  HeatFluxAnalyzer_H
-
-#include <vector>
-#include <string>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "Parameters/Parameters.h"
-
-namespace GksGpu {
-
-struct DataBase;
-
-class GKSGPU_EXPORT HeatFluxAnalyzer
-{
-private:
-
-    SPtr<DataBase> dataBase;
-    SPtr<GksGpu::BoundaryCondition> boundaryCondition;
-
-    uint outputIter;
-
-    uint analyzeIter;
-
-    std::vector<real> heatFluxTimeSeries;
-
-    real lambdaHot;
-    real lambdaCold;
-
-    real L;
-
-public:
-
-    HeatFluxAnalyzer( SPtr<DataBase> dataBase, SPtr<GksGpu::BoundaryCondition> boundaryCondition, uint analyzeIter, uint outputIter, real lambdaHot, real lambdaCold, real L );
-
-    bool run( uint iter, Parameters parameters );
-
-    void writeToFile( std::string filename );
-
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Analyzer/KineticEnergyAnalyzer.cu b/src/gpu/GksGpu/Analyzer/KineticEnergyAnalyzer.cu
deleted file mode 100644
index 70b130e6a3e22c8064f6e82e281ec51855b284ec..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/KineticEnergyAnalyzer.cu
+++ /dev/null
@@ -1,116 +0,0 @@
-#include "KineticEnergyAnalyzer.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include <cmath>
-#include <sstream>
-
-#include <thrust/device_vector.h>
-#include <thrust/reduce.h>
-#include <thrust/device_ptr.h>
-
-#include <iomanip>
-
-#include "Core/Logger/Logger.h"
-
-#include "DataBase/DataBase.h"
-
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-__global__                 void kineticEnergyKernel  ( DataBaseStruct dataBase, real* kineticEnergy, uint startIndex, uint numberOfEntities );
-
-__host__ __device__ inline void kineticEnergyFunction( DataBaseStruct dataBase, real* kineticEnergy, uint startIndex, uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-bool KineticEnergyAnalyzer::run(uint iter)
-{
-    if( iter % this->analyzeIter != 0 ) return false;
-
-    thrust::device_vector<real> kineticEnergy( this->dataBase->perLevelCount[ 0 ].numberOfBulkCells );
-
-    CudaUtility::CudaGrid grid( dataBase->perLevelCount[ 0 ].numberOfBulkCells, 32 );
-
-    runKernel( kineticEnergyKernel,
-               kineticEnergyFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               kineticEnergy.data().get(),
-               dataBase->perLevelCount[ 0 ].startOfCells );
-
-    getLastCudaError("KineticEnergyAnalyzer::run(uint iter)");
-
-    real EKin = thrust::reduce( kineticEnergy.begin(), kineticEnergy.end(), c0o1, thrust::plus<real>() )
-              / real(dataBase->perLevelCount[ 0 ].numberOfBulkCells);
-
-    this->kineticEnergyTimeSeries.push_back( EKin );
-
-    //*logging::out << logging::Logger::INFO_HIGH << "EKin = " << EKin << "\n";
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void kineticEnergyKernel(DataBaseStruct dataBase, real* kineticEnergy, uint startIndex, uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    kineticEnergyFunction( dataBase, kineticEnergy, startIndex, index );
-}
-
-__host__ __device__ void kineticEnergyFunction(DataBaseStruct dataBase, real* kineticEnergy, uint startIndex, uint index)
-{
-    uint cellIndex = startIndex + index;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    ConservedVariables cons;
-
-    cons.rho  = dataBase.data[ RHO__(cellIndex, dataBase.numberOfCells) ];
-    cons.rhoU = dataBase.data[ RHO_U(cellIndex, dataBase.numberOfCells) ];
-    cons.rhoV = dataBase.data[ RHO_V(cellIndex, dataBase.numberOfCells) ];
-    cons.rhoW = dataBase.data[ RHO_W(cellIndex, dataBase.numberOfCells) ];
-
-    //////////////////////////////////////////////////////////////////////////
-
-    kineticEnergy[ cellIndex ] = c1o2 * ( cons.rhoU * cons.rhoU + cons.rhoV * cons.rhoV + cons.rhoW * cons.rhoW ) / cons.rho;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-KineticEnergyAnalyzer::KineticEnergyAnalyzer(SPtr<DataBase> dataBase, uint analyzeIter, uint outputIter)
-{
-    this->dataBase = dataBase;
-
-    this->analyzeIter = analyzeIter;
-    this->outputIter  = outputIter;
-}
-
-void KineticEnergyAnalyzer::writeToFile(std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "KineticEnergyAnalyzer::writeToFile( " << filename << " )" << "\n";
-
-    std::ofstream file;
-
-    file.open(filename + ".dat" );
-
-    for( auto& EKin : this->kineticEnergyTimeSeries )
-        file << std::setprecision(15) << EKin << std::endl;
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-} // namespace GksGpu
-
-
diff --git a/src/gpu/GksGpu/Analyzer/KineticEnergyAnalyzer.h b/src/gpu/GksGpu/Analyzer/KineticEnergyAnalyzer.h
deleted file mode 100644
index ee69b40988f8105535e5ec77d6c2d1fadb9172e1..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/KineticEnergyAnalyzer.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef  KineticEngergyAnalyzer_H
-#define  KineticEngergyAnalyzer_H
-
-#include <vector>
-#include <string>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-namespace GksGpu {
-
-struct DataBase;
-
-class GKSGPU_EXPORT KineticEnergyAnalyzer
-{
-private:
-
-    SPtr<DataBase> dataBase;
-
-    uint outputIter;
-
-    uint analyzeIter;
-
-    std::vector<real> kineticEnergyTimeSeries;
-
-public:
-
-    KineticEnergyAnalyzer( SPtr<DataBase> dataBase, uint analyzeIter, uint outputIter );
-
-    bool run( uint iter );
-
-    void writeToFile( std::string filename );
-
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Analyzer/PointTimeSeriesAnalyzer.cu b/src/gpu/GksGpu/Analyzer/PointTimeSeriesAnalyzer.cu
deleted file mode 100644
index bba7a1e88cfc0f227e298258e76097169faae7d9..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/PointTimeSeriesAnalyzer.cu
+++ /dev/null
@@ -1,196 +0,0 @@
-#include "PointTimeSeriesAnalyzer.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include <cmath>
-#include <sstream>
-
-#include <thrust/device_vector.h>
-#include <thrust/host_vector.h>
-#include <thrust/reduce.h>
-#include <thrust/device_ptr.h>
-
-#include <iomanip>
-
-#include "Core/Logger/Logger.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "DataBase/DataBase.h"
-
-#include "Parameters/Parameters.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-__global__                 void pointTimeSeriesKernel  ( DataBaseStruct dataBase, PointTimeSeriesAnalyzerStruct pointTimeSeriesAnalyzer, Parameters parameters, uint startIndex, uint numberOfEntities );
-
-__host__ __device__ inline void pointTimeSeriesFunction( DataBaseStruct dataBase, PointTimeSeriesAnalyzerStruct pointTimeSeriesAnalyzer, Parameters parameters, uint startIndex, uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void PointTimeSeriesAnalyzer::run(uint iter, Parameters parameters)
-{
-
-    CudaUtility::CudaGrid grid( 1, 1 );
-
-    runKernel( pointTimeSeriesKernel,
-               pointTimeSeriesFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               0 );
-
-    getLastCudaError("PointTimeSeriesAnalyzer::run(uint iter, Parameters parameters)");
-
-    this->counter++;
-
-    if( this->counter == this->outputIter )
-    {
-        this->download();
-        this->counter = 0;
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void pointTimeSeriesKernel(DataBaseStruct dataBase, PointTimeSeriesAnalyzerStruct pointTimeSeriesAnalyzer, Parameters parameters, uint startIndex, uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    pointTimeSeriesFunction( dataBase, pointTimeSeriesAnalyzer, parameters, startIndex, index );
-}
-
-__host__ __device__ void pointTimeSeriesFunction(DataBaseStruct dataBase, PointTimeSeriesAnalyzerStruct pointTimeSeriesAnalyzer, Parameters parameters, uint startIndex, uint index)
-{
-    //////////////////////////////////////////////////////////////////////////
-
-    ConservedVariables cons;
-
-    readCellData(pointTimeSeriesAnalyzer.cellIndex, dataBase, cons);
-
-    PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( pointTimeSeriesAnalyzer.quantity == 'U' ) pointTimeSeriesAnalyzer.deviceSeries [ pointTimeSeriesAnalyzer.counter ] = prim.U;
-    if( pointTimeSeriesAnalyzer.quantity == 'V' ) pointTimeSeriesAnalyzer.deviceSeries [ pointTimeSeriesAnalyzer.counter ] = prim.V;
-    if( pointTimeSeriesAnalyzer.quantity == 'W' ) pointTimeSeriesAnalyzer.deviceSeries [ pointTimeSeriesAnalyzer.counter ] = prim.W;
-
-#ifdef USE_PASSIVE_SCALAR
-    if( pointTimeSeriesAnalyzer.quantity == 'T' ) pointTimeSeriesAnalyzer.deviceSeries [ pointTimeSeriesAnalyzer.counter ] = getT(prim);
-#else
-    if( pointTimeSeriesAnalyzer.quantity == 'T' ) pointTimeSeriesAnalyzer.deviceSeries [ pointTimeSeriesAnalyzer.counter ] = c1o1 / prim.lambda;
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-PointTimeSeriesAnalyzer::~PointTimeSeriesAnalyzer()
-{
-    this->free();
-}
-
-PointTimeSeriesAnalyzer::PointTimeSeriesAnalyzer(SPtr<DataBase> dataBase, GksMeshAdapter & adapter, Vec3 coordinates, char quantity, uint outputIter)
-    : dataBase(dataBase),
-      deviceSeries(nullptr),
-      counter(0),
-      outputIter(outputIter),
-      quantity(quantity)
-{
-    this->allocate();
-
-    this->findCellIndex( adapter, coordinates );
-}
-
-void PointTimeSeriesAnalyzer::free()
-{
-    checkCudaErrors( cudaFree ( this->deviceSeries  ) );
-}
-
-void PointTimeSeriesAnalyzer::allocate()
-{
-    this->free();
-
-    checkCudaErrors( cudaMalloc ( &this->deviceSeries , sizeof(real) * this->outputIter ) );
-}
-
-void PointTimeSeriesAnalyzer::findCellIndex(GksMeshAdapter & adapter, Vec3 coordinates)
-{
-    real minDistance = 1.0e99;
-
-    for( uint cellIdx = 0 ; cellIdx < adapter.cells.size(); cellIdx++ )
-    {
-        MeshCell& cell = adapter.cells[ cellIdx ];
-
-        Vec3 vec = cell.cellCenter - coordinates;
-
-        real distance = sqrt( vec.x*vec.x + vec.y*vec.y + vec.z*vec.z );
-
-        if( distance < minDistance )
-        {
-            this->cellIndex = cellIdx;
-            minDistance = distance;
-        }
-    }
-
-    this->coordinates = adapter.cells[ this->cellIndex ].cellCenter;
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "PointTimeSeriesAnalyzer::cellIndex = " << this->cellIndex << "\n";
-}
-
-void PointTimeSeriesAnalyzer::writeToFile(std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "PointTimeSeriesAnalyzer::writeToFile( " << filename << " )" << "\n";
-
-    std::ofstream file;
-
-    file.open(filename + ".dat" );
-
-    for( auto& value : this->hostSeries )
-        file << std::setprecision(15) << value << std::endl;
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-PointTimeSeriesAnalyzerStruct PointTimeSeriesAnalyzer::toStruct()
-{
-    PointTimeSeriesAnalyzerStruct pointTimeSeriesAnalyzer;
-
-    pointTimeSeriesAnalyzer.deviceSeries = this->deviceSeries;
-
-    pointTimeSeriesAnalyzer.quantity     = this->quantity;
-
-    pointTimeSeriesAnalyzer.counter      = this->counter;
-
-    pointTimeSeriesAnalyzer.cellIndex    = this->cellIndex;
-
-    return pointTimeSeriesAnalyzer;
-}
-
-void PointTimeSeriesAnalyzer::download()
-{
-    uint oldSize = hostSeries.size();
-
-    this->hostSeries.resize( oldSize + this->outputIter, c0o1 );
-
-    checkCudaErrors( cudaMemcpy( this->hostSeries.data() + oldSize, this->deviceSeries , sizeof(real) * outputIter, cudaMemcpyDeviceToHost ) );
-}
-
-} // namespace GksGpu
-
-
diff --git a/src/gpu/GksGpu/Analyzer/PointTimeSeriesAnalyzer.h b/src/gpu/GksGpu/Analyzer/PointTimeSeriesAnalyzer.h
deleted file mode 100644
index 658c7a9d0f0b7932c57f5387646d6177d905f497..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/PointTimeSeriesAnalyzer.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef  PointTimeSeriesAnalyzer_H
-#define  PointTimeSeriesAnalyzer_H
-
-#include <vector>
-#include <string>
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-class  GksMeshAdapter;
-
-namespace GksGpu {
-
-struct DataBase;
-struct Parameters;
-
-struct PointTimeSeriesAnalyzerStruct
-{
-    real* deviceSeries;
-
-    char quantity;
-
-    uint counter;
-
-    uint cellIndex;
-};
-
-class GKSGPU_EXPORT PointTimeSeriesAnalyzer
-{
-public:
-
-    SPtr<DataBase> dataBase;
-
-    uint outputIter;
-
-    real* deviceSeries;
-
-    char quantity;
-
-    uint counter;
-
-    uint cellIndex;
-
-    std::vector<real> hostSeries;
-
-    Vec3 coordinates;
-
-public:
-
-    ~PointTimeSeriesAnalyzer();
-
-    PointTimeSeriesAnalyzer( SPtr<DataBase> dataBase, GksMeshAdapter & adapter, Vec3 coordinates, char quantity, uint outputIter = 10000 );
-
-    void free();
-
-    void allocate();
-
-    void findCellIndex( GksMeshAdapter & adapter, Vec3 coordinates );
-
-    void run( uint iter, Parameters parameters );
-
-    void writeToFile( std::string filename );
-
-    PointTimeSeriesAnalyzerStruct toStruct();
-
-    void download();
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Analyzer/PointTimeSeriesCollector.cpp b/src/gpu/GksGpu/Analyzer/PointTimeSeriesCollector.cpp
deleted file mode 100644
index 81918b256ab84e5ea9c3d9db97dd8962a0bc1b7d..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/PointTimeSeriesCollector.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-#include "PointTimeSeriesCollector.h"
-
-#include <iomanip>
-#include <fstream>
-
-#include "Core/Logger/Logger.h"
-
-#include "Analyzer/PointTimeSeriesAnalyzer.h"
-
-#include "Parameters/Parameters.h"
-
-namespace GksGpu {
-
-PointTimeSeriesCollector::~PointTimeSeriesCollector()
-{
-}
-
-PointTimeSeriesCollector::PointTimeSeriesCollector()
-{
-}
-
-void PointTimeSeriesCollector::addAnalyzer(SPtr<DataBase> dataBase, GksMeshAdapter & adapter, Vec3 coordinate, char quantity, uint outputIter)
-{
-    auto pointTimeSeriesAnalyzer = std::make_shared<PointTimeSeriesAnalyzer>( dataBase, adapter, coordinate, quantity, outputIter );
-
-    this->analyzerList.push_back( pointTimeSeriesAnalyzer );
-}
-
-void PointTimeSeriesCollector::run(uint iter, Parameters parameters)
-{
-    for( auto analyzer : this->analyzerList )
-        analyzer->run(iter, parameters);
-}
-
-void PointTimeSeriesCollector::writeToFile(std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "PointTimeSeriesCollector::writeToFile( " << filename << " )" << "\n";
-
-    if( this->analyzerList.size() == 0 )
-    {
-        *logging::out << logging::Logger::WARNING << "empty!\n";
-        return;
-    }
-
-    std::ofstream file;
-
-    file.open(filename + ".dat" );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "Number of Points = " << this->analyzerList.size() << "\n";
-
-    for( uint j = 0; j < this->analyzerList.size(); j++ )
-    {
-        file << "Point " << j << ", ";
-        file << "Quantity = "     << this->analyzerList[j]->quantity << ", ";
-        file << "Coordinates = ( " << this->analyzerList[j]->coordinates.x << ", "
-                                   << this->analyzerList[j]->coordinates.y << ", "
-                                   << this->analyzerList[j]->coordinates.z << " )";
-        file << "\n";
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    uint numberOfTimeSteps = this->analyzerList[0]->hostSeries.size();
-
-    for( uint i = 0; i < numberOfTimeSteps; i++ )
-    {
-        for( uint j = 0; j < this->analyzerList.size(); j++ )
-        {
-            file << std::setprecision(15) << this->analyzerList[j]->hostSeries[i] << ", ";
-        }
-
-        file << "\n";
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/Analyzer/PointTimeSeriesCollector.h b/src/gpu/GksGpu/Analyzer/PointTimeSeriesCollector.h
deleted file mode 100644
index 6725e7658b59cfe61aa28fbec1e54fe154f2318e..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/PointTimeSeriesCollector.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef  PointTimeSeriesCelloctor_H
-#define  PointTimeSeriesCelloctor_H
-
-
-#include <vector>
-#include <string>
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-class  GksMeshAdapter;
-
-namespace GksGpu {
-
-class  PointTimeSeriesAnalyzer;
-struct DataBase;
-struct Parameters;
-
-class GKSGPU_EXPORT PointTimeSeriesCollector
-{
-public:
-
-    std::vector< SPtr<PointTimeSeriesAnalyzer> > analyzerList;
-
-public:
-
-    ~PointTimeSeriesCollector();
-
-    PointTimeSeriesCollector(  );
-
-    void addAnalyzer( SPtr<DataBase> dataBase, GksMeshAdapter & adapter, Vec3 coordinate, char quantity, uint outputIter = 10000 );
-
-    void run( uint iter, Parameters parameters );
-
-    void writeToFile( std::string filename );
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu b/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu
deleted file mode 100644
index 5e896e03e7f02b63759f4ff6d42ca7f7f5e7bfa5..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu
+++ /dev/null
@@ -1,433 +0,0 @@
-#include "TurbulenceAnalyzer.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include <cmath>
-#include <sstream>
-
-#include <thrust/device_vector.h>
-#include <thrust/host_vector.h>
-#include <thrust/reduce.h>
-#include <thrust/device_ptr.h>
-
-#include <iomanip>
-
-#include "Core/Logger/Logger.h"
-
-#include "DataBase/DataBase.h"
-
-#include "Parameters/Parameters.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-__global__                 void turbulenceKernel  ( DataBaseStruct dataBase, TurbulenceAnalyzerStruct turbulenceAnalyzer, Parameters parameters, uint startIndex, uint numberOfEntities );
-
-__host__ __device__ inline void turbulenceFunction( DataBaseStruct dataBase, TurbulenceAnalyzerStruct turbulenceAnalyzer, Parameters parameters, uint startIndex, uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-bool TurbulenceAnalyzer::run(uint iter, Parameters parameters)
-{
-    if( iter < this->analyzeStartIter ) return false;
-
-    CudaUtility::CudaGrid grid( dataBase->numberOfCells, 32 );
-
-    runKernel( turbulenceKernel,
-               turbulenceFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               0 );
-
-    getLastCudaError("TurbulenceAnalyzer::run(uint iter, Parameters parameters)");
-
-    this->counter++;
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void turbulenceKernel(DataBaseStruct dataBase, TurbulenceAnalyzerStruct turbulenceAnalyzer, Parameters parameters, uint startIndex, uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    turbulenceFunction( dataBase, turbulenceAnalyzer, parameters, startIndex, index );
-}
-
-__host__ __device__ void turbulenceFunction(DataBaseStruct dataBase, TurbulenceAnalyzerStruct turbulenceAnalyzer, Parameters parameters, uint startIndex, uint index)
-{
-    uint cellIndex = startIndex + index;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    ConservedVariables cons;
-
-    readCellData(cellIndex, dataBase, cons);
-
-    PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( turbulenceAnalyzer.U  ) turbulenceAnalyzer.U [ cellIndex ] += prim.U;
-    if( turbulenceAnalyzer.V  ) turbulenceAnalyzer.V [ cellIndex ] += prim.V;
-    if( turbulenceAnalyzer.W  ) turbulenceAnalyzer.W [ cellIndex ] += prim.W;
-
-    if( turbulenceAnalyzer.UU ) turbulenceAnalyzer.UU[ cellIndex ] += prim.U * prim.U;
-    if( turbulenceAnalyzer.VV ) turbulenceAnalyzer.VV[ cellIndex ] += prim.V * prim.V;
-    if( turbulenceAnalyzer.WW ) turbulenceAnalyzer.WW[ cellIndex ] += prim.W * prim.W;
-
-    if( turbulenceAnalyzer.UV ) turbulenceAnalyzer.UV[ cellIndex ] += prim.U * prim.V;
-    if( turbulenceAnalyzer.UW ) turbulenceAnalyzer.UW[ cellIndex ] += prim.U * prim.W;
-    if( turbulenceAnalyzer.VW ) turbulenceAnalyzer.VW[ cellIndex ] += prim.V * prim.W;
-
-#ifdef USE_PASSIVE_SCALAR
-    if( turbulenceAnalyzer.T  ) turbulenceAnalyzer.T [ cellIndex ] += getT(prim);
-#else
-    if( turbulenceAnalyzer.T  ) turbulenceAnalyzer.T [ cellIndex ] +=   c1o1 / prim.lambda;
-#endif
-
-    if( turbulenceAnalyzer.TT ) turbulenceAnalyzer.TT[ cellIndex ] += ( c1o1 / prim.lambda ) * ( c1o1 / prim.lambda );
-    if( turbulenceAnalyzer.p  ) turbulenceAnalyzer.p [ cellIndex ] += c1o2 * prim.rho / prim.lambda;
-
-    //////////////////////////////////////////////////////////////////////////
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-TurbulenceAnalyzer::~TurbulenceAnalyzer()
-{
-    this->free();
-}
-
-TurbulenceAnalyzer::TurbulenceAnalyzer(SPtr<DataBase> dataBase, uint analyzeStartIter)
-    : U ( nullptr ),
-      V ( nullptr ),
-      W ( nullptr ),
-      UU( nullptr ),
-      VV( nullptr ),
-      WW( nullptr ),
-      UV( nullptr ),
-      UW( nullptr ),
-      VW( nullptr ),
-      T ( nullptr ),
-      TT( nullptr ),
-      p ( nullptr ),
-      collect_U ( true  ),
-      collect_V ( true  ),
-      collect_W ( true  ),
-      collect_UU( false ),
-      collect_VV( false ),
-      collect_WW( false ),
-      collect_UV( false ),
-      collect_UW( false ),
-      collect_VW( false ),
-      collect_T ( true  ),
-      collect_TT( false ),
-      collect_p ( false )
-{
-    this->dataBase = dataBase;
-
-    this->analyzeStartIter = analyzeStartIter;
-
-    this->counter = 0;
-
-    this->allocate();
-}
-
-void TurbulenceAnalyzer::free()
-{
-    if( this->U  ) checkCudaErrors( cudaFree ( this->U  ) );
-    if( this->V  ) checkCudaErrors( cudaFree ( this->V  ) );
-    if( this->W  ) checkCudaErrors( cudaFree ( this->W  ) );
-    if( this->UU ) checkCudaErrors( cudaFree ( this->UU ) );
-    if( this->VV ) checkCudaErrors( cudaFree ( this->VV ) );
-    if( this->WW ) checkCudaErrors( cudaFree ( this->WW ) );
-    if( this->UV ) checkCudaErrors( cudaFree ( this->UV ) );
-    if( this->UW ) checkCudaErrors( cudaFree ( this->UW ) );
-    if( this->VW ) checkCudaErrors( cudaFree ( this->VW ) );
-    if( this->T  ) checkCudaErrors( cudaFree ( this->T  ) );
-    if( this->TT ) checkCudaErrors( cudaFree ( this->TT ) );
-    if( this->p  ) checkCudaErrors( cudaFree ( this->p  ) );
-
-    h_U.clear ( );
-    h_V.clear ( );
-    h_W.clear ( );
-    h_UU.clear( );
-    h_VV.clear( );
-    h_WW.clear( );
-    h_UV.clear( );
-    h_UW.clear( );
-    h_VW.clear( );
-    h_T.clear ( );
-    h_TT.clear( );
-    h_p.clear ( );
-}
-
-void TurbulenceAnalyzer::allocate()
-{
-    this->free();
-
-    if( collect_U  ) checkCudaErrors( cudaMalloc ( &this->U , sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_V  ) checkCudaErrors( cudaMalloc ( &this->V , sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_W  ) checkCudaErrors( cudaMalloc ( &this->W , sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_UU ) checkCudaErrors( cudaMalloc ( &this->UU, sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_VV ) checkCudaErrors( cudaMalloc ( &this->VV, sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_WW ) checkCudaErrors( cudaMalloc ( &this->WW, sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_UV ) checkCudaErrors( cudaMalloc ( &this->UV, sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_UW ) checkCudaErrors( cudaMalloc ( &this->UW, sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_VW ) checkCudaErrors( cudaMalloc ( &this->VW, sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_T  ) checkCudaErrors( cudaMalloc ( &this->T , sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_TT ) checkCudaErrors( cudaMalloc ( &this->TT, sizeof(real) * dataBase->numberOfCells ) );
-    if( collect_p  ) checkCudaErrors( cudaMalloc ( &this->p , sizeof(real) * dataBase->numberOfCells ) );
-
-    if( collect_U  ) h_U.resize ( dataBase->numberOfCells );
-    if( collect_V  ) h_V.resize ( dataBase->numberOfCells ); 
-    if( collect_W  ) h_W.resize ( dataBase->numberOfCells );
-    if( collect_UU ) h_UU.resize( dataBase->numberOfCells );
-    if( collect_VV ) h_VV.resize( dataBase->numberOfCells );
-    if( collect_WW ) h_WW.resize( dataBase->numberOfCells );
-    if( collect_UV ) h_UV.resize( dataBase->numberOfCells );
-    if( collect_UW ) h_UW.resize( dataBase->numberOfCells );
-    if( collect_VW ) h_VW.resize( dataBase->numberOfCells );
-    if( collect_T  ) h_T.resize ( dataBase->numberOfCells );
-    if( collect_TT ) h_TT.resize( dataBase->numberOfCells );
-    if( collect_p  ) h_p.resize ( dataBase->numberOfCells );
-}
-
-void TurbulenceAnalyzer::writeRestartFile(std::string filename)
-{
-    this->download(false);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    filename += ".rst";
-
-    *logging::out << logging::Logger::INFO_HIGH << "Writing restart file " << filename << " ... ";
-
-    std::ofstream file;
-
-	file.open( filename.c_str(), std::ios::binary );
-
-	if (!file.is_open()) {
-		throw std::runtime_error("\nFile cannot be opened.\n\nERROR!\n\n\n");
-        return;
-	}
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.write( (char*) &this->counter, sizeof( uint ) );
-
-    file.write( (char*) &dataBase->numberOfLevels, sizeof( uint ) );
-    file.write( (char*) &dataBase->numberOfCells,  sizeof( uint ) );
-    file.write( (char*) &dataBase->numberOfFaces,  sizeof( uint ) );
-
-    file.write( (char*) &this->collect_U , sizeof( bool ) );
-    file.write( (char*) &this->collect_V , sizeof( bool ) );
-    file.write( (char*) &this->collect_W , sizeof( bool ) );
-    file.write( (char*) &this->collect_UU, sizeof( bool ) );
-    file.write( (char*) &this->collect_VV, sizeof( bool ) );
-    file.write( (char*) &this->collect_WW, sizeof( bool ) );
-    file.write( (char*) &this->collect_UV, sizeof( bool ) );
-    file.write( (char*) &this->collect_UW, sizeof( bool ) );
-    file.write( (char*) &this->collect_VW, sizeof( bool ) );
-    file.write( (char*) &this->collect_T , sizeof( bool ) );
-    file.write( (char*) &this->collect_TT, sizeof( bool ) );
-    file.write( (char*) &this->collect_p , sizeof( bool ) );
-
-    if( collect_U  ) file.write( (char*) this->h_U.data() , dataBase->numberOfCells * sizeof( real ) );
-    if( collect_V  ) file.write( (char*) this->h_V.data() , dataBase->numberOfCells * sizeof( real ) );
-    if( collect_W  ) file.write( (char*) this->h_W.data() , dataBase->numberOfCells * sizeof( real ) );
-    if( collect_UU ) file.write( (char*) this->h_UU.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_VV ) file.write( (char*) this->h_VV.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_WW ) file.write( (char*) this->h_WW.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_UV ) file.write( (char*) this->h_UV.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_UW ) file.write( (char*) this->h_UW.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_VW ) file.write( (char*) this->h_VW.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_T  ) file.write( (char*) this->h_T.data() , dataBase->numberOfCells * sizeof( real ) );
-    if( collect_TT ) file.write( (char*) this->h_TT.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_p  ) file.write( (char*) this->h_p.data() , dataBase->numberOfCells * sizeof( real ) );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_HIGH << "done!\n";
-}
-
-void TurbulenceAnalyzer::readRestartFile(std::string filename)
-{
-    filename += ".rst";
-
-    *logging::out << logging::Logger::INFO_HIGH << "Reading restart file " << filename << " ... ";
-	
-    std::ifstream file;
-
-	file.open( filename.c_str(), std::ios::binary );
-
-	if (!file.is_open()) {
-		throw std::runtime_error("\nFile cannot be opened.\n\nERROR!\n\n\n");
-        return;
-	}
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.read( (char*) &this->counter, sizeof( uint ) );
-
-    uint numberOfLevelsRead;
-    uint numberOfCellsRead;
-    uint numberOfFacesRead;
-    
-    file.read( (char*) &numberOfLevelsRead, sizeof( uint ) );
-    file.read( (char*) &numberOfCellsRead,  sizeof( uint ) );
-    file.read( (char*) &numberOfFacesRead,  sizeof( uint ) );
-
-    if( numberOfLevelsRead != dataBase->numberOfLevels ||
-        numberOfCellsRead  != dataBase->numberOfCells  ||
-        numberOfFacesRead  != dataBase->numberOfFaces  ){
-    
-        *logging::out << logging::Logger::INFO_HIGH << "\n";
-        *logging::out << logging::Logger::INFO_HIGH << "Levels: " << numberOfLevelsRead << " vs. " << dataBase->numberOfLevels << "\n";
-        *logging::out << logging::Logger::INFO_HIGH << "Cells:  " << numberOfCellsRead  << " vs. " << dataBase->numberOfCells  << "\n";
-        *logging::out << logging::Logger::INFO_HIGH << "Faces:  " << numberOfFacesRead  << " vs. " << dataBase->numberOfFaces  << "\n";
-
-        file.close();
-
-        throw std::runtime_error("\nERROR: Restart file does not match current setup");
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.read( (char*) &this->collect_U , sizeof( bool ) );
-    file.read( (char*) &this->collect_V , sizeof( bool ) );
-    file.read( (char*) &this->collect_W , sizeof( bool ) );
-    file.read( (char*) &this->collect_UU, sizeof( bool ) );
-    file.read( (char*) &this->collect_VV, sizeof( bool ) );
-    file.read( (char*) &this->collect_WW, sizeof( bool ) );
-    file.read( (char*) &this->collect_UV, sizeof( bool ) );
-    file.read( (char*) &this->collect_UW, sizeof( bool ) );
-    file.read( (char*) &this->collect_VW, sizeof( bool ) );
-    file.read( (char*) &this->collect_T , sizeof( bool ) );
-    file.read( (char*) &this->collect_TT, sizeof( bool ) );
-    file.read( (char*) &this->collect_p , sizeof( bool ) );
-
-    if( collect_U  ) file.read( (char*) this->h_U.data() , dataBase->numberOfCells * sizeof( real ) );
-    if( collect_V  ) file.read( (char*) this->h_V.data() , dataBase->numberOfCells * sizeof( real ) );
-    if( collect_W  ) file.read( (char*) this->h_W.data() , dataBase->numberOfCells * sizeof( real ) );
-    if( collect_UU ) file.read( (char*) this->h_UU.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_VV ) file.read( (char*) this->h_VV.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_WW ) file.read( (char*) this->h_WW.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_UV ) file.read( (char*) this->h_UV.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_UW ) file.read( (char*) this->h_UW.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_VW ) file.read( (char*) this->h_VW.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_T  ) file.read( (char*) this->h_T.data() , dataBase->numberOfCells * sizeof( real ) );
-    if( collect_TT ) file.read( (char*) this->h_TT.data(), dataBase->numberOfCells * sizeof( real ) );
-    if( collect_p  ) file.read( (char*) this->h_p.data() , dataBase->numberOfCells * sizeof( real ) );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.close();
-
-    this->upload();
-
-    *logging::out << logging::Logger::INFO_HIGH << "done!\n";
-}
-
-TurbulenceAnalyzerStruct TurbulenceAnalyzer::toStruct()
-{
-    TurbulenceAnalyzerStruct turbulenceAnalyzer;
-
-    turbulenceAnalyzer.U  = this->U;
-    turbulenceAnalyzer.V  = this->V;
-    turbulenceAnalyzer.W  = this->W;
-
-    turbulenceAnalyzer.UU = this->UU;
-    turbulenceAnalyzer.VV = this->VV;
-    turbulenceAnalyzer.WW = this->WW;
-
-    turbulenceAnalyzer.UV = this->UV;
-    turbulenceAnalyzer.UW = this->UW;
-    turbulenceAnalyzer.VW = this->VW;
-
-    turbulenceAnalyzer.T  = this->T;
-    turbulenceAnalyzer.TT = this->TT;
-    turbulenceAnalyzer.p  = this->p;
-
-    return turbulenceAnalyzer;
-}
-
-void TurbulenceAnalyzer::download(bool normalize)
-{
-    if( collect_U  ) checkCudaErrors( cudaMemcpy( this->h_U.data() , this->U , sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_V  ) checkCudaErrors( cudaMemcpy( this->h_V.data() , this->V , sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_W  ) checkCudaErrors( cudaMemcpy( this->h_W.data() , this->W , sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_UU ) checkCudaErrors( cudaMemcpy( this->h_UU.data(), this->UU, sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_VV ) checkCudaErrors( cudaMemcpy( this->h_VV.data(), this->VV, sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_WW ) checkCudaErrors( cudaMemcpy( this->h_WW.data(), this->WW, sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_UV ) checkCudaErrors( cudaMemcpy( this->h_UV.data(), this->UV, sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_UW ) checkCudaErrors( cudaMemcpy( this->h_UW.data(), this->UW, sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_VW ) checkCudaErrors( cudaMemcpy( this->h_VW.data(), this->VW, sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_T  ) checkCudaErrors( cudaMemcpy( this->h_T.data() , this->T , sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_TT ) checkCudaErrors( cudaMemcpy( this->h_TT.data(), this->TT, sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-    if( collect_p  ) checkCudaErrors( cudaMemcpy( this->h_p.data() , this->p , sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-
-    if(normalize)
-    {
-        for( uint cellIndex = 0; cellIndex < dataBase->numberOfCells; cellIndex++ )
-        {
-            if( collect_U  ) this->h_U [ cellIndex ] /= real(this->counter);
-            if( collect_V  ) this->h_V [ cellIndex ] /= real(this->counter);
-            if( collect_W  ) this->h_W [ cellIndex ] /= real(this->counter);
-            if( collect_UU ) this->h_UU[ cellIndex ] /= real(this->counter);
-            if( collect_VV ) this->h_VV[ cellIndex ] /= real(this->counter);
-            if( collect_WW ) this->h_WW[ cellIndex ] /= real(this->counter);
-            if( collect_UV ) this->h_UV[ cellIndex ] /= real(this->counter);
-            if( collect_UW ) this->h_UW[ cellIndex ] /= real(this->counter);
-            if( collect_VW ) this->h_VW[ cellIndex ] /= real(this->counter);
-            if( collect_T  ) this->h_T [ cellIndex ] /= real(this->counter);
-            if( collect_TT ) this->h_TT[ cellIndex ] /= real(this->counter);
-            if( collect_p  ) this->h_p [ cellIndex ] /= real(this->counter);
-
-            if( collect_UU ) this->h_UU[ cellIndex ] -= this->h_U[ cellIndex ] * this->h_U[ cellIndex ];
-            if( collect_VV ) this->h_VV[ cellIndex ] -= this->h_V[ cellIndex ] * this->h_V[ cellIndex ];
-            if( collect_WW ) this->h_WW[ cellIndex ] -= this->h_W[ cellIndex ] * this->h_W[ cellIndex ];
-
-            if( collect_UV ) this->h_UV[ cellIndex ] -= this->h_U[ cellIndex ] * this->h_V[ cellIndex ];
-            if( collect_UW ) this->h_UW[ cellIndex ] -= this->h_U[ cellIndex ] * this->h_W[ cellIndex ];
-            if( collect_VW ) this->h_VW[ cellIndex ] -= this->h_V[ cellIndex ] * this->h_W[ cellIndex ];
-        
-            if( collect_TT ) this->h_TT[ cellIndex ] -= this->h_T[ cellIndex ] * this->h_T[ cellIndex ];
-        }
-    }
-}
-
-void TurbulenceAnalyzer::upload()
-{
-    if( collect_U  ) checkCudaErrors( cudaMemcpy( this->U , this->h_U.data() , sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_V  ) checkCudaErrors( cudaMemcpy( this->V , this->h_V.data() , sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_W  ) checkCudaErrors( cudaMemcpy( this->W , this->h_W.data() , sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_UU ) checkCudaErrors( cudaMemcpy( this->UU, this->h_UU.data(), sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_VV ) checkCudaErrors( cudaMemcpy( this->VV, this->h_VV.data(), sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_WW ) checkCudaErrors( cudaMemcpy( this->WW, this->h_WW.data(), sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_UV ) checkCudaErrors( cudaMemcpy( this->UV, this->h_UV.data(), sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_UW ) checkCudaErrors( cudaMemcpy( this->UW, this->h_UW.data(), sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_VW ) checkCudaErrors( cudaMemcpy( this->VW, this->h_VW.data(), sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_T  ) checkCudaErrors( cudaMemcpy( this->T , this->h_T.data() , sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_TT ) checkCudaErrors( cudaMemcpy( this->TT, this->h_TT.data(), sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    if( collect_p  ) checkCudaErrors( cudaMemcpy( this->p , this->h_p.data() , sizeof(real) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-}
-
-} // namespace GksGpu
-
-
diff --git a/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.h b/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.h
deleted file mode 100644
index 1f9f3d7338733d48ca4ffb237f09f864566896a4..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.h
+++ /dev/null
@@ -1,128 +0,0 @@
-#ifndef  TurbulenceAnalyzer_H
-#define  TurbulenceAnalyzer_H
-
-#include <vector>
-#include <string>
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-namespace GksGpu {
-
-struct DataBase;
-struct Parameters;
-
-struct TurbulenceAnalyzerStruct
-{
-    uint counter;
-
-    real* U ;
-    real* V ;
-    real* W ;
-    
-    real* UU;
-    real* VV;
-    real* WW;
-    
-    real* UV;
-    real* UW;
-    real* VW;
-    
-    real* T ;
-    real* TT;
-
-    real* p ;
-};
-
-class GKSGPU_EXPORT TurbulenceAnalyzer
-{
-private:
-
-    SPtr<DataBase> dataBase;
-
-    uint analyzeStartIter;
-
-    real* U ;
-    real* V ;
-    real* W ;
-
-    real* UU;
-    real* VV;
-    real* WW;
-
-    real* UV;
-    real* UW;
-    real* VW;
-
-    real* T ;
-    real* TT;
-    real* p ;
-
-public:
-
-    uint counter;
-
-    std::vector<real> h_U ;
-    std::vector<real> h_V ;
-    std::vector<real> h_W ;
-
-    std::vector<real> h_UU;
-    std::vector<real> h_VV;
-    std::vector<real> h_WW;
-
-    std::vector<real> h_UV;
-    std::vector<real> h_UW;
-    std::vector<real> h_VW;
-
-    std::vector<real> h_T ;
-    std::vector<real> h_TT;
-    std::vector<real> h_p ;
-
-    bool collect_U ;
-    bool collect_V ;
-    bool collect_W ;
-    
-    bool collect_UU;
-    bool collect_VV;
-    bool collect_WW;
-    
-    bool collect_UV;
-    bool collect_UW;
-    bool collect_VW;
-    
-    bool collect_T ;
-    bool collect_TT;
-    bool collect_p ;
-
-public:
-
-    ~TurbulenceAnalyzer();
-
-    TurbulenceAnalyzer( SPtr<DataBase> dataBase, uint analyzeStartIter );
-
-    void free();
-
-    void allocate();
-
-    bool run( uint iter, Parameters parameters );
-
-    void writeRestartFile( std::string filename );
-
-    void readRestartFile( std::string filename );
-
-    TurbulenceAnalyzerStruct toStruct();
-
-    void download(bool normalize = true);
-
-    void upload();
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/AdiabaticWall.cu b/src/gpu/GksGpu/BoundaryConditions/AdiabaticWall.cu
deleted file mode 100644
index a5b6a8812283528975461f50fc0d10ee5a0aeb6b..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/AdiabaticWall.cu
+++ /dev/null
@@ -1,162 +0,0 @@
-#include "AdiabaticWall.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const AdiabaticWallStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const AdiabaticWallStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void AdiabaticWall::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                                const Parameters parameters, 
-                                                const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    //cudaDeviceSynchronize();
-
-    getLastCudaError("AdiabaticWall::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const AdiabaticWallStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const AdiabaticWallStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-    PrimitiveVariables ghostCellPrim;
-    {
-        PrimitiveVariables domainCellPrim;
-        PrimitiveVariables secondCellPrim;
-
-        {
-            ConservedVariables domainCellData;
-            readCellData( domainCellIdx, dataBase, domainCellData );
-            domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-
-            ConservedVariables secondCellData;
-            if( secondCellIdx != INVALID_INDEX ){
-                readCellData( secondCellIdx, dataBase, secondCellData );
-                secondCellPrim = toPrimitiveVariables( secondCellData, parameters.K );
-            }
-        }
-
-        ghostCellPrim.U      = c2o1 * boundaryCondition.velocity.x - domainCellPrim.U;
-        ghostCellPrim.V      = c2o1 * boundaryCondition.velocity.y - domainCellPrim.V;
-        ghostCellPrim.W      = c2o1 * boundaryCondition.velocity.z - domainCellPrim.W;
-
-        ghostCellPrim.lambda = domainCellPrim.lambda;
-    #ifdef USE_PASSIVE_SCALAR
-        ghostCellPrim.S_1    = domainCellPrim.S_1;
-        ghostCellPrim.S_2    = domainCellPrim.S_2;
-    #endif // USE_PASSIVE_SCALAR
-
-
-        if( boundaryCondition.useSecondCells && secondCellIdx != INVALID_INDEX ){
-            real p1 = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-            real p2 = c1o2 * secondCellPrim.rho / secondCellPrim.lambda;
-
-            ghostCellPrim.rho = c2o1 * ( c2o1 * p1 - p2 ) * ghostCellPrim.lambda;
-        }
-        else{
-            real p = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-
-            ghostCellPrim.rho = c2o1 * p * ghostCellPrim.lambda;
-        }
-    }
-
-    {
-        ConservedVariables ghostCons = toConservedVariables( ghostCellPrim, parameters.K );
-
-        writeCellData( ghostCellIdx, dataBase, ghostCons );
-    }
-}
-
-AdiabaticWall::AdiabaticWall(SPtr<DataBase> dataBase, Vec3 velocity, bool useSecondCells)
-    : BoundaryCondition( dataBase )
-{
-    this->velocity = velocity;
-    this->useSecondCells = useSecondCells;
-}
-
-bool AdiabaticWall::isWall()
-{
-    return true;
-}
-
-bool AdiabaticWall::isInsulated()
-{
-    return true;
-}
-
-bool AdiabaticWall::secondCellsNeeded()
-{
-    return true;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/AdiabaticWall.h b/src/gpu/GksGpu/BoundaryConditions/AdiabaticWall.h
deleted file mode 100644
index 88519dc3d145f6c09e8b9cc5135803c53245a378..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/AdiabaticWall.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef AdiabaticWall_CUH
-#define AdiabaticWall_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct AdiabaticWallStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    Vec3 velocity;
-
-    bool useSecondCells;
-};
-
-struct GKSGPU_EXPORT AdiabaticWall : public BoundaryCondition //, public IsothermalWallStruct
-{
-    Vec3 velocity;
-
-    bool useSecondCells;
-
-    AdiabaticWall( SPtr<DataBase> dataBase, Vec3 velocity, bool useSecondCells );
-
-    virtual bool isWall() override;
-
-    virtual bool isInsulated() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    AdiabaticWallStruct toStruct()
-    {
-        AdiabaticWallStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.velocity      = this->velocity;
-
-        boundaryCondition.useSecondCells  = this->useSecondCells;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.cpp b/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.cpp
deleted file mode 100644
index fb1ddb1ff7db9d1b6da0115a25aab87f029537fc..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-#include "BoundaryCondition.h"
-
-#include <memory>
-#include <vector>
-
-#include "GridGenerator/grid/NodeValues.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseAllocator.h"
-#include "DataBase/DataBaseStruct.h"
-
-using namespace vf::gpu;
-
-namespace GksGpu{
-
-BoundaryCondition::BoundaryCondition( SPtr<DataBase> dataBase )
-    : myAllocator ( dataBase->myAllocator )
-{
-      numberOfCells = INVALID_INDEX;
-      ghostCells    = nullptr;
-      domainCells   = nullptr;
-      secondCells   = nullptr;
-}
-
-BoundaryCondition::~BoundaryCondition()
-{
-    this->myAllocator->freeMemory( *this );
-}
-
-void BoundaryCondition::findBoundaryCells(GksMeshAdapter & adapter, bool allowGhostCells, std::function<bool(Vec3)> boundaryFinder)
-{
-    this->myAllocator->freeMemory( *this );
-
-    std::vector<uint> ghostCells;
-    std::vector<uint> domainCells;
-    std::vector<uint> secondCells;
-
-    numberOfCellsPerLevel.resize( adapter.numberOfLevels );
-    startOfCellsPerLevel.resize ( adapter.numberOfLevels );
-
-    for( auto& n : numberOfCellsPerLevel ) n = 0;
-
-    for( uint level = 0; level < adapter.numberOfLevels; level++ )
-    {
-        uint startIdx = adapter.startOfCellsPerLevel[level] 
-                      + adapter.numberOfBulkCellsPerLevel[level];
-
-        uint endIdx   = adapter.startOfCellsPerLevel[level] 
-                      + adapter.numberOfCellsPerLevel[level];
-
-        for( uint cellIdx = startIdx ; cellIdx < endIdx; cellIdx++ )
-        {
-            MeshCell& cell = adapter.cells[ cellIdx ];
-
-            if( !boundaryFinder( cell.cellCenter ) ) continue;
-
-            if( cell.type != STOPPER_OUT_OF_GRID && cell.type != STOPPER_OUT_OF_GRID_BOUNDARY && cell.type != STOPPER_SOLID ) continue;
-
-            if( cell.isRecvCell ) continue;
-
-            // look in all directions
-            uint maximalSearchDirection = 27;
-
-            // in case of Flux BC look only at face neighbors
-            if( this->isFluxBC() ) maximalSearchDirection = 6;
-
-            for( uint idx = 0; idx < maximalSearchDirection; idx++ )
-            {
-                uint neighborCellIdx = cell.cellToCell[ idx ];
-
-                if( neighborCellIdx == INVALID_INDEX ) continue;
-
-                MeshCell& neighborCell = adapter.cells[ neighborCellIdx ];
-
-                bool neighborCellIsFluid = neighborCell.type != STOPPER_OUT_OF_GRID && 
-                                           neighborCell.type != STOPPER_OUT_OF_GRID_BOUNDARY && 
-                                           neighborCell.type != STOPPER_SOLID;
-
-                bool neighborCellIsValidGhostCell = !this->isFluxBC() && allowGhostCells && !boundaryFinder( neighborCell.cellCenter );
-
-                if( neighborCellIsFluid || neighborCellIsValidGhostCell )
-                {
-                    ghostCells.push_back ( cellIdx );
-                    domainCells.push_back( neighborCellIdx );
-
-                    this->numberOfCellsPerLevel[ level ]++;
-
-                    if( this->secondCellsNeeded() )
-                    {
-                        secondCells.push_back( neighborCell.cellToCell[ idx ] );
-                    }
-
-                    if( this->isWall()      ) cell.isWall      = this->isWall();
-                    if( this->isFluxBC()    ) cell.isFluxBC    = this->isFluxBC();
-                    if( this->isInsulated() ) cell.isInsulated = this->isInsulated();
-
-                    break;
-                }
-            }
-        }
-    }
-
-    startOfCellsPerLevel[ 0 ] = 0;
-
-    for( uint level = 1; level < adapter.numberOfLevels; level++ )
-    {
-        startOfCellsPerLevel[ level ] = startOfCellsPerLevel [ level - 1 ]
-                                      + numberOfCellsPerLevel[ level - 1 ];
-    }
-
-    this->numberOfCells = ghostCells.size();
-
-    this->myAllocator->allocateMemory( shared_from_this(), ghostCells, domainCells, secondCells );
-}
-
-bool BoundaryCondition::isFluxBC()
-{
-    return false;
-}
-
-bool BoundaryCondition::isInsulated()
-{
-    return false;
-}
-
-bool BoundaryCondition::secondCellsNeeded()
-{
-    return false;
-}
-
-} // namespace GksGpu
\ No newline at end of file
diff --git a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h b/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h
deleted file mode 100644
index 9c3bac9c3e2795fa99f339461c6a7f2d16448696..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef BoundaryCondition_H
-#define BoundaryCondition_H
-
-#include <functional>
-
-#include <memory>
-#include <vector>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "Parameters/Parameters.h"
-
-class  GksMeshAdapter;
-
-namespace GksGpu{
-
-class  DataBaseAllocator;
-struct DataBase;
-
-struct BoundaryConditionStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-};
-
-struct GKSGPU_EXPORT BoundaryCondition : virtual public BoundaryConditionStruct, public std::enable_shared_from_this<BoundaryCondition>
-{
-    SPtr<DataBaseAllocator> myAllocator;
-
-    std::vector<uint> numberOfCellsPerLevel;
-    std::vector<uint> startOfCellsPerLevel;
-
-    BoundaryCondition( SPtr<DataBase> dataBase );
-
-    ~BoundaryCondition();
-
-    virtual void findBoundaryCells( GksMeshAdapter& adapter, bool allowGhostCells, std::function<bool(Vec3)> boundaryFinder);
-
-    virtual bool isWall() = 0;
-
-    virtual bool isFluxBC();
-
-    virtual bool isInsulated();
-
-    virtual bool secondCellsNeeded();
-
-    virtual void runBoundaryConditionKernel( const SPtr<DataBase> dataBase,
-                                             const Parameters parameters,
-                                             const uint level ) = 0;
-
-    BoundaryConditionStruct toStruct()
-    {
-        BoundaryConditionStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells      = this->ghostCells;
-        boundaryCondition.domainCells     = this->domainCells;
-        boundaryCondition.secondCells     = this->secondCells;
-
-        return boundaryCondition;
-    }
-
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/ConcreteHeatFlux.cu b/src/gpu/GksGpu/BoundaryConditions/ConcreteHeatFlux.cu
deleted file mode 100644
index 61f001d7b082be04b07dde525d9b0a961e6f89e4..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/ConcreteHeatFlux.cu
+++ /dev/null
@@ -1,231 +0,0 @@
-#include "ConcreteHeatFlux.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <iostream>
-
-#include <thrust/host_vector.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-#include "Core/Logger/Logger.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-#include "FlowStateData/ThermalDependencies.cuh"
-
-#include "FluxComputation/Moments.cuh"
-#include "FluxComputation/ApplyFlux.cuh"
-#include "FluxComputation/Transformation.cuh"
-#include "FluxComputation/AssembleFlux.cuh"
-#include "FluxComputation/ExpansionCoefficients.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const ConcreteHeatFluxStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const ConcreteHeatFluxStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void ConcreteHeatFlux::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                          const Parameters parameters, 
-                                          const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("HeatFlux::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const ConcreteHeatFluxStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const ConcreteHeatFluxStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-#ifdef USE_PASSIVE_SCALAR
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-    real dx = boundaryCondition.L / real(boundaryCondition.numberOfPoints + 1);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    PrimitiveVariables domainCellPrim;
-    {
-        ConservedVariables domainCellData;
-        readCellData(domainCellIdx, dataBase, domainCellData);
-        domainCellPrim = toPrimitiveVariables(domainCellData, parameters.K);
-    }
-    PrimitiveVariables secondCellPrim;
-    {
-        ConservedVariables secondCellData;
-        readCellData(secondCellIdx, dataBase, secondCellData);
-        secondCellPrim = toPrimitiveVariables(secondCellData, parameters.K);
-    }
-
-    real TF = c3o2 * getT(domainCellPrim) - c1o2 * getT(secondCellPrim);
-    //real TF = getT(domainCellPrim);
-
-    for( uint i = 0; i < boundaryCondition.numberOfPoints; i++ )
-    {
-        uint finiteDifferenceIndex = ( startIndex + index ) * boundaryCondition.numberOfPoints + i;
-
-        real T0 = boundaryCondition.temperatures[ finiteDifferenceIndex ];
-
-        real Tn;
-        if( i == 0 )
-            Tn = TF;
-        else
-            Tn = boundaryCondition.temperatures[ finiteDifferenceIndex - 1 ];
-
-        real Tp;
-        if( i == boundaryCondition.numberOfPoints - 1 )
-            Tp = boundaryCondition.ambientTemperature;
-        else
-            Tp = boundaryCondition.temperatures[ finiteDifferenceIndex + 1 ];
-
-        real dTdxx = ( Tp + Tn - c2o1 * T0 ) / ( dx * dx );
-
-        boundaryCondition.temperatures[ finiteDifferenceIndex ] += parameters.dt * boundaryCondition.temperatureConductivity * dTdxx;
-    }
-
-    ConservedVariables flux;
-
-    {
-        real T0 = TF;
-        real T1 = boundaryCondition.temperatures[ ( startIndex + index ) * boundaryCondition.numberOfPoints     ];
-        real T2 = boundaryCondition.temperatures[ ( startIndex + index ) * boundaryCondition.numberOfPoints + 1 ];
-
-
-        real k = boundaryCondition.temperatureConductivity * boundaryCondition.density * boundaryCondition.specificHeatCapacity;
-
-        flux.rhoE = - k * ( - c3o2 * T0 + c2o1 * T1 - c1o2 * T2 ) / dx;
-    }
-
-    flux = (parameters.dt * parameters.dx * parameters.dx) * flux;
-
-    applyFluxToNegCell(dataBase, domainCellIdx, flux, 'a', parameters);
-
-#endif
-}
-
-ConcreteHeatFlux::~ConcreteHeatFlux()
-{
-    checkCudaErrors( cudaFree( this->temperatures ) );
-}
-
-ConcreteHeatFlux::ConcreteHeatFlux(SPtr<DataBase> dataBase, uint numberOfPoints, real temperatureConductivity, real density, real specificHeatCapacity, real L, real ambientTemperature)
-    : BoundaryCondition( dataBase )
-{
-    this->numberOfPoints = numberOfPoints;
-
-    this->temperatureConductivity = temperatureConductivity;
-    this->density                 = density;
-    this->specificHeatCapacity    = specificHeatCapacity;
-
-    this->L = L;
-    this->ambientTemperature = ambientTemperature;
-
-    this->temperatures = nullptr;
-}
-
-void ConcreteHeatFlux::init()
-{
-    checkCudaErrors( cudaMalloc( &this->temperatures, sizeof(real) * numberOfPoints * this->numberOfCells ) );
-
-    // initialize values
-    thrust::device_ptr<real> dev_ptr(this->temperatures);
-    thrust::fill(dev_ptr, dev_ptr + numberOfPoints * this->numberOfCells, this->ambientTemperature);
-
-    this->ghostCellsHost.resize(this->numberOfCells);
-    this->domainCellsHost.resize(this->numberOfCells);
-    this->secondCellsHost.resize(this->numberOfCells);
-
-    this->temperaturesHost.resize(this->numberOfPoints * this->numberOfCells);
-}
-
-void ConcreteHeatFlux::download()
-{
-    checkCudaErrors( cudaMemcpy(this->ghostCellsHost.data() , this->ghostCells , sizeof(uint) * this->numberOfCells, cudaMemcpyDeviceToHost ) );
-    checkCudaErrors( cudaMemcpy(this->domainCellsHost.data(), this->domainCells, sizeof(uint) * this->numberOfCells, cudaMemcpyDeviceToHost ) );
-    checkCudaErrors( cudaMemcpy(this->secondCellsHost.data(), this->secondCells, sizeof(uint) * this->numberOfCells, cudaMemcpyDeviceToHost ) );
-
-    checkCudaErrors( cudaMemcpy(this->temperaturesHost.data(), this->temperatures, sizeof(real) * this->numberOfCells * this->numberOfPoints, cudaMemcpyDeviceToHost ) );
-}
-
-bool ConcreteHeatFlux::isWall()
-{
-    return true;
-}
-
-bool ConcreteHeatFlux::isInsulated()
-{
-    return true;
-}
-
-bool ConcreteHeatFlux::isFluxBC()
-{
-    return true;
-}
-
-bool ConcreteHeatFlux::secondCellsNeeded()
-{
-    return true;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/ConcreteHeatFlux.h b/src/gpu/GksGpu/BoundaryConditions/ConcreteHeatFlux.h
deleted file mode 100644
index b8ff0d316a0dc5d126afc277cde662d8c89eab8d..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/ConcreteHeatFlux.h
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef ConcreteHeatFlux_CUH
-#define ConcreteHeatFlux_CUH
-
-#include <memory>
-
-#include <thrust/device_vector.h>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct ConcreteHeatFluxStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    uint  numberOfPoints;
-
-    real* temperatures;
-
-    real* heatConductivity;
-
-    real temperatureConductivity;
-    real density;
-    real specificHeatCapacity;
-
-    real L;
-    real ambientTemperature;
-};
-
-struct GKSGPU_EXPORT ConcreteHeatFlux : public BoundaryCondition //, public IsothermalWallStruct
-{
-    real* temperatures;
-
-    uint numberOfPoints;
-
-    real temperatureConductivity;
-    real density;
-    real specificHeatCapacity;
-
-    real L;
-    real ambientTemperature;
-
-    std::vector<uint> ghostCellsHost ;
-    std::vector<uint> domainCellsHost;
-    std::vector<uint> secondCellsHost;
-
-    std::vector<real> temperaturesHost;
-
-    ~ConcreteHeatFlux();
-
-    ConcreteHeatFlux( SPtr<DataBase> dataBase, uint numberOfPoints, real temperatureConductivity, real density, real specificHeatCapacity, real L, real ambientTemperature );
-
-    void init();
-
-    void download();
-
-    virtual bool isWall() override;
-
-    virtual bool isInsulated() override;
-
-    virtual bool isFluxBC() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    ConcreteHeatFluxStruct toStruct()
-    {
-        ConcreteHeatFluxStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells  = this->numberOfCells;
-
-        boundaryCondition.ghostCells     = this->ghostCells;
-        boundaryCondition.domainCells    = this->domainCells;
-        boundaryCondition.secondCells    = this->secondCells;
-
-        boundaryCondition.temperatures   = this->temperatures;
-        boundaryCondition.numberOfPoints = this->numberOfPoints;
-
-        boundaryCondition.temperatureConductivity = this->temperatureConductivity;
-        boundaryCondition.density                 = this->density;
-        boundaryCondition.specificHeatCapacity    = this->specificHeatCapacity;
-
-        boundaryCondition.L                  = this->L;
-        boundaryCondition.ambientTemperature = this->ambientTemperature;
-
-        return boundaryCondition;
-    }
-
-    void writeVTKFile( SPtr<DataBase> dataBase, Parameters& parameters, std::string filename );
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu b/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu
deleted file mode 100644
index fd55918246bd4aebb38f1ed8982d86e776eb7fbb..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu
+++ /dev/null
@@ -1,150 +0,0 @@
-#include "CreepingMassFlux.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <iostream>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "FluxComputation/Moments.cuh"
-#include "FluxComputation/ApplyFlux.cuh"
-#include "FluxComputation/Transformation.cuh"
-#include "FluxComputation/AssembleFlux.cuh"
-#include "FluxComputation/ExpansionCoefficients.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const CreepingMassFluxStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const CreepingMassFluxStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void CreepingMassFlux::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                          const Parameters parameters, 
-                                          const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("HeatFlux::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const CreepingMassFluxStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const CreepingMassFluxStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    PrimitiveVariables domainCellPrim;
-    {
-        ConservedVariables domainCellData;
-        readCellData(domainCellIdx, dataBase, domainCellData);
-        domainCellPrim = toPrimitiveVariables(domainCellData, parameters.K);
-    }
-
-    ConservedVariables flux;
-
-    if( boundaryCondition.velocity > 0.0 )
-    {
-        flux.rho = boundaryCondition.velocity * boundaryCondition.rho;
-
-        flux.rhoE = ( parameters.K + c3o1 ) / ( c4o1 * boundaryCondition.lambda ) * flux.rho;
-        //flux.rhoE = (parameters.K + three) / (four * domainCellPrim.lambda) * flux.rho;
-
-    #ifdef USE_PASSIVE_SCALAR
-        flux.rhoS_1 = flux.rho;
-    #endif // USE_PASSIVE_SCALAR
-        flux = (parameters.dt * parameters.dx * parameters.dx) * flux;
-
-        applyFluxToPosCell(dataBase, domainCellIdx, flux, 'z', parameters);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-}
-
-CreepingMassFlux::CreepingMassFlux(SPtr<DataBase> dataBase, real rho, real velocity, real lambda)
-    : BoundaryCondition( dataBase )
-{
-    this->rho      = rho;
-    this->velocity = velocity;
-    this->lambda   = lambda;
-}
-
-bool CreepingMassFlux::isWall()
-{
-    return true;
-}
-
-bool CreepingMassFlux::isFluxBC()
-{
-    return true;
-}
-
-bool CreepingMassFlux::secondCellsNeeded()
-{
-    return false;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.h b/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.h
deleted file mode 100644
index 70d5f11f2fb6b5ce03195c416dca4035b9e89055..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef CreepingMassFlux_CUH
-#define CreepingMassFlux_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct CreepingMassFluxStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    real rho;
-    real velocity;
-    real lambda;
-};
-
-struct GKSGPU_EXPORT CreepingMassFlux : public BoundaryCondition //, public IsothermalWallStruct
-{
-    real rho;
-    real velocity;
-    real lambda;
-
-    CreepingMassFlux( SPtr<DataBase> dataBase, real rho, real velocity, real lambda );
-
-    virtual bool isWall() override;
-
-    virtual bool isFluxBC() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    CreepingMassFluxStruct toStruct()
-    {
-        CreepingMassFluxStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.rho           = this->rho;
-        boundaryCondition.velocity      = this->velocity;
-        boundaryCondition.lambda        = this->lambda;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/Extrapolation.cu b/src/gpu/GksGpu/BoundaryConditions/Extrapolation.cu
deleted file mode 100644
index 34fa30611b33f7c6226a81955e7d637bd4010f17..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Extrapolation.cu
+++ /dev/null
@@ -1,108 +0,0 @@
-#include "Extrapolation.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const ExtrapolationStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const ExtrapolationStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Extrapolation::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                               const Parameters parameters, 
-                                               const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    getLastCudaError("IsothermalWall::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const ExtrapolationStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const ExtrapolationStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-
-    ConservedVariables domainCellData;
-    readCellData ( domainCellIdx, dataBase, domainCellData );
-    writeCellData( ghostCellIdx , dataBase, domainCellData );
-}
-
-Extrapolation::Extrapolation(SPtr<DataBase> dataBase)
-    : BoundaryCondition( dataBase )
-{
-}
-
-bool Extrapolation::isWall()
-{
-    return false;
-}
-
-bool Extrapolation::secondCellsNeeded()
-{
-    return false;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/Extrapolation.h b/src/gpu/GksGpu/BoundaryConditions/Extrapolation.h
deleted file mode 100644
index 4353a5e58e947d248c9b87f7838dab45f986e5f5..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Extrapolation.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef Extrapolation_CUH
-#define Extrapolation_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct ExtrapolationStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-};
-
-struct GKSGPU_EXPORT Extrapolation : public BoundaryCondition //, public IsothermalWallStruct
-{
-    Extrapolation( SPtr<DataBase> dataBase );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    ExtrapolationStruct toStruct()
-    {
-        ExtrapolationStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu b/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu
deleted file mode 100644
index 87f880bcf1001a012487f5df327566dfe1ded350..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu
+++ /dev/null
@@ -1,137 +0,0 @@
-#include "HeatFlux.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <iostream>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "FluxComputation/Moments.cuh"
-#include "FluxComputation/ApplyFlux.cuh"
-#include "FluxComputation/Transformation.cuh"
-#include "FluxComputation/AssembleFlux.cuh"
-#include "FluxComputation/ExpansionCoefficients.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const HeatFluxStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const HeatFluxStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void HeatFlux::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                          const Parameters parameters, 
-                                          const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("HeatFlux::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const HeatFluxStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const HeatFluxStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    PrimitiveVariables domainCellPrim;
-    {
-        ConservedVariables domainCellData;
-        readCellData(domainCellIdx, dataBase, domainCellData);
-        domainCellPrim = toPrimitiveVariables(domainCellData, parameters.K);
-    }
-
-    ConservedVariables flux;
-
-    flux.rhoE = boundaryCondition.HRRPUA * parameters.dt * parameters.dx * parameters.dx;
-
-    applyFluxToPosCell(dataBase, domainCellIdx, flux, 'z', parameters);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-}
-
-HeatFlux::HeatFlux(SPtr<DataBase> dataBase, real  HRRPUA)
-    : BoundaryCondition( dataBase )
-{
-    this->HRRPUA = HRRPUA;
-}
-
-bool HeatFlux::isWall()
-{
-    return true;
-}
-
-bool HeatFlux::isFluxBC()
-{
-    return false;
-}
-
-bool HeatFlux::secondCellsNeeded()
-{
-    return false;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/HeatFlux.h b/src/gpu/GksGpu/BoundaryConditions/HeatFlux.h
deleted file mode 100644
index 701f9f4db8593d9f27771d12ac451db13a42f625..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/HeatFlux.h
+++ /dev/null
@@ -1,64 +0,0 @@
-#ifndef HeatFlux_CUH
-#define HeatFlux_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct HeatFluxStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    real HRRPUA;
-};
-
-struct GKSGPU_EXPORT HeatFlux : public BoundaryCondition //, public IsothermalWallStruct
-{
-    real HRRPUA;
-
-    HeatFlux( SPtr<DataBase> dataBase, real HRRPUA );
-
-    virtual bool isWall() override;
-
-    virtual bool isFluxBC() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    HeatFluxStruct toStruct()
-    {
-        HeatFluxStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.HRRPUA        = this->HRRPUA;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/Inflow.cu b/src/gpu/GksGpu/BoundaryConditions/Inflow.cu
deleted file mode 100644
index 7f9b2777f5e75a5c79a2ee5f280871a021cf6c94..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Inflow.cu
+++ /dev/null
@@ -1,167 +0,0 @@
-#include "Inflow.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const InflowStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const InflowStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Inflow::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                                const Parameters parameters, 
-                                                const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("Inflow::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const InflowStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const InflowStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    // uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-    PrimitiveVariables ghostCellPrim;
-    {
-        PrimitiveVariables domainCellPrim;
-        PrimitiveVariables secondCellPrim;
-
-        {
-            ConservedVariables domainCellData;
-            readCellData( domainCellIdx, dataBase, domainCellData );
-            domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-        }
-
-        real factor = c1o1;
-        //if( fabs(boundaryCondition.a1) > real(1.0e-6) )
-        {
-            real y = dataBase.cellCenter[ VEC_Y(ghostCellIdx, dataBase.numberOfCells) ];
-            real z = dataBase.cellCenter[ VEC_Z(ghostCellIdx, dataBase.numberOfCells) ];
-
-            real r = sqrt( y*y + z*z );
-
-            //factor =  ( boundaryCondition.a0 
-            //          + boundaryCondition.a1*y 
-            //          + boundaryCondition.a2*y*y  ) * ( four / boundaryCondition.a1 / boundaryCondition.a1 );
-
-            factor = ( boundaryCondition.a0 
-                      + boundaryCondition.a1*r 
-                      + boundaryCondition.a2*r*r  );
-
-            //factor = one;
-        }
-
-        //ghostCellPrim.rho    = two *          boundaryCondition.rho        - domainCellPrim.rho;
-        ghostCellPrim.U      = c2o1 * factor * boundaryCondition.velocity.x - domainCellPrim.U;
-        ghostCellPrim.V      = c2o1 * factor * boundaryCondition.velocity.y - domainCellPrim.V;
-        ghostCellPrim.W      = c2o1 * factor * boundaryCondition.velocity.z - domainCellPrim.W;
-        ghostCellPrim.lambda = c2o1 *          boundaryCondition.lambda     - domainCellPrim.lambda;
-    #ifdef USE_PASSIVE_SCALAR
-        ghostCellPrim.S_1    = c2o1 *          boundaryCondition.S_1        - domainCellPrim.S_1;
-        ghostCellPrim.S_2    = c2o1 *          boundaryCondition.S_2        - domainCellPrim.S_2;
-    #endif // USE_PASSIVE_SCALAR
-        
-        real p = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-        ghostCellPrim.rho = c2o1 * p * ghostCellPrim.lambda;
-    }
-
-    {
-        ConservedVariables ghostCons = toConservedVariables( ghostCellPrim, parameters.K );
-
-        writeCellData( ghostCellIdx, dataBase, ghostCons );
-    }
-}
-
-Inflow::Inflow(SPtr<DataBase> dataBase, Vec3 velocity, real lambda, real rho, real a0, real a1, real a2, real S_1, real S_2)
-    : BoundaryCondition( dataBase )
-{
-    this->velocity       = velocity;
-    this->lambda         = lambda;
-    this->rho            = rho;
-    this->S_1            = S_1;
-    this->S_2            = S_2;
-
-    this->a0             = a0;
-    this->a1             = a1;
-    this->a2             = a2;
-}
-
-bool Inflow::isWall()
-{
-    return false;
-}
-
-bool Inflow::secondCellsNeeded()
-{
-    return false;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/Inflow.h b/src/gpu/GksGpu/BoundaryConditions/Inflow.h
deleted file mode 100644
index b1036c4187ce3d040f8ebf2f51a5fbed1ecb76cc..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Inflow.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef Inflow_CUH
-#define Inflow_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct InflowStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    Vec3 velocity;
-    real lambda;
-    real rho;
-    real S_1;
-    real S_2;
-
-    real a0, a1, a2;
-};
-
-struct GKSGPU_EXPORT Inflow : public BoundaryCondition //, public IsothermalWallStruct
-{
-    Vec3 velocity;
-    real lambda;
-    real rho;
-    real S_1;
-    real S_2;
-
-    real a0, a1, a2;
-
-    Inflow( SPtr<DataBase> dataBase, Vec3 velocity, real lambda, real rho, real a0, real a1, real a2, real S_1 = 0.0, real S_2 = 0.0 );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    InflowStruct toStruct()
-    {
-        InflowStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells      = this->ghostCells;
-        boundaryCondition.domainCells     = this->domainCells;
-        boundaryCondition.secondCells     = this->secondCells;
-
-        boundaryCondition.velocity        = this->velocity;
-        boundaryCondition.lambda          = this->lambda;
-        boundaryCondition.rho             = this->rho;
-        boundaryCondition.S_1             = this->S_1;
-        boundaryCondition.S_2             = this->S_2;
-
-        boundaryCondition.a0              = this->a0;
-        boundaryCondition.a1              = this->a1;
-        boundaryCondition.a2              = this->a2;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/InflowComplete.cu b/src/gpu/GksGpu/BoundaryConditions/InflowComplete.cu
deleted file mode 100644
index 6cda5a57f8327c5782725c0b720c4b6ba8a11271..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/InflowComplete.cu
+++ /dev/null
@@ -1,322 +0,0 @@
-#include "InflowComplete.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <iostream>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "FluxComputation/Moments.cuh"
-#include "FluxComputation/ApplyFlux.cuh"
-#include "FluxComputation/Transformation.cuh"
-#include "FluxComputation/AssembleFlux.cuh"
-#include "FluxComputation/ExpansionCoefficients.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const InflowCompleteStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const InflowCompleteStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void InflowComplete::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                                const Parameters parameters, 
-                                                const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("Inflow::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const InflowCompleteStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const InflowCompleteStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if( true )
-    {
-        PrimitiveVariables ghostCellPrim;
-        {
-            PrimitiveVariables domainCellPrim;
-
-            {
-                ConservedVariables domainCellData;
-                readCellData(domainCellIdx, dataBase, domainCellData);
-                domainCellPrim = toPrimitiveVariables(domainCellData, parameters.K);
-            }
-
-            //    ghostCellPrim.rho    = two * boundaryCondition.prim.rho    - domainCellPrim.rho;
-            //    ghostCellPrim.U      = two * boundaryCondition.prim.U      - domainCellPrim.U;
-            //    ghostCellPrim.V      = two * boundaryCondition.prim.V      - domainCellPrim.V;
-            //    ghostCellPrim.W      = two * boundaryCondition.prim.W      - domainCellPrim.W;
-            ghostCellPrim.lambda = /*two * boundaryCondition.prim.lambda -*/ domainCellPrim.lambda;
-            //#ifdef USE_PASSIVE_SCALAR
-            //    ghostCellPrim.S_1    = two * boundaryCondition.prim.S_1    - domainCellPrim.S_1;
-            //    ghostCellPrim.S_2    = two * boundaryCondition.prim.S_2    - domainCellPrim.S_2;
-            //#endif // USE_PASSIVE_SCALAR
-
-            ghostCellPrim.rho = boundaryCondition.prim.rho;
-            ghostCellPrim.U = c2o1 * boundaryCondition.prim.U - domainCellPrim.U;
-            ghostCellPrim.V = c2o1 * boundaryCondition.prim.V - domainCellPrim.V;
-            //ghostCellPrim.W = two * boundaryCondition.prim.W - domainCellPrim.W;
-            ghostCellPrim.W      = boundaryCondition.prim.W;
-            //ghostCellPrim.lambda = boundaryCondition.prim.lambda;
-#ifdef USE_PASSIVE_SCALAR
-            ghostCellPrim.S_1 = boundaryCondition.prim.S_1;
-            ghostCellPrim.S_2 = boundaryCondition.prim.S_2;
-#endif // USE_PASSIVE_SCALAR
-
-            real y = dataBase.cellCenter[VEC_Y(ghostCellIdx, dataBase.numberOfCells)];
-            real x = dataBase.cellCenter[VEC_X(ghostCellIdx, dataBase.numberOfCells)];
-
-            real r = sqrt(y*y + x*x);
-
-            ghostCellPrim.W *= (c1o1 - c4o1*r*r);
-#ifdef USE_PASSIVE_SCALAR
-            ghostCellPrim.S_1 *= (c1o1 - c4o1*r*r);
-            ghostCellPrim.S_2 = boundaryCondition.prim.S_2 - ghostCellPrim.S_1;
-#endif // USE_PASSIVE_SCALAR
-        }
-
-        {
-            ConservedVariables ghostCons = toConservedVariables(ghostCellPrim, parameters.K);
-
-            writeCellData(ghostCellIdx, dataBase, ghostCons);
-        }
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if( false )
-    {
-        PrimitiveVariables domainCellPrim;
-        {
-            ConservedVariables domainCellData;
-            readCellData(domainCellIdx, dataBase, domainCellData);
-            domainCellPrim = toPrimitiveVariables(domainCellData, parameters.K);
-        }
-
-        real momentU [NUMBER_OF_MOMENTS];
-        real momentV [NUMBER_OF_MOMENTS];
-        real momentW [NUMBER_OF_MOMENTS];
-        real momentXi[NUMBER_OF_MOMENTS];
-
-        PrimitiveVariables facePrim = boundaryCondition.prim;
-
-        //facePrim.lambda = domainCellPrim.lambda;
-
-        transformGlobalToLocal( facePrim, 'z' );
-
-        computeMoments(facePrim, parameters.K, momentU, momentV, momentW, momentXi);
-
-        ConservedVariables flux;
-
-        flux.rho  = momentU[0 + 1];
-        //flux.rhoU = momentU[1 + 1];
-
-        //flux.rhoE = c1o2 * ( momentU[2 + 1]
-        //                   + momentU[0 + 1] * momentV [2]
-        //                   + momentU[0 + 1] * momentW [2]
-        //                   + momentU[0 + 1] * momentXi[2] );
-
-        flux.rhoE = momentU[0 + 1] * c1o4 * ( parameters.K + c5o1 ) / boundaryCondition.prim.lambda;
-
-        //////////////////////////////////////////////////////////////////////////
-
-#ifdef USE_PASSIVE_SCALAR
-        flux.rhoS_1 = flux.rho * boundaryCondition.prim.S_1;
-        flux.rhoS_2 = flux.rho * boundaryCondition.prim.S_2;
-#endif // USE_PASSIVE_SCALAR
-
-        flux   = ( parameters.dt * parameters.dx * parameters.dx * facePrim.rho ) * flux;
-
-        transformLocalToGlobal( flux, 'z' );
-
-        applyFluxToPosCell(dataBase, domainCellIdx, flux, 'z', parameters);
-
-        return;
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //if( false )
-    //{
-    //    PrimitiveVariables domainCellPrim;
-    //    {
-    //        ConservedVariables domainCellData;
-    //        readCellData(domainCellIdx, dataBase, domainCellData);
-    //        domainCellPrim = toPrimitiveVariables(domainCellData, parameters.K);
-    //    }    
-
-    //    PrimitiveVariables facePrim = boundaryCondition.prim;
-
-    //    //////////////////////////////////////////////////////////////////////////
-
-    //    real ax[LENGTH_CELL_DATA];
-    //    real ay[LENGTH_CELL_DATA];
-    //    real az[LENGTH_CELL_DATA];
-    //    real at[LENGTH_CELL_DATA];
-
-    //#pragma unroll
-    //    for( uint i = 0; i < LENGTH_CELL_DATA; i++ )
-    //    { 
-    //        ax[i] = c0o1; 
-    //        ay[i] = c0o1; 
-    //        az[i] = c0o1; 
-    //        at[i] = c0o1;
-    //    }
-    //    
-    //    {
-    //        ConservedVariables gradN, gradT1, gradT2;
-
-    //        transformGlobalToLocal( gradN , 'z' );
-    //        transformGlobalToLocal( gradT1, 'z' );
-    //        transformGlobalToLocal( gradT2, 'z' );
-
-    //        transformGlobalToLocal( facePrim, 'z' );
-
-    //        computeExpansionCoefficients(facePrim, gradN , parameters.K, ax);
-    //        computeExpansionCoefficients(facePrim, gradT1, parameters.K, ay);
-    //        computeExpansionCoefficients(facePrim, gradT2, parameters.K, az);
-    //    }
-
-    //    //////////////////////////////////////////////////////////////////////////
-
-    //    {
-    //        ConservedVariables flux;
-    //        {
-    //            real momentU [ NUMBER_OF_MOMENTS ]; 
-    //            real momentV [ NUMBER_OF_MOMENTS ]; 
-    //            real momentW [ NUMBER_OF_MOMENTS ]; 
-    //            real momentXi[ NUMBER_OF_MOMENTS ];
-
-    //            computeMoments( facePrim, parameters.K, momentU, momentV, momentW, momentXi );
-
-    //            Vec3 force = parameters.force;
-
-    //            transformGlobalToLocal(force, 'z');
-
-    //            {
-    //                ConservedVariables timeGrad;
-    //                computeTimeDerivative( facePrim, 
-    //                                       momentU, 
-    //                                       momentV, 
-    //                                       momentW, 
-    //                                       momentXi, 
-    //                                       ax, ay, az,
-    //                                       force,
-    //                                       timeGrad );
-
-    //                computeExpansionCoefficients( facePrim, timeGrad, parameters.K, at );
-    //            }
-    //            {
-    //                real timeCoefficients[4];
-    //                computeTimeCoefficients( facePrim, parameters, timeCoefficients );
-
-    //                real heatFlux;
-    //                assembleFlux( facePrim, 
-    //                              momentU, momentV, momentW, momentXi,
-    //                              ax, ay, az, at, 
-    //                              timeCoefficients, 
-    //                              parameters,
-    //                              force,
-    //                              flux,
-    //                              heatFlux );
-
-    //                transformLocalToGlobal( flux, 'z' );
-    //            }
-    //        }
-
-    //        applyFluxToPosCell(dataBase, domainCellIdx, flux, 'z', parameters);
-    //        applyFluxToNegCell(dataBase, ghostCellIdx , flux, 'z', parameters);
-    //    }
-    //}
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-}
-
-InflowComplete::InflowComplete(SPtr<DataBase> dataBase, PrimitiveVariables prim)
-    : BoundaryCondition( dataBase )
-{
-    this->prim = prim;
-}
-
-bool InflowComplete::isWall()
-{
-    return false;
-}
-
-bool InflowComplete::isFluxBC()
-{
-    return false;
-}
-
-bool InflowComplete::secondCellsNeeded()
-{
-    return false;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/InflowComplete.h b/src/gpu/GksGpu/BoundaryConditions/InflowComplete.h
deleted file mode 100644
index 6d402d150a10a05666f44ab5c73c068fab507736..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/InflowComplete.h
+++ /dev/null
@@ -1,64 +0,0 @@
-#ifndef InflowComplete_CUH
-#define InflowComplete_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct InflowCompleteStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    PrimitiveVariables prim;
-};
-
-struct GKSGPU_EXPORT InflowComplete : public BoundaryCondition //, public IsothermalWallStruct
-{
-    PrimitiveVariables prim;
-
-    InflowComplete( SPtr<DataBase> dataBase, PrimitiveVariables prim );
-
-    virtual bool isWall() override;
-
-    virtual bool isFluxBC() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    InflowCompleteStruct toStruct()
-    {
-        InflowCompleteStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.prim          = prim;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/IsothermalWall.cu b/src/gpu/GksGpu/BoundaryConditions/IsothermalWall.cu
deleted file mode 100644
index 5dee058d719527ad554ed866b53d7efab536e697..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/IsothermalWall.cu
+++ /dev/null
@@ -1,159 +0,0 @@
-#include "IsothermalWall.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const IsothermalWallStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const IsothermalWallStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void IsothermalWall::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                                const Parameters parameters, 
-                                                const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("IsothermalWall::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const IsothermalWallStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const IsothermalWallStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-    PrimitiveVariables ghostCellPrim;
-    {
-        PrimitiveVariables domainCellPrim;
-        PrimitiveVariables secondCellPrim;
-
-        {
-            ConservedVariables domainCellData;
-            readCellData( domainCellIdx, dataBase, domainCellData );
-            domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-
-            ConservedVariables secondCellData;
-            if( secondCellIdx != INVALID_INDEX ){
-                readCellData( secondCellIdx, dataBase, secondCellData );
-                secondCellPrim = toPrimitiveVariables( secondCellData, parameters.K );
-            }
-        }
-
-        ghostCellPrim.U      = c2o1 * boundaryCondition.velocity.x - domainCellPrim.U;
-        ghostCellPrim.V      = c2o1 * boundaryCondition.velocity.y - domainCellPrim.V;
-        ghostCellPrim.W      = c2o1 * boundaryCondition.velocity.z - domainCellPrim.W;
-        ghostCellPrim.lambda = c2o1 * boundaryCondition.lambda     - domainCellPrim.lambda;
-    #ifdef USE_PASSIVE_SCALAR
-        ghostCellPrim.S_1    = /*two * boundaryCondition.S_1 -*/ domainCellPrim.S_1;
-        ghostCellPrim.S_2    = /*two * boundaryCondition.S_2 -*/ domainCellPrim.S_2;
-    #endif // USE_PASSIVE_SCALAR
-
-
-        if( boundaryCondition.useSecondCells && secondCellIdx != INVALID_INDEX ){
-            real p1 = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-            real p2 = c1o2 * secondCellPrim.rho / secondCellPrim.lambda;
-
-            ghostCellPrim.rho = c2o1 * ( c2o1 * p1 - p2 ) * ghostCellPrim.lambda;
-        }
-        else{
-            real p = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-
-            ghostCellPrim.rho = c2o1 * p * ghostCellPrim.lambda;
-        }
-    }
-
-    {
-        ConservedVariables ghostCons = toConservedVariables( ghostCellPrim, parameters.K );
-
-        writeCellData( ghostCellIdx, dataBase, ghostCons );
-    }
-}
-
-IsothermalWall::IsothermalWall(SPtr<DataBase> dataBase, Vec3 velocity, real lambda, bool useSecondCells, real S_1, real S_2)
-    : BoundaryCondition( dataBase )
-{
-    this->velocity       = velocity;
-    this->lambda         = lambda;
-    this->S_1            = S_1;
-    this->S_2            = S_2;
-    this->useSecondCells = useSecondCells;
-}
-
-bool IsothermalWall::isWall()
-{
-    return true;
-}
-
-bool IsothermalWall::secondCellsNeeded()
-{
-    return true;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/IsothermalWall.h b/src/gpu/GksGpu/BoundaryConditions/IsothermalWall.h
deleted file mode 100644
index 57bafca6bf7190553aa80dacfc41b8207eeef099..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/IsothermalWall.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef IsothermalWall_CUH
-#define IsothermalWall_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct IsothermalWallStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    Vec3 velocity;
-    real lambda;
-    real S_1;
-    real S_2;
-
-    bool useSecondCells;
-};
-
-struct GKSGPU_EXPORT IsothermalWall : public BoundaryCondition //, public IsothermalWallStruct
-{
-    Vec3 velocity;
-    real lambda;
-    real S_1;
-    real S_2;
-
-    bool useSecondCells;
-
-    IsothermalWall( SPtr<DataBase> dataBase, Vec3 velocity, real lambda, bool useSecondCells, real S_1 = 0.0, real S_2 = 0.0 );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    IsothermalWallStruct toStruct()
-    {
-        IsothermalWallStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells      = this->ghostCells;
-        boundaryCondition.domainCells     = this->domainCells;
-        boundaryCondition.secondCells     = this->secondCells;
-
-        boundaryCondition.velocity        = this->velocity;
-        boundaryCondition.lambda          = this->lambda;
-        boundaryCondition.S_1             = this->S_1;
-        boundaryCondition.S_2             = this->S_2;
-
-        boundaryCondition.useSecondCells  = this->useSecondCells;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu b/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu
deleted file mode 100644
index f6e69742635d594b2f0f1319642c51a5dde78a9e..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu
+++ /dev/null
@@ -1,154 +0,0 @@
-#include "MassCompensation.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <iostream>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "FluxComputation/Moments.cuh"
-#include "FluxComputation/ApplyFlux.cuh"
-#include "FluxComputation/Transformation.cuh"
-#include "FluxComputation/AssembleFlux.cuh"
-#include "FluxComputation/ExpansionCoefficients.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const MassCompensationStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const MassCompensationStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void MassCompensation::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                          const Parameters parameters, 
-                                          const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("HeatFlux::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const MassCompensationStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const MassCompensationStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    PrimitiveVariables domainCellPrim;
-    {
-        ConservedVariables domainCellData;
-        readCellData(domainCellIdx, dataBase, domainCellData);
-        domainCellPrim = toPrimitiveVariables(domainCellData, parameters.K);
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real p0 = c1o2 * boundaryCondition.rho / boundaryCondition.lambda;
-    real p1 = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( p1 > p0 )
-    {
-        ConservedVariables flux;
-
-        flux.rho = c2o1 * p0 * domainCellPrim.lambda - domainCellPrim.rho;
-
-        //flux.rhoE = ( parameters.K + three ) / ( four * boundaryCondition.lambda ) * flux.rho;
-        flux.rhoE = (parameters.K + c3o1) / (c4o1 * domainCellPrim.lambda) * flux.rho;
-
-        flux = (parameters.dt * parameters.dx * parameters.dx) * flux;
-
-        applyFluxToPosCell(dataBase, domainCellIdx, flux, 'z', parameters);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-}
-
-MassCompensation::MassCompensation(SPtr<DataBase> dataBase, real rho, real velocity, real lambda)
-    : BoundaryCondition( dataBase )
-{
-    this->rho      = rho;
-    this->velocity = velocity;
-    this->lambda   = lambda;
-}
-
-bool MassCompensation::isWall()
-{
-    return false;
-}
-
-bool MassCompensation::isFluxBC()
-{
-    return false;
-}
-
-bool MassCompensation::secondCellsNeeded()
-{
-    return false;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/MassCompensation.h b/src/gpu/GksGpu/BoundaryConditions/MassCompensation.h
deleted file mode 100644
index 6b2c47206718e10f5664f1b7275ce4393d0e3774..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/MassCompensation.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef MassCompensation_CUH
-#define MassCompensation_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct MassCompensationStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    real rho;
-    real velocity;
-    real lambda;
-};
-
-struct GKSGPU_EXPORT MassCompensation : public BoundaryCondition //, public IsothermalWallStruct
-{
-    real rho;
-    real velocity;
-    real lambda;
-
-    MassCompensation( SPtr<DataBase> dataBase, real rho, real velocity, real lambda );
-
-    virtual bool isWall() override;
-
-    virtual bool isFluxBC() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    MassCompensationStruct toStruct()
-    {
-        MassCompensationStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.rho           = this->rho;
-        boundaryCondition.velocity      = this->velocity;
-        boundaryCondition.lambda        = this->lambda;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/Open.cu b/src/gpu/GksGpu/BoundaryConditions/Open.cu
deleted file mode 100644
index 9413d7015ac46b18395e8544df162a3868dd0204..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Open.cu
+++ /dev/null
@@ -1,193 +0,0 @@
-#include "Open.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const OpenStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const OpenStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Open::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                               const Parameters parameters, 
-                                               const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    getLastCudaError("IsothermalWall::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const OpenStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const OpenStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-    ConservedVariables domainCellData, secondCellData, ghostCellData;
-    readCellData ( domainCellIdx, dataBase, domainCellData );
-    readCellData ( secondCellIdx, dataBase, secondCellData );
-
-    PrimitiveVariables domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-    PrimitiveVariables secondCellPrim = toPrimitiveVariables( secondCellData, parameters.K );
-    
-    //////////////////////////////////////////////////////////////////////////
-
-    real xGhostCell  = dataBase.cellCenter[ VEC_X(ghostCellIdx, dataBase.numberOfCells) ];
-    real yGhostCell  = dataBase.cellCenter[ VEC_Y(ghostCellIdx, dataBase.numberOfCells) ];
-    real zGhostCell  = dataBase.cellCenter[ VEC_Z(ghostCellIdx, dataBase.numberOfCells) ];
-    
-    real xDomainCell = dataBase.cellCenter[ VEC_X(domainCellIdx, dataBase.numberOfCells) ];
-    real yDomainCell = dataBase.cellCenter[ VEC_Y(domainCellIdx, dataBase.numberOfCells) ];
-    real zDomainCell = dataBase.cellCenter[ VEC_Z(domainCellIdx, dataBase.numberOfCells) ];
-
-    real dx = xGhostCell - xDomainCell;
-    real dy = yGhostCell - yDomainCell;
-    real dz = zGhostCell - zDomainCell;
-
-    real sign = domainCellPrim.U * dx 
-              + domainCellPrim.V * dy 
-              + domainCellPrim.W * dz;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real p1 = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-    real p2 = c1o2 * secondCellPrim.rho / secondCellPrim.lambda;
-
-    real p0 = c1o2 * boundaryCondition.prim.rho / boundaryCondition.prim.lambda;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //if( sign > zero )
-    //if( p2 > p1 )
-    if( p1 > p0 )
-    {
-        ghostCellData = domainCellData;
-        //ghostCellData = two * domainCellData + ( - one ) * secondCellData;
-
-        
-        ghostCellData.rhoU = c0o1;
-        ghostCellData.rhoV = c0o1;
-        ghostCellData.rhoW = c0o1;
-    }
-    else
-    {
-        PrimitiveVariables ghostCellPrim  = boundaryCondition.prim;
-
-        ghostCellPrim.U = domainCellPrim.U;
-        ghostCellPrim.V = domainCellPrim.V;
-        ghostCellPrim.W = domainCellPrim.W;
-
-        //ghostCellPrim.U = p0/p1;
-        //ghostCellPrim.V = p0/p1;
-        //ghostCellPrim.W = p0/p1;
-
-        //ghostCellPrim.U = two * domainCellPrim.U - secondCellPrim.U;
-        //ghostCellPrim.V = two * domainCellPrim.V - secondCellPrim.V;
-        //ghostCellPrim.W = two * domainCellPrim.W - secondCellPrim.W;
-
-        real velocity = sqrt( ghostCellPrim.U * ghostCellPrim.U + ghostCellPrim.V * ghostCellPrim.V + ghostCellPrim.W * ghostCellPrim.W );
-
-        if( velocity > boundaryCondition.velocityLimiter  )
-        {
-            ghostCellPrim.U *= boundaryCondition.velocityLimiter / velocity;
-            ghostCellPrim.V *= boundaryCondition.velocityLimiter / velocity;
-            ghostCellPrim.W *= boundaryCondition.velocityLimiter / velocity;
-        }
-
-        ghostCellData = toConservedVariables(ghostCellPrim, parameters.K);
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //ghostCellData = two * domainCellData + ( - one ) * secondCellData;
-
-    //ghostCellData = domainCellData;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    writeCellData(ghostCellIdx, dataBase, ghostCellData);
-}
-
-Open::Open(SPtr<DataBase> dataBase, PrimitiveVariables prim, real velocityLimiter)
-    : BoundaryCondition( dataBase )
-{
-    this->prim = prim;
-
-    this->velocityLimiter = velocityLimiter;
-}
-
-bool Open::isWall()
-{
-    return false;
-}
-
-bool Open::secondCellsNeeded()
-{
-    return true;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/Open.h b/src/gpu/GksGpu/BoundaryConditions/Open.h
deleted file mode 100644
index 16cf9736b9ee4b2851d56547a523cc948ec7c64b..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Open.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef Open_CUH
-#define Open_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct OpenStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    PrimitiveVariables prim;
-
-    real velocityLimiter;
-};
-
-struct GKSGPU_EXPORT Open : public BoundaryCondition //, public IsothermalWallStruct
-{
-    PrimitiveVariables prim;
-
-    real velocityLimiter;
-
-    Open( SPtr<DataBase> dataBase, PrimitiveVariables prim, real velocityLimiter );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    OpenStruct toStruct()
-    {
-        OpenStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells   = this->numberOfCells;
-
-        boundaryCondition.ghostCells      = this->ghostCells;
-        boundaryCondition.domainCells     = this->domainCells;
-        boundaryCondition.secondCells     = this->secondCells;
-
-        boundaryCondition.prim            = this->prim;
-
-        boundaryCondition.velocityLimiter = this->velocityLimiter;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/PassiveScalarDiriclet.cu b/src/gpu/GksGpu/BoundaryConditions/PassiveScalarDiriclet.cu
deleted file mode 100644
index e8e29790157874ee9775eba84a48a62ca71bb18b..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/PassiveScalarDiriclet.cu
+++ /dev/null
@@ -1,203 +0,0 @@
-#include "PassiveScalarDiriclet.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const PassiveScalarDiricletStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const PassiveScalarDiricletStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void PassiveScalarDiriclet::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                                       const Parameters parameters, 
-                                                       const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("Pressure::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const PassiveScalarDiricletStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const PassiveScalarDiricletStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-#ifdef USE_PASSIVE_SCALAR
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if(false){
-        uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-        uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-
-        PrimitiveVariables domainCellPrim;
-
-        ConservedVariables domainCellData;
-        readCellData( domainCellIdx, dataBase, domainCellData );
-        domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-
-        //////////////////////////////////////////////////////////////////////////
-
-        real dS_1 = ( boundaryCondition.S_1 * ( 1.0 - domainCellPrim.S_1 ) ) * parameters.dt;
-        
-        //real x = dataBase.cellCenter[VEC_X(ghostCellIdx, dataBase.numberOfCells)];
-        //real y = dataBase.cellCenter[VEC_Y(ghostCellIdx, dataBase.numberOfCells)];
-
-        //real r = sqrt( x * x + y * y );
-
-        //if( r > 0.25 ) dS_1 *= four * (c1o2 - r);
-
-        domainCellPrim.S_1 += dS_1;
-
-        domainCellPrim.S_2 = 1.0 - domainCellPrim.S_1;
-
-        //////////////////////////////////////////////////////////////////////////
-
-        domainCellData = toConservedVariables( domainCellPrim, parameters.K );
-        writeCellData(domainCellIdx, dataBase, domainCellData);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    if(true){
-        uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-        uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-        uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-        PrimitiveVariables ghostCellPrim;
-        PrimitiveVariables domainCellPrim;
-        PrimitiveVariables secondCellPrim;
-
-        {
-            ConservedVariables domainCellData;
-            readCellData( domainCellIdx, dataBase, domainCellData );
-            domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-
-            ConservedVariables secondCellData;
-            if( secondCellIdx != INVALID_INDEX ){
-                readCellData( secondCellIdx, dataBase, secondCellData );
-                secondCellPrim = toPrimitiveVariables( secondCellData, parameters.K );
-            }
-        }
-
-        ghostCellPrim.U      = - domainCellPrim.U;
-        ghostCellPrim.V      = - domainCellPrim.V;
-        ghostCellPrim.W      = - domainCellPrim.W;
-    #ifdef USE_PASSIVE_SCALAR
-        ghostCellPrim.S_1    = c2o1 * boundaryCondition.S_1 - domainCellPrim.S_1;
-        ghostCellPrim.S_2    = c2o1 * boundaryCondition.S_2 - domainCellPrim.S_2;
-    #endif // USE_PASSIVE_SCALAR
-
-        //////////////////////////////////////////////////////////////////////////
-
-        real T = getT(domainCellPrim);
-        setLambdaFromT(ghostCellPrim, T);
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if( secondCellIdx != INVALID_INDEX ){
-            real p1 = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-            real p2 = c1o2 * secondCellPrim.rho / secondCellPrim.lambda;
-
-            ghostCellPrim.rho = c2o1 * ( c2o1 * p1 - p2 ) * ghostCellPrim.lambda;
-        }
-        else{
-            real p = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-
-            ghostCellPrim.rho = c2o1 * p * ghostCellPrim.lambda;
-        }
-
-        ConservedVariables ghostCons = toConservedVariables( ghostCellPrim, parameters.K );
-
-        writeCellData( ghostCellIdx, dataBase, ghostCons );
-    }
-
-
-#endif // USE_PASSIVE_SCALAR
-}
-
-PassiveScalarDiriclet::PassiveScalarDiriclet(SPtr<DataBase> dataBase, real S_1, real S_2)
-    : BoundaryCondition( dataBase )
-{
-    this->S_1 = S_1;
-    this->S_2 = S_2;
-}
-
-bool PassiveScalarDiriclet::isWall()
-{
-    return true;
-}
-
-bool PassiveScalarDiriclet::secondCellsNeeded()
-{
-    return true;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/PassiveScalarDiriclet.h b/src/gpu/GksGpu/BoundaryConditions/PassiveScalarDiriclet.h
deleted file mode 100644
index 0c46b12a085c9e8f755e2170274466e907710d86..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/PassiveScalarDiriclet.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef PassiveScalarDiriclet_CUH
-#define PassiveScalarDiriclet_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-//struct IsothermalWallStruct : virtual public BoundaryConditionStruct
-//{
-//    Vec3 velocity;
-//    real lambda;
-//    real S;
-//};
-
-struct PassiveScalarDiricletStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    real S_1;
-    real S_2;
-};
-
-struct GKSGPU_EXPORT PassiveScalarDiriclet : public BoundaryCondition
-{
-    real S_1;
-    real S_2;
-
-    PassiveScalarDiriclet( SPtr<DataBase> dataBase, real S_1, real S_2 );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    PassiveScalarDiricletStruct toStruct()
-    {
-        PassiveScalarDiricletStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.S_1           = this->S_1;
-        boundaryCondition.S_2           = this->S_2;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/Periodic.cu b/src/gpu/GksGpu/BoundaryConditions/Periodic.cu
deleted file mode 100644
index 559e4c6dac326b417cc98adb10db1dcab154a987..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Periodic.cu
+++ /dev/null
@@ -1,154 +0,0 @@
-#include "Periodic.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-#include "DataBase/DataBaseAllocator.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const BoundaryConditionStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const BoundaryConditionStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Periodic::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                          const Parameters parameters, 
-                                          const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    getLastCudaError("Periodic::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const BoundaryConditionStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const BoundaryConditionStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    
-    ConservedVariables domainCellData;
-    readCellData ( domainCellIdx, dataBase, domainCellData );
-    writeCellData( ghostCellIdx , dataBase, domainCellData );
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Periodic::findBoundaryCells(GksMeshAdapter & adapter, bool allowGhostCells, std::function<bool(Vec3)> boundaryFinder)
-{
-    this->myAllocator->freeMemory( *this );
-
-    std::vector<uint> ghostCells;
-    std::vector<uint> domainCells;
-    std::vector<uint> secondCells;
-
-    numberOfCellsPerLevel.resize( adapter.numberOfLevels );
-    startOfCellsPerLevel.resize ( adapter.numberOfLevels );
-
-    for( auto& n : numberOfCellsPerLevel ) n = 0;
-
-    for( uint level = 0; level < adapter.numberOfLevels; level++ )
-    {
-        uint startIdx = adapter.startOfCellsPerLevel[level] 
-                      + adapter.numberOfBulkCellsPerLevel[level];
-
-        uint endIdx   = adapter.startOfCellsPerLevel[level] 
-                      + adapter.numberOfCellsPerLevel[level];
-
-        for( uint_2 candidate : adapter.periodicBoundaryNeighbors )
-        {
-            MeshCell& cell = adapter.cells[ candidate[0] ];
-
-            if( !boundaryFinder( cell.cellCenter ) ) continue;
-         
-            if( candidate[1] == INVALID_INDEX ) continue;
-            
-            ghostCells.push_back ( candidate[0] );
-            domainCells.push_back( candidate[1] );
-                
-            this->numberOfCellsPerLevel[ level ]++;
-        }
-    }
-
-    startOfCellsPerLevel[ 0 ] = 0;
-
-    for( uint level = 1; level < adapter.numberOfLevels; level++ )
-    {
-        startOfCellsPerLevel[ level ] = startOfCellsPerLevel [ level - 1 ]
-                                      + numberOfCellsPerLevel[ level - 1 ];
-    }
-
-    this->numberOfCells = ghostCells.size();
-
-    this->myAllocator->allocateMemory( shared_from_this(), ghostCells, domainCells, secondCells );
-
-}
-
-bool Periodic::isWall()
-{
-    return false;
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/BoundaryConditions/Periodic.h b/src/gpu/GksGpu/BoundaryConditions/Periodic.h
deleted file mode 100644
index 3d6755f9dd9f3578a717e794f39bc105f8c0a345..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Periodic.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef Periodic_CUH
-#define Periodic_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct GKSGPU_EXPORT Periodic : public BoundaryCondition
-{
-    Periodic( SPtr<DataBase> dataBase ) : BoundaryCondition( dataBase ){}
-
-    virtual bool isWall();
-
-    virtual void findBoundaryCells( GksMeshAdapter& adapter, bool allowGhostCells, std::function<bool(Vec3)> boundaryFinder) override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    BoundaryConditionStruct toStruct()
-    {
-        BoundaryConditionStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/Pressure.cu b/src/gpu/GksGpu/BoundaryConditions/Pressure.cu
deleted file mode 100644
index 8523eb18e7db60b79170eee5884d22a3a25abac5..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Pressure.cu
+++ /dev/null
@@ -1,150 +0,0 @@
-#include "Pressure.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const PressureStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const PressureStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Pressure::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                                const Parameters parameters, 
-                                                const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("Pressure::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const PressureStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const PressureStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-    PrimitiveVariables ghostCellPrim;
-    {
-        PrimitiveVariables domainCellPrim;
-        PrimitiveVariables secondCellPrim;
-
-        {
-            ConservedVariables domainCellData;
-            readCellData( domainCellIdx, dataBase, domainCellData );
-            domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-
-            ConservedVariables secondCellData;
-            if( secondCellIdx != INVALID_INDEX ){
-                readCellData( secondCellIdx, dataBase, secondCellData );
-                secondCellPrim = toPrimitiveVariables( secondCellData, parameters.K );
-            }
-        }
-
-        //ghostCellPrim.rho    = two * domainCellPrim.rho    - secondCellPrim.rho;
-        ghostCellPrim.U      = c2o1 * domainCellPrim.U      - secondCellPrim.U;
-        ghostCellPrim.V      = c2o1 * domainCellPrim.V      - secondCellPrim.V;
-        ghostCellPrim.W      = c2o1 * domainCellPrim.W      - secondCellPrim.W;
-        ghostCellPrim.lambda = c2o1 * domainCellPrim.lambda - secondCellPrim.lambda;
-    #ifdef USE_PASSIVE_SCALAR
-        ghostCellPrim.S_1    = c2o1 * domainCellPrim.S_1    - secondCellPrim.S_1;
-        ghostCellPrim.S_2    = c2o1 * domainCellPrim.S_2    - secondCellPrim.S_2;
-    #endif // USE_PASSIVE_SCALAR
-
-
-        real rho0 = ( c2o1 * boundaryCondition.p0 * c1o2 * ( domainCellPrim.lambda + ghostCellPrim.lambda ) );
-        ghostCellPrim.rho = c2o1 * rho0 - domainCellPrim.rho;
-
-        //real lambda0 = ( c1o2 * ( domainCellPrim.rho + ghostCellPrim.rho  ) * c1o2 / boundaryCondition.p0 );
-        //ghostCellPrim.lambda = two * lambda0 - domainCellPrim.lambda;
-    }
-
-    {
-        ConservedVariables ghostCons = toConservedVariables( ghostCellPrim, parameters.K );
-
-        writeCellData( ghostCellIdx, dataBase, ghostCons );
-    }
-}
-
-Pressure::Pressure(SPtr<DataBase> dataBase, real p0)
-    : BoundaryCondition( dataBase )
-{
-    this->p0 = p0;
-}
-
-bool Pressure::isWall()
-{
-    return false;
-}
-
-bool Pressure::secondCellsNeeded()
-{
-    return true;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/Pressure.h b/src/gpu/GksGpu/BoundaryConditions/Pressure.h
deleted file mode 100644
index 4413a2d0c1a241ce900d9a1166382c586baad89f..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Pressure.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef Pressure_CUH
-#define Pressure_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-//struct IsothermalWallStruct : virtual public BoundaryConditionStruct
-//{
-//    Vec3 velocity;
-//    real lambda;
-//    real S;
-//};
-
-struct PressureStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    real p0;
-};
-
-struct GKSGPU_EXPORT Pressure : public BoundaryCondition
-{
-    real p0;
-
-    Pressure( SPtr<DataBase> dataBase, real p0 );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    PressureStruct toStruct()
-    {
-        PressureStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.p0            = this->p0;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/Pressure2.cu b/src/gpu/GksGpu/BoundaryConditions/Pressure2.cu
deleted file mode 100644
index c6b00d1ad5689efa1179415b3a6a3ff555700409..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Pressure2.cu
+++ /dev/null
@@ -1,188 +0,0 @@
-#include "Pressure2.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const Pressure2Struct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const Pressure2Struct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Pressure2::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                                const Parameters parameters, 
-                                                const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("Pressure::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const Pressure2Struct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const Pressure2Struct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-    PrimitiveVariables ghostCellPrim;
-    {
-        PrimitiveVariables domainCellPrim;
-        PrimitiveVariables secondCellPrim;
-
-        {
-            ConservedVariables domainCellData;
-            readCellData( domainCellIdx, dataBase, domainCellData );
-            domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-
-            ConservedVariables secondCellData;
-            if( secondCellIdx != INVALID_INDEX ){
-                readCellData( secondCellIdx, dataBase, secondCellData );
-                secondCellPrim = toPrimitiveVariables( secondCellData, parameters.K );
-            }
-        }
-
-        //ghostCellPrim.rho    = two * domainCellPrim.rho    - secondCellPrim.rho;
-        ghostCellPrim.U      = c2o1 * domainCellPrim.U      - secondCellPrim.U;
-        ghostCellPrim.V      = c2o1 * domainCellPrim.V      - secondCellPrim.V;
-        ghostCellPrim.W      = c2o1 * domainCellPrim.W      - secondCellPrim.W;
-        //ghostCellPrim.lambda = two * domainCellPrim.lambda - secondCellPrim.lambda;
-        ghostCellPrim.lambda = domainCellPrim.lambda;
-    #ifdef USE_PASSIVE_SCALAR
-        //ghostCellPrim.S_1    = two * domainCellPrim.S_1    - secondCellPrim.S_1;
-        //ghostCellPrim.S_2    = two * domainCellPrim.S_2    - secondCellPrim.S_2;
-        ghostCellPrim.S_1    = domainCellPrim.S_1;
-        ghostCellPrim.S_2    = domainCellPrim.S_2;
-        //ghostCellPrim.S_1    = zero;
-        //ghostCellPrim.S_2    = zero;
-    #endif // USE_PASSIVE_SCALAR
-
-
-        real rho0 = ( c2o1 * boundaryCondition.p0 * c1o2 * ( domainCellPrim.lambda + ghostCellPrim.lambda ) );
-        ghostCellPrim.rho = c2o1 * rho0 - domainCellPrim.rho;
-
-        //real lambda0 = ( c1o2 * ( domainCellPrim.rho + ghostCellPrim.rho  ) * c1o2 / boundaryCondition.p0 );
-        //ghostCellPrim.lambda = two * lambda0 - domainCellPrim.lambda;
-    
-        //////////////////////////////////////////////////////////////////////////
-
-        real xGhostCell = dataBase.cellCenter[VEC_X(ghostCellIdx, dataBase.numberOfCells)];
-        real yGhostCell = dataBase.cellCenter[VEC_Y(ghostCellIdx, dataBase.numberOfCells)];
-        real zGhostCell = dataBase.cellCenter[VEC_Z(ghostCellIdx, dataBase.numberOfCells)];
-
-        real xDomainCell = dataBase.cellCenter[VEC_X(domainCellIdx, dataBase.numberOfCells)];
-        real yDomainCell = dataBase.cellCenter[VEC_Y(domainCellIdx, dataBase.numberOfCells)];
-        real zDomainCell = dataBase.cellCenter[VEC_Z(domainCellIdx, dataBase.numberOfCells)];
-
-        real dx = xGhostCell - xDomainCell;
-        real dy = yGhostCell - yDomainCell;
-        real dz = zGhostCell - zDomainCell;
-
-        real sign = domainCellPrim.U * dx
-                  + domainCellPrim.V * dy
-                  + domainCellPrim.W * dz;
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if( sign < c0o1 )
-        {
-            //ghostCellPrim.U = - domainCellPrim.U;
-            //ghostCellPrim.V = - domainCellPrim.V;
-            //ghostCellPrim.W = - domainCellPrim.W;
-            ghostCellPrim.U = c0o1;
-            ghostCellPrim.V = c0o1;
-            ghostCellPrim.W = c0o1;
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-
-    {
-        ConservedVariables ghostCons = toConservedVariables( ghostCellPrim, parameters.K );
-
-        writeCellData( ghostCellIdx, dataBase, ghostCons );
-    }
-}
-
-Pressure2::Pressure2(SPtr<DataBase> dataBase, real p0)
-    : BoundaryCondition( dataBase )
-{
-    this->p0 = p0;
-}
-
-bool Pressure2::isWall()
-{
-    return false;
-}
-
-bool Pressure2::secondCellsNeeded()
-{
-    return true;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/Pressure2.h b/src/gpu/GksGpu/BoundaryConditions/Pressure2.h
deleted file mode 100644
index 18dc531a5240af42b84ef484980f54b5b47a9d82..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Pressure2.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef Pressure2_CUH
-#define Pressure2_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-//struct IsothermalWallStruct : virtual public BoundaryConditionStruct
-//{
-//    Vec3 velocity;
-//    real lambda;
-//    real S;
-//};
-
-struct Pressure2Struct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    real p0;
-};
-
-struct GKSGPU_EXPORT Pressure2 : public BoundaryCondition
-{
-    real p0;
-
-    Pressure2( SPtr<DataBase> dataBase, real p0 );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    Pressure2Struct toStruct()
-    {
-        Pressure2Struct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.p0            = this->p0;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/SalinasVazquez.cu b/src/gpu/GksGpu/BoundaryConditions/SalinasVazquez.cu
deleted file mode 100644
index afb2065b4cbef2b60db5010d8eb0d750b0320dbd..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/SalinasVazquez.cu
+++ /dev/null
@@ -1,176 +0,0 @@
-#include "SalinasVazquez.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const SalinasVazquezStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const SalinasVazquezStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void SalinasVazquez::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                                const Parameters parameters, 
-                                                const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    getLastCudaError("IsothermalWall::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const SalinasVazquezStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const SalinasVazquezStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
-
-    real lambda;
-    {
-        real x = dataBase.cellCenter[ VEC_X(ghostCellIdx, dataBase.numberOfCells) ];
-
-        real TMX = c1o1 / boundaryCondition.lambdaMX;
-        real TPX = c1o1 / boundaryCondition.lambdaPX;
-
-        real T = TPX + ( TMX - TPX ) * ( boundaryCondition.a0 
-                                       + boundaryCondition.a1*x 
-                                       + boundaryCondition.a2*x*x 
-                                       + boundaryCondition.a3*x*x*x );
-
-        lambda = c1o1 / T;
-    }
-
-    PrimitiveVariables ghostCellPrim;
-    {
-        PrimitiveVariables domainCellPrim;
-        PrimitiveVariables secondCellPrim;
-
-        {
-            ConservedVariables domainCellData;
-            readCellData( domainCellIdx, dataBase, domainCellData );
-            domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-
-            ConservedVariables secondCellData;
-            if( secondCellIdx != INVALID_INDEX ){
-                readCellData( secondCellIdx, dataBase, secondCellData );
-                secondCellPrim = toPrimitiveVariables( secondCellData, parameters.K );
-            }
-        }
-
-        ghostCellPrim.U      =              - domainCellPrim.U;
-        ghostCellPrim.V      =              - domainCellPrim.V;
-        ghostCellPrim.W      =              - domainCellPrim.W;
-        ghostCellPrim.lambda = c2o1 * lambda - domainCellPrim.lambda;
-    #ifdef USE_PASSIVE_SCALAR
-        ghostCellPrim.S_1    =                domainCellPrim.S_1;
-        ghostCellPrim.S_2    =                domainCellPrim.S_2;
-    #endif // USE_PASSIVE_SCALAR
-
-
-        if( boundaryCondition.useSecondCells && secondCellIdx != INVALID_INDEX ){
-            real p1 = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-            real p2 = c1o2 * secondCellPrim.rho / secondCellPrim.lambda;
-
-            ghostCellPrim.rho = c2o1 * ( c2o1 * p1 - p2 ) * ghostCellPrim.lambda;
-        }
-        else{
-            real p = c1o2 * domainCellPrim.rho / domainCellPrim.lambda;
-
-            ghostCellPrim.rho = c2o1 * p * ghostCellPrim.lambda;
-        }
-    }
-
-    {
-        ConservedVariables ghostCons = toConservedVariables( ghostCellPrim, parameters.K );
-
-        writeCellData( ghostCellIdx, dataBase, ghostCons );
-    }
-}
-
-SalinasVazquez::SalinasVazquez(SPtr<DataBase> dataBase, real lambdaMX, real lambdaPX, real a0, real a1, real a2, real a3, bool useSecondCells)
-    : BoundaryCondition( dataBase )
-{
-    this->lambdaMX       = lambdaMX;
-    this->lambdaPX       = lambdaPX;
-
-    this->a0             = a0;
-    this->a1             = a1;
-    this->a2             = a2;
-    this->a3             = a3;
-
-    this->useSecondCells = useSecondCells;
-}
-
-bool SalinasVazquez::isWall()
-{
-    return true;
-}
-
-bool SalinasVazquez::secondCellsNeeded()
-{
-    return true;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/SalinasVazquez.h b/src/gpu/GksGpu/BoundaryConditions/SalinasVazquez.h
deleted file mode 100644
index 5769b157e987ffe280da40919ce80e0f6e5de9ed..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/SalinasVazquez.h
+++ /dev/null
@@ -1,78 +0,0 @@
-#ifndef SalinasVazquez_CUH
-#define SalinasVazquez_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct SalinasVazquezStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    real lambdaMX;
-    real lambdaPX;
-
-    real a0, a1, a2, a3;
-
-    bool useSecondCells;
-};
-
-struct GKSGPU_EXPORT SalinasVazquez : public BoundaryCondition //, public IsothermalWallStruct
-{
-    real lambdaMX;
-    real lambdaPX;
-
-    real a0, a1, a2, a3;
-
-    bool useSecondCells;
-
-    SalinasVazquez( SPtr<DataBase> dataBase, real lambdaMX, real lambdaPX, real a0, real a1, real a2, real a3, bool useSecondCells );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    SalinasVazquezStruct toStruct()
-    {
-        SalinasVazquezStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells      = this->ghostCells;
-        boundaryCondition.domainCells     = this->domainCells;
-        boundaryCondition.secondCells     = this->secondCells;
-
-        boundaryCondition.lambdaMX        = this->lambdaMX;
-        boundaryCondition.lambdaPX        = this->lambdaPX;
-
-        boundaryCondition.a0              = this->a0;
-        boundaryCondition.a1              = this->a1;
-        boundaryCondition.a2              = this->a2;
-        boundaryCondition.a3              = this->a3;
-
-        boundaryCondition.useSecondCells  = this->useSecondCells;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/BoundaryConditions/Symmetry.cu b/src/gpu/GksGpu/BoundaryConditions/Symmetry.cu
deleted file mode 100644
index c4e104cf095125fd40bb0d06ab707697d84826ee..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Symmetry.cu
+++ /dev/null
@@ -1,128 +0,0 @@
-#include "Symmetry.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu{
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void boundaryConditionKernel  ( const DataBaseStruct dataBase, 
-                                                           const SymmetryStruct boundaryCondition, 
-                                                           const Parameters parameters,
-                                                           const uint startIndex,
-                                                           const uint numberOfEntities );
-
-__host__ __device__ inline void boundaryConditionFunction( const DataBaseStruct& dataBase, 
-                                                           const SymmetryStruct& boundaryCondition, 
-                                                           const Parameters& parameters,
-                                                           const uint startIndex,
-                                                           const uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Symmetry::runBoundaryConditionKernel(const SPtr<DataBase> dataBase, 
-                                          const Parameters parameters, 
-                                          const uint level)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfCellsPerLevel[ level ], 32 );
-
-    runKernel( boundaryConditionKernel,
-               boundaryConditionFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->toStruct(),
-               parameters,
-               this->startOfCellsPerLevel[ level ] );
-
-    getLastCudaError("IsothermalWall::runBoundaryConditionKernel( const SPtr<DataBase> dataBase, const Parameters parameters, const uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void boundaryConditionKernel(const DataBaseStruct dataBase, 
-                                        const SymmetryStruct boundaryCondition, 
-                                        const Parameters parameters,
-                                        const uint startIndex,
-                                        const uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    boundaryConditionFunction( dataBase, boundaryCondition, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct& dataBase, 
-                                                          const SymmetryStruct& boundaryCondition, 
-                                                          const Parameters& parameters,
-                                                          const uint startIndex,
-                                                          const uint index)
-{
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
-    uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-
-    //////////////////////////////////////////////////////////////////////////
-
-    ConservedVariables domainCellData;
-    readCellData ( domainCellIdx, dataBase, domainCellData );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    PrimitiveVariables domainCellPrim = toPrimitiveVariables( domainCellData, parameters.K );
-    PrimitiveVariables ghostCellPrim  = toPrimitiveVariables( domainCellData, parameters.K );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( boundaryCondition.direction == 'x' ) ghostCellPrim.U = - domainCellPrim.U;
-    if( boundaryCondition.direction == 'y' ) ghostCellPrim.V = - domainCellPrim.V;
-    if( boundaryCondition.direction == 'z' ) ghostCellPrim.W = - domainCellPrim.W;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    ConservedVariables ghostCellData = toConservedVariables(ghostCellPrim, parameters.K);
-
-    writeCellData( ghostCellIdx , dataBase, ghostCellData );
-}
-
-Symmetry::Symmetry(SPtr<DataBase> dataBase, char direction)
-    : BoundaryCondition( dataBase )
-{
-    this->direction = direction;
-}
-
-bool Symmetry::isWall()
-{
-    return true;
-}
-
-bool Symmetry::secondCellsNeeded()
-{
-    return false;
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/BoundaryConditions/Symmetry.h b/src/gpu/GksGpu/BoundaryConditions/Symmetry.h
deleted file mode 100644
index 6fd73fa3894fa500d21b4b5cb7e2f19a8b6fb61b..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/BoundaryConditions/Symmetry.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef Symmetry_CUH
-#define Symmetry_CUH
-
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-namespace GksGpu{
-
-struct SymmetryStruct
-{
-    uint  numberOfCells;
-
-    uint* ghostCells;
-    uint* domainCells;
-    uint* secondCells;
-
-    char direction;
-};
-
-struct GKSGPU_EXPORT Symmetry : public BoundaryCondition //, public IsothermalWallStruct
-{
-    char direction;
-
-    Symmetry( SPtr<DataBase> dataBase, char direction );
-
-    virtual bool isWall() override;
-
-    virtual bool secondCellsNeeded() override;
-
-    virtual void runBoundaryConditionKernel(const SPtr<DataBase> dataBase,
-                                            const Parameters parameters, 
-                                            const uint level) override;
-
-    SymmetryStruct toStruct()
-    {
-        SymmetryStruct boundaryCondition;
-
-        boundaryCondition.numberOfCells = this->numberOfCells;
-
-        boundaryCondition.ghostCells    = this->ghostCells;
-        boundaryCondition.domainCells   = this->domainCells;
-        boundaryCondition.secondCells   = this->secondCells;
-
-        boundaryCondition.direction     = this->direction;
-
-        return boundaryCondition;
-    }
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/CMakeLists.txt b/src/gpu/GksGpu/CMakeLists.txt
deleted file mode 100644
index 5dbc533cc5f45c006c29a12242350f0433518bbf..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-project(GksGpu LANGUAGES CUDA CXX)
-
-vf_add_library(PRIVATE_LINK basics lbmCuda GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX)
-
-target_include_directories(GksGpu PRIVATE "${VF_THIRD_DIR}/cuda_samples/")
-
-if (NOT MSVC)
-    target_compile_options(GksGpu PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-fPIC>")
-endif()
-
diff --git a/src/gpu/GksGpu/CellProperties/CellProperties.cuh b/src/gpu/GksGpu/CellProperties/CellProperties.cuh
deleted file mode 100644
index 08731b9f52cdc54cc41d5e239ac05ee6e88fecd7..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/CellProperties/CellProperties.cuh
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef CellProperties_H
-#define CellProperties_H
-
-#ifdef __CUDACC__
-#include <cuda_runtime.h>
-#else
-#ifndef __host__
-#define __host__
-#endif
-#ifndef __device__
-#define __device__
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-
-#define CELL_PROPERTIES_DEFAULT        (0u)
-#define CELL_PROPERTIES_GHOST          (1u)
-#define CELL_PROPERTIES_WALL           (2u)
-#define CELL_PROPERTIES_FINE_GHOST     (4u)
-#define CELL_PROPERTIES_IS_FLUX_BC     (8u)
-#define CELL_PROPERTIES_IS_INSULATED   (16u)
-#define CELL_PROPERTIES_5              (32u)
-#define CELL_PROPERTIES_6              (64u)
-#define CELL_PROPERTIES_7              (128u)
-
-//////////////////////////////////////////////////////////////////////////
-
-namespace GksGpu {
-
-typedef unsigned char CellProperties;
-
-//////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void setCellProperties( CellProperties& left, const CellProperties& right )
-{
-    left |= right;
-}
-
-__host__ __device__ inline void unsetCellProperties( CellProperties& left, const CellProperties& right )
-{
-    left &= ~right;
-}
-
-__host__ __device__ inline bool isCellProperties( const CellProperties& left, const CellProperties& right )
-{
-    return (left & right) == right;
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-} // namespace GksGpu
-
-#endif
-
diff --git a/src/gpu/GksGpu/CellUpdate/CellUpdate.cu b/src/gpu/GksGpu/CellUpdate/CellUpdate.cu
deleted file mode 100644
index ccc7ab12c8ffb9fa183873b36b5faef16a50d793..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/CellUpdate/CellUpdate.cu
+++ /dev/null
@@ -1,220 +0,0 @@
-#include "CellUpdate.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-#include <math.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/ThermalDependencies.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CellUpdate/Reaction.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-__global__                 void cellUpdateKernel  ( DataBaseStruct dataBase, Parameters parameters, uint startIndex, uint numberOfEntities );
-
-__host__ __device__ inline void cellUpdateFunction( DataBaseStruct dataBase, Parameters parameters, uint startIndex, uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void CellUpdate::run( SPtr<DataBase> dataBase, Parameters parameters, uint level )
-{
-    CudaUtility::CudaGrid grid( dataBase->perLevelCount[ level ].numberOfBulkCells, 32 );
-
-    runKernel( cellUpdateKernel,
-               cellUpdateFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               parameters,
-               dataBase->perLevelCount[ level ].startOfCells );
-    
-    cudaDeviceSynchronize();
-
-    getLastCudaError("CellUpdate::run( SPtr<DataBase> dataBase, Parameters parameters, uint level )");
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void cellUpdateKernel(DataBaseStruct dataBase, Parameters parameters, uint startIndex, uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-    
-    cellUpdateFunction( dataBase, parameters, startIndex, index );
-}
-
-__host__ __device__ inline void cellUpdateFunction(DataBaseStruct dataBase, Parameters parameters, uint startIndex, uint index)
-{
-    uint cellIndex = startIndex + index;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    //CellProperties cellProperties = dataBase.cellProperties[ cellIndex ];
-
-    //if( isCellProperties( cellProperties, CELL_PROPERTIES_FINE_GHOST ) );
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real cellVolume = parameters.dx * parameters.dx * parameters.dx;
-
-    ConservedVariables cons;
-
-    readCellData      (cellIndex, dataBase, cons);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    {
-        ConservedVariables update, zeroCons;
-        readCellDataUpdate(cellIndex, dataBase, update);
-        writeCellDataUpdate(cellIndex, dataBase, zeroCons);
-
-        //////////////////////////////////////////////////////////////////////////
-        // dirty fix to exclude viscous heating: Part 1
-        //ConservedVariables testCons = cons;
-        //testCons.rho  += update.rho / cellVolume;
-        //testCons.rhoE += update.rhoE/ cellVolume;
-        //PrimitiveVariables testPrim = toPrimitiveVariables(testCons, parameters.K);
-        //////////////////////////////////////////////////////////////////////////
-
-        //////////////////////////////////////////////////////////////////////////
-        //if( cellIndex == 415179 )
-        //{
-        //    //printf( "rho   = %14.4e  |  dRho   = %14.4e \n", cons.rho   , (one / cellVolume) * update.rho    );
-        //    //printf( "rhoU  = %14.4e  |  dRhoU  = %14.4e \n", cons.rhoU  , (one / cellVolume) * update.rhoU   );
-        //    //printf( "rhoV  = %14.4e  |  dRhoV  = %14.4e \n", cons.rhoV  , (one / cellVolume) * update.rhoV   );
-        //    //printf( "rhoW  = %14.4e  |  dRhoW  = %14.4e \n", cons.rhoW  , (one / cellVolume) * update.rhoW   );
-        //    printf( "rhoE  = %14.4e  |  dRhoE  = %14.4e \n", cons.rhoE  , (one / cellVolume) * update.rhoE   );
-        //    //printf( "rhoS1 = %14.4e  |  dRhoS1 = %14.4e \n", cons.rhoS_1, (one / cellVolume) * update.rhoS_1 );
-        //    //printf( "rhoS2 = %14.4e  |  dRhoS2 = %14.4e \n", cons.rhoS_2, (one / cellVolume) * update.rhoS_2 );
-        //    printf( "=================================================================\n" );
-        //}
-        //////////////////////////////////////////////////////////////////////////
-
-        cons = cons + (c1o1 / cellVolume) * update;
-        
-        //////////////////////////////////////////////////////////////////////////
-        // dirty fix to exclude viscous heating: Part 2
-        //PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-        //prim.lambda = testPrim.lambda;
-        //cons = toConservedVariables( prim, parameters.K );
-        //////////////////////////////////////////////////////////////////////////
-
-        if( isnan(cons.rho ) ||
-            isnan(cons.rhoU) ||
-            isnan(cons.rhoV) ||
-            isnan(cons.rhoW) ||
-            isnan(cons.rhoE) )
-        {
-            *dataBase.crashCellIndex = cellIndex;
-        }
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    if(parameters.forcingSchemeIdx == 0)
-    {
-        // consistent source term treatment of Tian et al. (2007)
-        cons.rhoU += parameters.force.x * parameters.dt * cons.rho;
-        cons.rhoV += parameters.force.y * parameters.dt * cons.rho;
-        cons.rhoW += parameters.force.z * parameters.dt * cons.rho;
-        cons.rhoE += parameters.force.x * dataBase.massFlux[VEC_X(cellIndex, dataBase.numberOfCells)] / (c6o1 * parameters.dx * parameters.dx)
-                   + parameters.force.y * dataBase.massFlux[VEC_Y(cellIndex, dataBase.numberOfCells)] / (c6o1 * parameters.dx * parameters.dx)
-                   + parameters.force.z * dataBase.massFlux[VEC_Z(cellIndex, dataBase.numberOfCells)] / (c6o1 * parameters.dx * parameters.dx);
-
-        dataBase.massFlux[VEC_X(cellIndex, dataBase.numberOfCells)] = c0o1;
-        dataBase.massFlux[VEC_Y(cellIndex, dataBase.numberOfCells)] = c0o1;
-        dataBase.massFlux[VEC_Z(cellIndex, dataBase.numberOfCells)] = c0o1;
-    }
-
-    if(parameters.forcingSchemeIdx == 1)
-    {
-        // forcing only on density variation
-        cons.rhoU += parameters.force.x * parameters.dt * ( cons.rho - parameters.rhoRef );
-        cons.rhoV += parameters.force.y * parameters.dt * ( cons.rho - parameters.rhoRef );
-        cons.rhoW += parameters.force.z * parameters.dt * ( cons.rho - parameters.rhoRef );
-        cons.rhoE += parameters.force.x * dataBase.massFlux[VEC_X(cellIndex, dataBase.numberOfCells)] / (c6o1 * parameters.dx * parameters.dx)
-                   + parameters.force.y * dataBase.massFlux[VEC_Y(cellIndex, dataBase.numberOfCells)] / (c6o1 * parameters.dx * parameters.dx)
-                   + parameters.force.z * dataBase.massFlux[VEC_Z(cellIndex, dataBase.numberOfCells)] / (c6o1 * parameters.dx * parameters.dx);
-
-        dataBase.massFlux[VEC_X(cellIndex, dataBase.numberOfCells)] = c0o1;
-        dataBase.massFlux[VEC_Y(cellIndex, dataBase.numberOfCells)] = c0o1;
-        dataBase.massFlux[VEC_Z(cellIndex, dataBase.numberOfCells)] = c0o1;
-    }
-
-    if(parameters.forcingSchemeIdx == 2)
-    {
-        PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-        real lambda = prim.lambda;
-
-        // forcing only on density variation
-        cons.rhoU += parameters.force.x * parameters.dt * ( cons.rho - parameters.rhoRef );
-        cons.rhoV += parameters.force.y * parameters.dt * ( cons.rho - parameters.rhoRef );
-        cons.rhoW += parameters.force.z * parameters.dt * ( cons.rho - parameters.rhoRef );
-        //cons.rhoE += parameters.force.x * dataBase.massFlux[VEC_X(cellIndex, dataBase.numberOfCells)] / (six * parameters.dx * parameters.dx)
-        //           + parameters.force.y * dataBase.massFlux[VEC_Y(cellIndex, dataBase.numberOfCells)] / (six * parameters.dx * parameters.dx)
-        //           + parameters.force.z * dataBase.massFlux[VEC_Z(cellIndex, dataBase.numberOfCells)] / (six * parameters.dx * parameters.dx);
-
-        prim = toPrimitiveVariables(cons, parameters.K);
-        prim.lambda = lambda;
-        cons = toConservedVariables(prim, parameters.K);
-    }
-    
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    // mass conserving fix for out of bounds scalars
-
-    //if( cons.rhoS_1 < zero ) { cons.rhoS_2 -= cons.rhoS_1; cons.rhoS_1 = zero; }
-    //if( cons.rhoS_2 < zero ) { cons.rhoS_1 -= cons.rhoS_2; cons.rhoS_2 = zero; }
-
-    //if( cons.rhoS_1 > cons.rho ) { cons.rhoS_2 += cons.rhoS_1 - cons.rho; cons.rhoS_1 = cons.rho; }
-    //if( cons.rhoS_2 > cons.rho ) { cons.rhoS_1 += cons.rhoS_2 - cons.rho; cons.rhoS_2 = cons.rho; }
-
-    //if( cons.rhoS_1 + cons.rhoS_2 > cons.rho )
-    //{
-    //    real faktor = (Z1 + Z2);
-
-    //    Z1 /= faktor;
-    //    Z2 /= faktor;
-    //}
-    
-    
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    chemicalReaction(dataBase, parameters, cellIndex, cons);
-    
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // Dirty fix that limits the velocity
-
-    //PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-
-    //real velocity = sqrt( prim.U * prim.U + prim.V * prim.V + prim.W * prim.W );
-
-    //if( velocity > five  )
-    //{
-    //    prim.U *= five / velocity;
-    //    prim.V *= five / velocity;
-    //    prim.W *= five / velocity;
-    //}
-
-    //cons = toConservedVariables(prim, parameters.K);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    writeCellData(cellIndex, dataBase, cons);
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/CellUpdate/CellUpdate.h b/src/gpu/GksGpu/CellUpdate/CellUpdate.h
deleted file mode 100644
index de8a0b86ca5de9501046ffaa1235c70122bf91e8..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/CellUpdate/CellUpdate.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef  CellUpdate_H
-#define  CellUpdate_H
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-#include "Parameters/Parameters.h"
-
-namespace GksGpu {
-
-class GKSGPU_EXPORT CellUpdate
-{
-public:
-
-    static void run( SPtr<DataBase> dataBase, 
-                     Parameters parameters, 
-                     uint level );
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/CellUpdate/Reaction.cuh b/src/gpu/GksGpu/CellUpdate/Reaction.cuh
deleted file mode 100644
index 21ba61220fd7b81fbb53002ea090d278d228bb66..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/CellUpdate/Reaction.cuh
+++ /dev/null
@@ -1,191 +0,0 @@
-#include "CellUpdate.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-#include <math.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/ThermalDependencies.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-inline __host__ __device__ real getTurbulentViscosityDeardorff(const DataBaseStruct& dataBase, const Parameters& parameters, const uint cellIndex, const ConservedVariables& cons )
-{
-    // See FDS 6 Technical Reference Guide, Section 4.2.3
-
-    PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-
-    ConservedVariables neighborCons;
-    PrimitiveVariables neighborPrim;
-
-    real kSGS = c0o1;
-
-    {
-        real uHead = c1o2 * prim.U;
-
-        {
-            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 0, dataBase.numberOfCells)];
-            readCellData(cellIndex, dataBase, neighborCons);
-            neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
-
-            uHead += c1o4 * neighborPrim.U;
-        }
-        {
-            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 1, dataBase.numberOfCells)];
-            readCellData(cellIndex, dataBase, neighborCons);
-            neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
-
-            uHead += c1o4 * neighborPrim.U;
-        }
-
-        kSGS += c1o2 * ( prim.U - uHead ) * ( prim.U - uHead );
-    }
-
-    {
-        real vHead = c1o2 * prim.V;
-
-        {
-            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 2, dataBase.numberOfCells)];
-            readCellData(cellIndex, dataBase, neighborCons);
-            neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
-
-            vHead += c1o4 * neighborPrim.V;
-        }
-        {
-            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 3, dataBase.numberOfCells)];
-            readCellData(cellIndex, dataBase, neighborCons);
-            neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
-
-            vHead += c1o4 * neighborPrim.V;
-        }
-
-        kSGS += c1o2 * ( prim.V - vHead ) * ( prim.V - vHead );
-    }
-
-    {
-        real wHead = c1o2 * prim.W;
-
-        {
-            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 4, dataBase.numberOfCells)];
-            readCellData(cellIndex, dataBase, neighborCons);
-            neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
-
-            wHead += c1o4 * neighborPrim.W;
-        }
-        {
-            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 5, dataBase.numberOfCells)];
-            readCellData(cellIndex, dataBase, neighborCons);
-            neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
-
-            wHead += c1o4 * neighborPrim.W;
-        }
-
-        kSGS += c1o2 * ( prim.W - wHead ) * ( prim.W - wHead );
-    }
-
-    //real turbulentViscosity = prim.rho * parameters.dx * c1o10 * sqrt(kSGS) / 0.3;
-
-    dataBase.diffusivity[cellIndex] = (realAccumulator) kSGS;
-
-    //printf("%f", kSGS);
-
-    return kSGS;
-}
-
-
-
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void chemicalReaction(DataBaseStruct dataBase, Parameters parameters, uint cellIndex, ConservedVariables& cons)
-{
-    // see FDS 5 Technical reference guide, section 6.1.4 for combustion model
-#ifdef USE_PASSIVE_SCALAR
-    if (parameters.enableReaction)
-    {
-        CellProperties cellProperties = dataBase.cellProperties[ cellIndex ];
-
-        if( isCellProperties( cellProperties, CELL_PROPERTIES_FINE_GHOST ) ) return;
-
-        PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-
-        //////////////////////////////////////////////////////////////////////////
-
-        //real diffusivity = getTurbulentViscosityDeardorff(dataBase, parameters, cellIndex, cons);
-        //real diffusivity = dataBase.diffusivity[ cellIndex ];
-        real diffusivity = dataBase.diffusivity[ cellIndex ] / ( c6o1 * parameters.dx * parameters.dx * parameters.dt );
-        dataBase.diffusivity[ cellIndex ] = c0o1;
-
-        //////////////////////////////////////////////////////////////////////////
-
-        real mixingTimeScale = real(0.1) * parameters.dx * parameters.dx / diffusivity;
-
-        //real kSGS = getTurbulentViscosityDeardorff(dataBase, parameters, cellIndex, cons);
-
-        //real mixingTimeScale_d = parameters.dx * parameters.dx / parameters.D;
-
-        //real mixingTimeScale_u = real(0.4) * parameters.dx / sqrt( c2o3 * kSGS );
-
-        //real mixingTimeScale_g = sqrt( c2o1 * parameters.dx / fabs( parameters.force.z ) );
-
-        //real mixingTimeScale = fminf( mixingTimeScale_d, mixingTimeScale_u );
-        //mixingTimeScale      = fminf( mixingTimeScale_g, mixingTimeScale   );
-
-        //////////////////////////////////////////////////////////////////////////
-
-        real Y_F = prim.S_1;
-        real Y_P = prim.S_2;
-
-        real Y_A = c1o1 - Y_F - Y_P;
-
-        ///////////////////////////////////////////////////////////////////////////////
-
-        real Y_O2 = rX * ( M_O2 / M_A ) * Y_A;
-
-        ///////////////////////////////////////////////////////////////////////////////
-
-        real s = M_F / ( c2o1 * M_O2 );
-
-        real heatReleaseRate = cons.rho * fminf(Y_F, s * Y_O2) / mixingTimeScale * ( parameters.heatOfReaction / M_F );
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if( heatReleaseRate < c0o1 )
-            heatReleaseRate = c0o1;
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if( parameters.useHeatReleaseRateLimiter )
-        if( heatReleaseRate > parameters.heatReleaseRateLimiter )
-            heatReleaseRate = parameters.heatReleaseRateLimiter;
-
-        //////////////////////////////////////////////////////////////////////////
-
-        real drhoY_F = heatReleaseRate * parameters.dt / ( parameters.heatOfReaction / M_F );
-
-        //real r = c1o1 + ( c1o2 / rX ) * ( M_A / M_F );
-        real r = c1o1 + ( c2o1 / rX ) * ( M_A / M_F );
-
-        cons.rhoS_1 -=     drhoY_F;
-        cons.rhoS_2 += r * drhoY_F;
-        cons.rhoE   += heatReleaseRate * parameters.dt;
-
-        //////////////////////////////////////////////////////////////////////////
-    }
-
-#endif // USE_PASSIVE_SCALAR
-}
-
-} // namespace GksGpu
\ No newline at end of file
diff --git a/src/gpu/GksGpu/Communication/Communicator.cpp b/src/gpu/GksGpu/Communication/Communicator.cpp
deleted file mode 100644
index 6f4fc0fa575e5c6908ca8a55000b40f78733a781..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Communication/Communicator.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-#include "Communicator.h"
-
-#ifdef VF_DOUBLE_ACCURACY
-#define MPI_TYPE_GPU  MPI_DOUBLE
-#else
-#define MPI_TYPE_GPU  MPI_FLOAT
-#endif
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <iostream>
-
-#include "PointerDefinitions.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseAllocator.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/CudaAwareMpi.h"
-
-#include "CudaUtility/CudaUtility.h"
-
-namespace GksGpu {
-
-int Communicator::tagSendPositive = 0;
-int Communicator::tagSendNegative = 1;
-
-Communicator::Communicator( SPtr<DataBase> dataBase )
-    : myAllocator ( dataBase->myAllocator )
-{
-    this->numberOfSendNodes = INVALID_INDEX;
-    this->numberOfRecvNodes = INVALID_INDEX;
-
-    this->sendIndices    = nullptr;
-    this->recvIndices    = nullptr;
-    this->sendBuffer     = nullptr;
-    this->recvBuffer     = nullptr;
-    this->sendBufferHost = nullptr;
-    this->recvBufferHost = nullptr;
-
-    this->sendBufferIsReady = MPI_REQUEST_NULL;
-}
-
-void Communicator::initialize(GksMeshAdapter & adapter, uint level, uint direction)
-{
-    this->myAllocator->freeMemory( *this );
-
-    this->numberOfSendNodes = adapter.communicationIndices[level].sendIndices[direction].size();
-    this->numberOfRecvNodes = adapter.communicationIndices[level].recvIndices[direction].size();
-
-    this->myAllocator->allocateMemory( *this, adapter.communicationIndices[level].sendIndices[direction], 
-                                              adapter.communicationIndices[level].recvIndices[direction] );
-
-    this->opposingRank = adapter.communicationProcesses[direction];
-}
-
-void Communicator::sendData( SPtr<DataBase> dataBase, int tag )
-{
-#ifdef USE_CUDA_AWARE_MPI
-
-    this->copyFromMeshToSendBuffer( dataBase );
-    
-    MPI_Isend( this->sendBuffer, this->numberOfSendNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, tag, MPI_COMM_WORLD, &this->sendBufferIsReady );
-
-#else // USE_CUDA_AWARE_MPI
-
-    this->copyFromMeshToSendBuffer( dataBase );
-
-    MPI_Wait(&this->sendBufferIsReady, MPI_STATUSES_IGNORE);
-
-    this->myAllocator->copyBuffersDeviceToHost( shared_from_this() );
-    
-    CudaUtility::synchronizeCudaStream( CudaUtility::communicationStream );
-
-    MPI_Isend( this->sendBufferHost, this->numberOfSendNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, tag, MPI_COMM_WORLD, &this->sendBufferIsReady );
-
-#endif // USE_CUDA_AWARE_MPI
-}
-
-void Communicator::recvData( SPtr<DataBase> dataBase, int tag )
-{
-#ifdef USE_CUDA_AWARE_MPI
-    
-    MPI_Recv ( this->recvBuffer, this->numberOfRecvNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, tag, MPI_COMM_WORLD, MPI_STATUSES_IGNORE );
-    
-    this->copyFromRecvBufferToMesh( dataBase );
-
-#else // USE_CUDA_AWARE_MPI
-    
-    MPI_Recv ( this->recvBufferHost, this->numberOfRecvNodes * LENGTH_CELL_DATA, MPI_TYPE_GPU, this->opposingRank, tag, MPI_COMM_WORLD, MPI_STATUSES_IGNORE );
-    
-    this->myAllocator->copyBuffersHostToDevice( shared_from_this() );
-
-    this->copyFromRecvBufferToMesh( dataBase );
-
-#endif // USE_CUDA_AWARE_MPI
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/Communication/Communicator.cu b/src/gpu/GksGpu/Communication/Communicator.cu
deleted file mode 100644
index 58951afdef57d6eb53fd7b6ab278d0df63cf00d2..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Communication/Communicator.cu
+++ /dev/null
@@ -1,165 +0,0 @@
-#include "Communicator.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "DataBase/DataBase.h"
-#include "DataBase/DataBaseAllocator.h"
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void sendBufferKernel  ( const DataBaseStruct dataBase, 
-                                                    const uint numberOfSendNodes,
-                                                    const uint* sendIndices,
-                                                    real* sendBuffer,
-                                                    const uint startIndex,
-                                                    const uint numberOfEntities );
-
-__host__ __device__ inline void sendBufferFunction( const DataBaseStruct dataBase, 
-                                                    const uint numberOfSendNodes,
-                                                    const uint* sendIndices,
-                                                    real* sendBuffer,
-                                                    const uint startIndex,
-                                                    const uint index );
-
-__global__                 void recvBufferKernel  ( const DataBaseStruct dataBase, 
-                                                    const uint numberOfRecvNodes,
-                                                    const uint* recvIndices,
-                                                    real* recvBuffer,
-                                                    const uint startIndex,
-                                                    const uint numberOfEntities );
-
-__host__ __device__ inline void recvBufferFunction( const DataBaseStruct dataBase, 
-                                                    const uint numberOfRecvNodes,
-                                                    const uint* recvIndices,
-                                                    real* recvBuffer,
-                                                    const uint startIndex,
-                                                    const uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void Communicator::copyFromMeshToSendBuffer(const SPtr<DataBase> dataBase)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfSendNodes, 32, CudaUtility::communicationStream );
-
-    runKernel( sendBufferKernel,
-               sendBufferFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->numberOfSendNodes,
-               this->sendIndices,
-               this->sendBuffer,
-               0 );
-}
-
-void Communicator::copyFromRecvBufferToMesh(const SPtr<DataBase> dataBase)
-{    
-    CudaUtility::CudaGrid grid( this->numberOfRecvNodes, 32, CudaUtility::communicationStream );
-
-    runKernel( recvBufferKernel,
-               recvBufferFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct(),
-               this->numberOfRecvNodes,
-               this->recvIndices,
-               this->recvBuffer,
-               0 );
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void sendBufferKernel( const DataBaseStruct dataBase, 
-                                  const uint numberOfSendNodes,
-                                  const uint* sendIndices,
-                                  real* sendBuffer,
-                                  const uint startIndex,
-                                  const uint numberOfEntities )
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    sendBufferFunction( dataBase, numberOfSendNodes, sendIndices, sendBuffer, startIndex, index );
-}
-
-__host__ __device__ inline void sendBufferFunction( const DataBaseStruct dataBase, 
-                                                    const uint numberOfSendNodes,
-                                                    const uint* sendIndices,
-                                                    real* sendBuffer,
-                                                    const uint startIndex,
-                                                    const uint index )
-{
-    uint cellIdx  = sendIndices [ index ];
-
-    sendBuffer[ RHO__(index, numberOfSendNodes) ] = dataBase.data[ RHO__(cellIdx, dataBase.numberOfCells) ];
-    sendBuffer[ RHO_U(index, numberOfSendNodes) ] = dataBase.data[ RHO_U(cellIdx, dataBase.numberOfCells) ];
-    sendBuffer[ RHO_V(index, numberOfSendNodes) ] = dataBase.data[ RHO_V(cellIdx, dataBase.numberOfCells) ];
-    sendBuffer[ RHO_W(index, numberOfSendNodes) ] = dataBase.data[ RHO_W(cellIdx, dataBase.numberOfCells) ];
-    sendBuffer[ RHO_E(index, numberOfSendNodes) ] = dataBase.data[ RHO_E(cellIdx, dataBase.numberOfCells) ];
-#ifdef USE_PASSIVE_SCALAR
-    sendBuffer[ RHO_S_1(index, numberOfSendNodes) ] = dataBase.data[ RHO_S_1(cellIdx, dataBase.numberOfCells) ];
-    sendBuffer[ RHO_S_2(index, numberOfSendNodes) ] = dataBase.data[ RHO_S_2(cellIdx, dataBase.numberOfCells) ];
-#endif // USE_PASSIVE_SCALAR
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void recvBufferKernel( const DataBaseStruct dataBase, 
-                                  const uint numberOfRecvNodes,
-                                  const uint* recvIndices,
-                                  real* recvBuffer,
-                                  const uint startIndex,
-                                  const uint numberOfEntities )
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    recvBufferFunction( dataBase, numberOfRecvNodes, recvIndices, recvBuffer, startIndex, index );
-}
-
-__host__ __device__ inline void recvBufferFunction( const DataBaseStruct dataBase, 
-                                                    const uint numberOfRecvNodes,
-                                                    const uint* recvIndices,
-                                                    real* recvBuffer,
-                                                    const uint startIndex,
-                                                    const uint index )
-{
-    uint cellIdx  = recvIndices [ index ];
-
-    dataBase.data[ RHO__(cellIdx, dataBase.numberOfCells) ] = recvBuffer[ RHO__(index, numberOfRecvNodes) ] ;
-    dataBase.data[ RHO_U(cellIdx, dataBase.numberOfCells) ] = recvBuffer[ RHO_U(index, numberOfRecvNodes) ] ;
-    dataBase.data[ RHO_V(cellIdx, dataBase.numberOfCells) ] = recvBuffer[ RHO_V(index, numberOfRecvNodes) ] ;
-    dataBase.data[ RHO_W(cellIdx, dataBase.numberOfCells) ] = recvBuffer[ RHO_W(index, numberOfRecvNodes) ] ;
-    dataBase.data[ RHO_E(cellIdx, dataBase.numberOfCells) ] = recvBuffer[ RHO_E(index, numberOfRecvNodes) ] ;
-#ifdef USE_PASSIVE_SCALAR
-    dataBase.data[ RHO_S_1(cellIdx, dataBase.numberOfCells) ] = recvBuffer[ RHO_S_1(index, numberOfRecvNodes) ] ;
-    dataBase.data[ RHO_S_2(cellIdx, dataBase.numberOfCells) ] = recvBuffer[ RHO_S_2(index, numberOfRecvNodes) ] ;
-#endif // USE_PASSIVE_SCALAR
-}
-
-} // namespace GksGpu
\ No newline at end of file
diff --git a/src/gpu/GksGpu/Communication/Communicator.h b/src/gpu/GksGpu/Communication/Communicator.h
deleted file mode 100644
index 4b883563705324c862cc6778480b6b09ba285587..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Communication/Communicator.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#ifndef Communicator_H
-#define Communicator_H
-
-#include <memory>
-#include <vector>
-#include <mpi.h>
-//#include <mutex>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-class  GksMeshAdapter;
-
-namespace GksGpu {
-
-class  DataBaseAllocator;
-struct DataBase;
-
-struct GKSGPU_EXPORT Communicator : public std::enable_shared_from_this<Communicator>
-{
-    SPtr<DataBaseAllocator> myAllocator;
-
-    uint numberOfSendNodes;
-    uint numberOfRecvNodes;
-
-    uint* sendIndices; // device
-    uint* recvIndices; // device
-
-    real* sendBuffer; // device
-    real* recvBuffer; // device
-
-    real* sendBufferHost; // pinned memory
-    real* recvBufferHost; // pinned memory
-
-    uint rank;
-    uint opposingRank;
-
-    MPI_Request sendBufferIsReady;
-
-    static int tagSendPositive;
-    static int tagSendNegative;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Communicator( SPtr<DataBase> dataBase );
-
-    void initialize( GksMeshAdapter& adapter, uint level, uint direction );
-
-    void copyFromMeshToSendBuffer( SPtr<DataBase> dataBase );
-
-    void copyFromRecvBufferToMesh( SPtr<DataBase> dataBase );
-
-    void sendData( SPtr<DataBase> dataBase, int tag );
-    void recvData( SPtr<DataBase> dataBase, int tag );
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Communication/MpiUtility.cpp b/src/gpu/GksGpu/Communication/MpiUtility.cpp
deleted file mode 100644
index 1dbfcaf15a3023ea03f829f9d74aa84548523a04..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Communication/MpiUtility.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "MpiUtility.h"
-
-#include <exception>
-
-#define ENV_LOCAL_RANK	     "OMPI_COMM_WORLD_RANK"
-#define ENV_COMM_WORLD_SIZE  "OMPI_COMM_WORLD_SIZE"
-
-namespace GksGpu {
-
-int MpiUtility::getMpiRankBeforeInit()
-{
-    char * localRankStr = NULL;
-
-    // We extract the local rank initialization using an environment variable
-    if ((localRankStr = getenv(ENV_LOCAL_RANK)) != NULL)
-    {
-        return atoi(localRankStr);
-    }
-    else
-    {
-        return 0;
-    }
-}
-
-int MpiUtility::getMpiWorldSizeBeforeInit()
-{
-    char * mpiWorldSizeStr = NULL;
-
-    // We extract the local rank initialization using an environment variable
-    if ((mpiWorldSizeStr = getenv(ENV_COMM_WORLD_SIZE)) != NULL)
-    {
-        return atoi(mpiWorldSizeStr);
-    }
-    else
-    {
-        return 1;
-    }
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/Communication/MpiUtility.h b/src/gpu/GksGpu/Communication/MpiUtility.h
deleted file mode 100644
index f2aa1a8be1d18f99cfd29d91b2bcb31edc0a661f..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Communication/MpiUtility.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef MpiUtility_H
-#define MpiUtility_H
-
-#include <mpi.h>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-class  GksMeshAdapter;
-
-namespace GksGpu {
-
-class  DataBaseAllocator;
-struct DataBase;
-
-struct GKSGPU_EXPORT MpiUtility
-{
-    static int getMpiRankBeforeInit();
-
-    static int getMpiWorldSizeBeforeInit();
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/CudaUtility/CudaRunKernel.hpp b/src/gpu/GksGpu/CudaUtility/CudaRunKernel.hpp
deleted file mode 100644
index 91de639cc712745cadd5f8a21e3c330dde989121..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/CudaUtility/CudaRunKernel.hpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef  CudaRunKernel_HPP
-#define  CudaRunKernel_HPP
-
-#include <string>
-#include <device_launch_parameters.h>
-#include <omp.h>
-#include <iostream>
-
-#include "CudaUtility/CudaUtility.h"
-
-namespace GksGpu {
-
-template<typename KernelFunctor, typename FunctionFunctor, typename... TArgs>
-void runKernel(KernelFunctor kernel, FunctionFunctor function, std::string deviceType, const CudaUtility::CudaGrid& grid, TArgs... args)
-{
-    if( grid.numberOfEntities == 0 ) return;
-
-    if( deviceType == "GPU" )
-    {
-        kernel<<< grid.blocks, grid.threads, 0, grid.stream >>>( args..., grid.numberOfEntities );
-    }
-    else
-    {
-//#pragma omp parallel for
-        for( int index = 0; index < grid.numberOfEntities; index++ )
-        {
-            function( args..., index );
-        }
-    }
-}
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/CudaUtility/CudaUtility.cpp b/src/gpu/GksGpu/CudaUtility/CudaUtility.cpp
deleted file mode 100644
index 0cd9e948dca8522284efe3febce95320dd5f4243..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/CudaUtility/CudaUtility.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "CudaUtility.h"
-
-#include <iostream>
-#include <cuda_runtime.h>
-#include <cuda.h>
-#include <helper_cuda.h>
-
-#include "Core/DataTypes.h"
-#include "Core/Logger/Logger.h"
-
-namespace GksGpu {
-
-cudaStream_t CudaUtility::computeStream = nullptr;
-cudaStream_t CudaUtility::communicationStream = nullptr;
-
-CudaUtility::CudaGrid::CudaGrid( uint numberOfEntities, uint threadsPerBlock, cudaStream_t stream )
-{
-    this->numberOfEntities = numberOfEntities;
-    this->threads.x = threadsPerBlock;
-    this->blocks.x  = ( numberOfEntities + threadsPerBlock - 1 ) / threadsPerBlock;
-
-    this->stream = stream;
-}
-
-void CudaUtility::printCudaMemoryUsage()
-{
-    size_t free_byte ;
-    size_t total_byte ;
-
-    checkCudaErrors( cudaMemGetInfo( &free_byte, &total_byte ) );
-
-    double free_db = (double)free_byte ;
-    double total_db = (double)total_byte ;
-    double used_db = total_db - free_db ;
-
-    *logging::out << logging::Logger::INFO_HIGH << "GPU memory usage:" << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "    used  = " << used_db /1024.0/1024.0/1024.0 << " GB\n";
-    *logging::out << logging::Logger::INFO_HIGH << "    free  = " << free_db /1024.0/1024.0/1024.0 << " GB\n";
-    *logging::out << logging::Logger::INFO_HIGH << "    total = " << total_db/1024.0/1024.0/1024.0 << " GB\n";
-}
-
-int CudaUtility::getCudaDeviceCount()
-{    
-    int deviceCount = 0;
-    checkCudaErrors( cudaGetDeviceCount(&deviceCount) );
-    return deviceCount;
-}
-
-void CudaUtility::setCudaDevice(int device)
-{    
-    checkCudaErrors( cudaSetDevice( device ) );
-    checkCudaErrors( cudaGetDevice( &device ) );
-
-    cudaDeviceProp prop;
-    cudaGetDeviceProperties(&prop, device);
-
-    *logging::out << logging::Logger::INFO_HIGH << "Set device " << device << ": " << prop.name << "\n";
-
-    // set communication stream on high priority, such that it can interleave the compute stream
-    // the non blocking flag disable implicit synchronization with the default thread '0'
-    // based on https://fenix.tecnico.ulisboa.pt/downloadFile/563568428758047/CUDA_StreamsEvents.pdf
-    // slide 5
-    int priority_high, priority_low;
-    cudaDeviceGetStreamPriorityRange(&priority_low , &priority_high ) ;
-
-    // the flag needs to be cudaStreamDefault to ensure synchronization with default stream
-    //cudaStreamCreateWithPriority (&communicationStream, cudaStreamDefault, priority_high );
-    //cudaStreamCreateWithPriority (&computeStream      , cudaStreamDefault, priority_low  );
-    cudaStreamCreateWithPriority (&communicationStream, cudaStreamNonBlocking, priority_high );
-    cudaStreamCreateWithPriority (&computeStream      , cudaStreamNonBlocking, priority_low  );
-}
-
-int CudaUtility::getCudaDevice()
-{
-    int device;
-    checkCudaErrors( cudaGetDevice( &device ) );
-
-    cudaDeviceProp prop;
-    cudaGetDeviceProperties(&prop, device);
-
-    *logging::out << logging::Logger::INFO_HIGH << "The current device " << device << ": " << prop.name << "\n";
-
-    return device;
-}
-
-void CudaUtility::synchronizeCudaDevice()
-{
-    checkCudaErrors( cudaDeviceSynchronize() );
-}
-
-void CudaUtility::synchronizeCudaStream(cudaStream_t stream)
-{
-    checkCudaErrors( cudaStreamSynchronize(stream) );
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/CudaUtility/CudaUtility.h b/src/gpu/GksGpu/CudaUtility/CudaUtility.h
deleted file mode 100644
index fa312e9d41879703d04a03092e79401abc132c86..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/CudaUtility/CudaUtility.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef  CudaUtilExtern_H
-#define  CudaUtilExtern_H
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-
-namespace GksGpu {
-
-class GKSGPU_EXPORT CudaUtility
-{
-public:
-
-    struct CudaGrid 
-    {
-        dim3 threads;
-        dim3 blocks;
-
-        uint numberOfEntities;
-
-        cudaStream_t stream;
-
-        CudaGrid( uint numberOfEntities, uint threadsPerBlock, cudaStream_t stream = 0 );
-    };
-
-    static cudaStream_t computeStream;
-    static cudaStream_t communicationStream;
-
-    static void printCudaMemoryUsage();
-
-    static int getCudaDeviceCount();
-
-    static void setCudaDevice( int device );
-
-    static int getCudaDevice(  );
-
-    static void synchronizeCudaDevice();
-
-    static void synchronizeCudaStream( cudaStream_t stream );
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/DataBase/DataBase.cpp b/src/gpu/GksGpu/DataBase/DataBase.cpp
deleted file mode 100644
index 46921a683de3dd9c322be2d89b4ca66f6fa07020..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBase.cpp
+++ /dev/null
@@ -1,244 +0,0 @@
-#include "DataBase.h"
-
-#include <iostream>
-#include <string>
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "DataBaseAllocator.h"
-#include "DataBaseStruct.h"
-
-#include "Core/Logger/Logger.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-#include "Communication/Communicator.h"
-
-#include <lbm/constants/NumericConstants.h>
-
-using namespace vf::lbm::constant;
-
-namespace GksGpu {
-
-DataBase::DataBase( std::string type ) 
-        : myAllocator    ( DataBaseAllocator::create( type ) ),
-          numberOfNodes      (0),
-          numberOfCells      (0),
-          numberOfFaces      (0),
-          numberOfLevels     (0),
-          numberOfCoarseGhostCells(0),
-          numberOfFineGhostCells(0),
-          cellToCell     (nullptr),
-          faceToCell     (nullptr),
-          parentCell     (nullptr),
-          faceCenter     (nullptr),
-          cellCenter     (nullptr),
-          cellProperties (nullptr),
-          faceOrientation(nullptr),
-          fineToCoarse   (nullptr),
-          coarseToFine   (nullptr),
-          data           (nullptr),
-          dataUpdate     (nullptr),
-          massFlux       (nullptr),
-          diffusivity      (nullptr)
-{
-}
-
-DataBase::~DataBase()
-{
-    this->myAllocator->freeMemory( *this );
-}
-
-void DataBase::setMesh(GksMeshAdapter & adapter)
-{
-    this->numberOfNodes      = adapter.nodes.size();
-
-    this->numberOfCells      = adapter.cells.size();
-
-    this->numberOfFaces      = adapter.faces.size();
-
-    this->numberOfLevels     = adapter.numberOfLevels;
-
-    this->perLevelCount.resize( this->numberOfLevels );
-
-    for( uint level = 0; level < this->numberOfLevels; level++ )
-    {
-        perLevelCount[ level ].numberOfCells = adapter.numberOfCellsPerLevel[ level ];
-        perLevelCount[ level ].startOfCells  = adapter.startOfCellsPerLevel [ level ];
-
-        perLevelCount[ level ].numberOfBulkCells = adapter.numberOfBulkCellsPerLevel[ level ];
-
-        perLevelCount[ level ].numberOfFacesX = adapter.numberOfFacesPerLevelXYZ[ 3 * level     ];
-        perLevelCount[ level ].startOfFacesX  = adapter.startOfFacesPerLevelXYZ [ 3 * level     ];
-
-        perLevelCount[ level ].numberOfFacesY = adapter.numberOfFacesPerLevelXYZ[ 3 * level + 1 ];
-        perLevelCount[ level ].startOfFacesY  = adapter.startOfFacesPerLevelXYZ [ 3 * level + 1 ];
-
-        perLevelCount[ level ].numberOfFacesZ = adapter.numberOfFacesPerLevelXYZ[ 3 * level + 2 ];
-        perLevelCount[ level ].startOfFacesZ  = adapter.startOfFacesPerLevelXYZ [ 3 * level + 2 ];
-
-        perLevelCount[ level ].numberOfFaces = perLevelCount[ level ].numberOfFacesX
-                                             + perLevelCount[ level ].numberOfFacesY
-                                             + perLevelCount[ level ].numberOfFacesZ;
-
-        perLevelCount[ level ].numberOfInnerFaces = adapter.numberOfInnerFacesPerLevel[ level ];
-
-        perLevelCount[ level ].numberOfFineToCoarse = adapter.numberOfFineToCoarsePerLevel[ level ];
-        perLevelCount[ level ].numberOfCoarseToFine = adapter.numberOfCoarseToFinePerLevel[ level ];
-
-        perLevelCount[ level ].startOfFineToCoarse = adapter.startOfFineToCoarsePerLevel[ level ];
-        perLevelCount[ level ].startOfCoarseToFine = adapter.startOfCoarseToFinePerLevel[ level ];
-    }
-
-    this->numberOfCoarseGhostCells = adapter.fineToCoarse.size();
-
-    this->numberOfFineGhostCells   = adapter.coarseToFine.size();
-
-    this->myAllocator->allocateMemory( shared_from_this() );
-
-    this->myAllocator->copyMesh( shared_from_this(), adapter );
-}
-
-void DataBase::setCommunicators(GksMeshAdapter & adapter)
-{
-    this->communicators.resize( this->numberOfLevels );
-
-    for( uint level = 0; level < this->numberOfLevels; level++ )
-    {
-        for( uint direction = 0; direction < 6; direction++ )
-        {
-            if( adapter.communicationProcesses[direction] != INVALID_INDEX &&
-                ( 
-                  adapter.communicationIndices[level].sendIndices[direction].size() > 0 ||
-                  adapter.communicationIndices[level].recvIndices[direction].size() > 0
-                )
-              )
-            {
-                this->communicators[level][direction] = std::make_shared<Communicator>( shared_from_this() );
-
-                this->communicators[level][direction]->initialize( adapter, level, direction );
-
-                *logging::out << logging::Logger::INFO_LOW << "Generated Communicator " << level << ":" << direction << " \n";
-            }
-            else
-            {
-                this->communicators[level][direction] = nullptr;
-            }
-        }
-
-    
-    }
-}
-
-void DataBase::copyDataHostToDevice()
-{
-    this->myAllocator->copyDataHostToDevice( shared_from_this() );
-}
-
-void DataBase::copyDataDeviceToHost()
-{
-    this->myAllocator->copyDataDeviceToHost( shared_from_this(), this->dataHost.data() );
-}
-
-void DataBase::copyDataDeviceToHost( real* dataHost )
-{
-    this->myAllocator->copyDataDeviceToHost( shared_from_this(), dataHost );
-}
-
-int DataBase::getCrashCellIndex()
-{
-    return this->myAllocator->getCrashCellIndex(shared_from_this());
-}
-
-DataBaseStruct DataBase::toStruct()
-{
-    DataBaseStruct dataBase;
-
-    dataBase.numberOfCells            = this->numberOfCells;
-    dataBase.numberOfFaces            = this->numberOfFaces;
-
-    dataBase.numberOfCoarseGhostCells = this->numberOfCoarseGhostCells;
-    dataBase.numberOfFineGhostCells   = this->numberOfFineGhostCells;
-
-    dataBase.cellToCell               = this->cellToCell;
-    dataBase.faceToCell               = this->faceToCell;
-
-    dataBase.parentCell               = this->parentCell;
-
-    dataBase.fineToCoarse             = this->fineToCoarse;
-    dataBase.coarseToFine             = this->coarseToFine;
-
-    dataBase.faceCenter               = this->faceCenter;
-    dataBase.cellCenter               = this->cellCenter;
-
-    dataBase.cellProperties           = this->cellProperties;
-
-    dataBase.faceOrientation          = this->faceOrientation;
-
-    dataBase.fineToCoarse             = this->fineToCoarse;
-    dataBase.coarseToFine             = this->coarseToFine;
-
-    dataBase.data                     = this->data;
-    dataBase.dataUpdate               = this->dataUpdate;
-
-    dataBase.massFlux                 = this->massFlux;
-
-    dataBase.diffusivity              = this->diffusivity;
-
-    dataBase.crashCellIndex           = this->crashCellIndex;
-
-    return dataBase;
-}
-
-uint DataBase::getCellLevel(uint cellIdx)
-{
-    uint level = 0;
-
-    while( cellIdx >= this->perLevelCount[level].startOfCells
-                   + this->perLevelCount[level].numberOfCells ) level++;
-
-    return level;
-}
-
-uint DataBase::getFaceLevel(uint faceIdx)
-{
-    uint level = 0;
-
-    while( faceIdx >= this->perLevelCount[level].startOfFacesX
-                   + this->perLevelCount[level].numberOfFaces ) level++;
-
-    return level;
-}
-
-Vec3 DataBase::getCellCenter(uint cellIdx)
-{
-    Vec3 cellCenter;
-
-    for( uint node = 0; node < 8; node++ )
-    {
-        cellCenter = cellCenter + this->nodeCoordinates[ this->cellToNode[ cellIdx ][ node ] ];
-    }
-
-    cellCenter.x /= c8o1;
-    cellCenter.y /= c8o1;
-    cellCenter.z /= c8o1;
-
-    return cellCenter;
-}
-
-bool DataBase::isGhostCell(uint cellIdx)
-{
-    uint level = this->getCellLevel( cellIdx );
-
-    return ( cellIdx >= this->perLevelCount[ level ].startOfCells + this->perLevelCount[ level ].numberOfBulkCells )
-           ||
-           ( isCellProperties( this->cellPropertiesHost[cellIdx], CELL_PROPERTIES_FINE_GHOST ) );
-
-}
-
-std::string DataBase::getDeviceType()
-{
-    return this->myAllocator->getDeviceType();
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/DataBase/DataBase.h b/src/gpu/GksGpu/DataBase/DataBase.h
deleted file mode 100644
index 02475ae98ca5bb5d799098791342802cf6cde7b0..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBase.h
+++ /dev/null
@@ -1,180 +0,0 @@
-#ifndef DataBase_H
-#define DataBase_H
-
-#include <memory>
-#include <string>
-#include <vector>
-#include <array>
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/ArrayTypes.h"
-
-
-#include "GksGpu_export.h"
-
-#include "Definitions/AccumulatorDataType.h"
-
-#include "CellProperties/CellProperties.cuh"
-
-class  GksMeshAdapter;
-
-namespace GksGpu { 
-
-struct BoundaryCondition;
-class  DataBaseAllocator;
-struct DataBase;
-struct PerLevelCounts;
-struct DataBaseStruct;
-struct Communicator;
-
-struct GKSGPU_EXPORT DataBase : public std::enable_shared_from_this<DataBase>
-{
-    //////////////////////////////////////////////////////////////////////////
-    // Management
-    //////////////////////////////////////////////////////////////////////////
-
-    SPtr<DataBaseAllocator> myAllocator;
-
-    std::vector< SPtr<BoundaryCondition> > boundaryConditions;
-
-    std::vector< std::array< SPtr< Communicator >, 6 > > communicators;
-
-    //////////////////////////////////////////////////////////////////////////
-    // Sizes
-    //////////////////////////////////////////////////////////////////////////
-
-    uint numberOfNodes;
-
-    uint numberOfCells;
-
-    uint numberOfFaces;
-
-    uint numberOfLevels;
-
-    uint numberOfCoarseGhostCells;
-
-    uint numberOfFineGhostCells;
-
-    std::vector<PerLevelCounts> perLevelCount;
-
-    //////////////////////////////////////////////////////////////////////////
-    // Host only geometry and connectivity
-    //////////////////////////////////////////////////////////////////////////
-
-    std::vector<Vec3>   nodeCoordinates;
-
-    std::vector<uint_8> cellToNode;
-    std::vector<uint_4> faceToNode;
-
-    std::vector<CellProperties> cellPropertiesHost;
-
-    //////////////////////////////////////////////////////////////////////////
-    // Host/Device geometry and connectivity - READ ONLY
-    //////////////////////////////////////////////////////////////////////////
-
-    uint* cellToCell;     // 6
-
-    uint* faceToCell;     // 2
-
-    uint* parentCell;     // 1
-
-    real* faceCenter;     // 3
-    real* cellCenter;     // 3
-
-    CellProperties* cellProperties;     // 1 x byte
-
-    char* faceOrientation;
-
-    uint* fineToCoarse;   // 9
-    uint* coarseToFine;   // 9
-
-    //////////////////////////////////////////////////////////////////////////
-    // Host/Device data - READ MODIFY
-    //////////////////////////////////////////////////////////////////////////
-
-    real*            data;
-    realAccumulator* dataUpdate;
-
-    real* massFlux;
-
-    realAccumulator* diffusivity;
-
-    int* crashCellIndex;
-
-    //////////////////////////////////////////////////////////////////////////
-    // Host only data
-    //////////////////////////////////////////////////////////////////////////
-
-    std::vector<real> dataHost;
-
-    std::vector<real> diffusivityHost;
-
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-    // Methods
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-
-    DataBase( std::string type );
-    ~DataBase();
-
-    //void setMesh( std::shared_ptr<MeshGeneratorQuadTree> mesh );
-
-    void setMesh( GksMeshAdapter& adapter );
-
-    void setCommunicators( GksMeshAdapter& adapter );
-
-    void copyDataHostToDevice();
-
-    void copyDataDeviceToHost();
-
-    void copyDataDeviceToHost( real* dataHost );
-
-    int getCrashCellIndex();
-
-    DataBaseStruct toStruct();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    uint getCellLevel( uint cellIdx );
-    uint getFaceLevel( uint faceIdx );
-
-    Vec3 getCellCenter( uint cellIdx );
-
-    bool isGhostCell( uint cellIdx );
-
-    std::string getDeviceType();
-};
-
-struct GKSGPU_EXPORT PerLevelCounts
-{
-    uint numberOfCells;
-    uint startOfCells;
-
-    uint numberOfBulkCells;
-
-    uint numberOfFaces;
-
-    uint numberOfInnerFaces;
-
-    uint numberOfFacesX;
-    uint startOfFacesX;
-
-    uint numberOfFacesY;
-    uint startOfFacesY;
-
-    uint numberOfFacesZ;
-    uint startOfFacesZ;
-
-    uint numberOfCoarseToFine;
-    uint startOfCoarseToFine;
-
-    uint numberOfFineToCoarse;
-    uint startOfFineToCoarse;
-};
-
-} // namespace GksGpu
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/DataBase/DataBaseAllocator.cpp b/src/gpu/GksGpu/DataBase/DataBaseAllocator.cpp
deleted file mode 100644
index fc5a8ddf3ee1f4ea1816a01ad315afd4df85ccf3..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBaseAllocator.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "DataBaseAllocator.h"
-
-//#include "../../DataBase/DataBaseAllocator/DataBaseAllocatorCPU/DataBaseAllocatorCPU.h"
-//#include "../../DataBase/DataBaseAllocator/DataBaseAllocatorGPU/DataBaseAllocatorGPU.h"
-
-#include "DataBaseAllocatorCPU.h"
-#include "DataBaseAllocatorGPU.h"
-
-#include <string>
-
-namespace GksGpu {
-
-std::shared_ptr<DataBaseAllocator> DataBaseAllocator::create(std::string type)
-{
-    if ( type == "GPU" )
-        return std::shared_ptr<DataBaseAllocator>( new DataBaseAllocatorGPU() );
-    else
-        return std::shared_ptr<DataBaseAllocator>( new DataBaseAllocatorCPU() );
-}
-
-DataBaseAllocator::~DataBaseAllocator()
-{
-}
-
-DataBaseAllocator::DataBaseAllocator()
-{
-}
-
-DataBaseAllocator::DataBaseAllocator(const DataBaseAllocator & orig)
-{
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/DataBase/DataBaseAllocator.h b/src/gpu/GksGpu/DataBase/DataBaseAllocator.h
deleted file mode 100644
index e50c58f13a8c2134311f28a904e14ac64ca473d5..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBaseAllocator.h
+++ /dev/null
@@ -1,74 +0,0 @@
-#ifndef DataBaseAllocator_H
-#define DataBaseAllocator_H
-
-#include <string>
-#include <vector>
-
-#include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-
-
-#include "GksGpu_export.h"
-
-class  GksMeshAdapter;
-
-namespace GksGpu {
-
-struct DataBase;
-struct BoundaryCondition;
-struct Communicator;
-
-class GKSGPU_EXPORT DataBaseAllocator {
-
-public:
-
-    static std::shared_ptr<DataBaseAllocator> create( std::string type );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual void freeMemory( DataBase& dataBase ) = 0;
-
-    virtual void allocateMemory( SPtr<DataBase> dataBase) = 0;
-
-    virtual void copyMesh( SPtr<DataBase> dataBase, GksMeshAdapter& adapter ) = 0;
-
-    virtual void copyDataHostToDevice( SPtr<DataBase> dataBase ) = 0;
-    
-    virtual void copyDataDeviceToHost( SPtr<DataBase> dataBase, real* hostData ) = 0;
-
-    virtual int  getCrashCellIndex( SPtr<DataBase> dataBase ) = 0;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual void freeMemory( BoundaryCondition& boundaryCondition ) = 0;
-
-    virtual void allocateMemory( SPtr<BoundaryCondition> boundaryCondition, std::vector<uint> ghostCells, std::vector<uint> domainCells, std::vector<uint> secondCells ) = 0;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual void freeMemory( Communicator& communicator ) = 0;
-
-    virtual void allocateMemory( Communicator& communicator, std::vector<uint>& sendIndices, std::vector<uint>& recvIndices ) = 0;
-
-    virtual void copyDataDeviceToDevice( SPtr<Communicator> dst, SPtr<Communicator> src ) = 0;
-
-    virtual void copyBuffersDeviceToHost( SPtr<Communicator> communicator ) = 0;
-    virtual void copyBuffersHostToDevice( SPtr<Communicator> communicator ) = 0;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    ~DataBaseAllocator();
-
-    virtual std::string getDeviceType() = 0;
-
-protected:
-
-    DataBaseAllocator();
-    DataBaseAllocator( const DataBaseAllocator& orig );
-
-};
-
-} // namespace GksGpu
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/DataBase/DataBaseAllocatorCPU.cpp b/src/gpu/GksGpu/DataBase/DataBaseAllocatorCPU.cpp
deleted file mode 100644
index 39d5f788c6a3b5332ded60378fdae8cd137d0b80..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBaseAllocatorCPU.cpp
+++ /dev/null
@@ -1,262 +0,0 @@
-#include "DataBaseAllocatorCPU.h"
-
-#include <cstring>
-
-#include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "DataBase/DataBase.h"
-
-#include "CellProperties/CellProperties.cuh"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-#include "Communication/Communicator.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-
-namespace GksGpu {
-
-void DataBaseAllocatorCPU::freeMemory( DataBase& dataBase)
-{
-    dataBase.cellToNode.clear();
-    dataBase.faceToNode.clear();
-
-    dataBase.cellPropertiesHost.clear();
-
-    delete [] dataBase.cellToCell;
-
-    delete [] dataBase.faceToCell;
-
-    delete [] dataBase.parentCell;
-
-    delete [] dataBase.faceCenter;
-    delete [] dataBase.cellCenter;
-
-    delete [] dataBase.cellProperties;
-
-    delete [] dataBase.faceOrientation;
-
-    delete [] dataBase.fineToCoarse;
-    delete [] dataBase.coarseToFine;
-
-    delete [] dataBase.data;
-    delete [] dataBase.dataUpdate;
-
-    delete [] dataBase.massFlux;
-
-    delete [] dataBase.diffusivity;
-
-    delete [] dataBase.crashCellIndex;
-
-    dataBase.dataHost.clear();
-}
-
-void DataBaseAllocatorCPU::allocateMemory(SPtr<DataBase> dataBase)
-{
-    dataBase->cellToNode.resize( dataBase->numberOfCells );
-    dataBase->faceToNode.resize( dataBase->numberOfFaces );
-
-    dataBase->cellPropertiesHost.resize( dataBase->numberOfCells );
-
-    dataBase->cellToCell = new uint [ LENGTH_CELL_TO_CELL * dataBase->numberOfCells ];
-
-    dataBase->faceToCell = new uint [ LENGTH_FACE_TO_CELL * dataBase->numberOfFaces ];
-
-    dataBase->parentCell = new uint [ dataBase->numberOfCells ];
-
-    dataBase->faceCenter = new real [ LENGTH_VECTOR * dataBase->numberOfFaces ];
-    dataBase->cellCenter = new real [ LENGTH_VECTOR * dataBase->numberOfCells ];
-
-    dataBase->cellProperties = new CellProperties [ dataBase->numberOfCells ];
-
-    dataBase->faceOrientation = new char [ dataBase->numberOfFaces ];
-
-    dataBase->fineToCoarse = new uint [ LENGTH_FINE_TO_COARSE * dataBase->numberOfCoarseGhostCells ];
-    dataBase->coarseToFine = new uint [ LENGTH_COARSE_TO_FINE * dataBase->numberOfFineGhostCells   ];
-
-    dataBase->data       = new real            [ LENGTH_CELL_DATA * dataBase->numberOfCells ];
-    dataBase->dataUpdate = new realAccumulator [ LENGTH_CELL_DATA * dataBase->numberOfCells ];
-
-    dataBase->massFlux   = new real [ LENGTH_VECTOR    * dataBase->numberOfCells ];
-
-    dataBase->diffusivity  = new realAccumulator [ dataBase->numberOfCells ];
-
-    dataBase->crashCellIndex = new int;
-
-    dataBase->dataHost.resize( LENGTH_CELL_DATA * dataBase->numberOfCells );
-
-    dataBase->diffusivityHost.resize( dataBase->numberOfCells );
-}
-
-void DataBaseAllocatorCPU::copyMesh(SPtr<DataBase> dataBase, GksMeshAdapter & adapter)
-{
-    dataBase->nodeCoordinates = adapter.nodes;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    for( uint cellIdx = 0; cellIdx < dataBase->numberOfCells; cellIdx++ )
-    {
-        dataBase->cellToNode[ cellIdx ][ 0 ] = adapter.cells[ cellIdx ].cellToNode[ 7 ];
-        dataBase->cellToNode[ cellIdx ][ 1 ] = adapter.cells[ cellIdx ].cellToNode[ 3 ];
-        dataBase->cellToNode[ cellIdx ][ 2 ] = adapter.cells[ cellIdx ].cellToNode[ 1 ];
-        dataBase->cellToNode[ cellIdx ][ 3 ] = adapter.cells[ cellIdx ].cellToNode[ 5 ];
-        dataBase->cellToNode[ cellIdx ][ 4 ] = adapter.cells[ cellIdx ].cellToNode[ 6 ];
-        dataBase->cellToNode[ cellIdx ][ 5 ] = adapter.cells[ cellIdx ].cellToNode[ 2 ];
-        dataBase->cellToNode[ cellIdx ][ 6 ] = adapter.cells[ cellIdx ].cellToNode[ 0 ];
-        dataBase->cellToNode[ cellIdx ][ 7 ] = adapter.cells[ cellIdx ].cellToNode[ 4 ];
-        
-        for( uint neighbordx = 0; neighbordx < LENGTH_CELL_TO_CELL; neighbordx++ )
-            dataBase->cellToCell[ CELL_TO_CELL( cellIdx, neighbordx, dataBase->numberOfCells ) ] 
-                = adapter.cells[ cellIdx ].cellToCell[ neighbordx ];
-
-        dataBase->parentCell[ cellIdx ] = adapter.cells[ cellIdx ].parent;
-
-        dataBase->cellCenter[ VEC_X( cellIdx, dataBase->numberOfCells ) ] = adapter.cells[ cellIdx ].cellCenter.x;
-        dataBase->cellCenter[ VEC_Y( cellIdx, dataBase->numberOfCells ) ] = adapter.cells[ cellIdx ].cellCenter.y;
-        dataBase->cellCenter[ VEC_Z( cellIdx, dataBase->numberOfCells ) ] = adapter.cells[ cellIdx ].cellCenter.z;
-
-        dataBase->cellPropertiesHost[ cellIdx ] = CELL_PROPERTIES_DEFAULT;
-
-        if( adapter.cells[ cellIdx ].isWall )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_WALL ); 
-
-        if( adapter.cells[ cellIdx ].isFluxBC )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_IS_FLUX_BC );
-
-        if( adapter.cells[ cellIdx ].isInsulated )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_IS_INSULATED ); 
-
-        if( adapter.cells[ cellIdx ].isGhostCell )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_GHOST ); 
-
-        if( adapter.cells[ cellIdx ].isFineGhostCell() )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_FINE_GHOST ); 
-    }
-
-    for( uint faceIdx = 0; faceIdx < dataBase->numberOfFaces; faceIdx++ )
-    {
-        for( uint nodeIdx = 0; nodeIdx < 4; nodeIdx++ )
-            dataBase->faceToNode[ faceIdx ][ nodeIdx ]
-                = adapter.faces[ faceIdx ].faceToNode[ nodeIdx ];
-
-        dataBase->faceToCell[ NEG_CELL( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].negCell;
-        dataBase->faceToCell[ POS_CELL( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].posCell;
-
-        dataBase->faceCenter[ VEC_X( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].faceCenter.x;
-        dataBase->faceCenter[ VEC_Y( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].faceCenter.y;
-        dataBase->faceCenter[ VEC_Z( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].faceCenter.z;
-
-        dataBase->faceOrientation[ faceIdx ] = adapter.faces[ faceIdx ].orientation;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    for( uint idx = 0; idx < dataBase->numberOfCoarseGhostCells; idx++ ){
-        for( uint connectivityIdx = 0; connectivityIdx < LENGTH_FINE_TO_COARSE; connectivityIdx++ ){
-            dataBase->fineToCoarse[ FINE_TO_COARSE( idx, connectivityIdx, dataBase->numberOfCoarseGhostCells ) ]
-                = adapter.fineToCoarse[idx][connectivityIdx];
-        }
-    }
-
-    for( uint idx = 0; idx < dataBase->numberOfFineGhostCells; idx++ ){
-        for( uint connectivityIdx = 0; connectivityIdx < LENGTH_COARSE_TO_FINE; connectivityIdx++ ){
-            dataBase->coarseToFine[ COARSE_TO_FINE( idx, connectivityIdx, dataBase->numberOfFineGhostCells ) ]
-                = adapter.coarseToFine[idx][connectivityIdx];
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    memcpy ( dataBase->cellProperties, dataBase->cellPropertiesHost.data(), sizeof(CellProperties) * dataBase->numberOfCells );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *dataBase->crashCellIndex = -1;
-}
-
-void DataBaseAllocatorCPU::copyDataHostToDevice(SPtr<DataBase> dataBase)
-{
-    memcpy( dataBase->data, dataBase->dataHost.data(), sizeof(real) * LENGTH_CELL_DATA * dataBase->numberOfCells );
-}
-
-void DataBaseAllocatorCPU::copyDataDeviceToHost(SPtr<DataBase> dataBase, real* hostData)
-{
-    memcpy( hostData, dataBase->data, sizeof(real) * LENGTH_CELL_DATA * dataBase->numberOfCells );
-    
-    memcpy( dataBase->diffusivityHost.data(), dataBase->diffusivity, sizeof(real) * dataBase->numberOfCells );
-}
-
-int DataBaseAllocatorCPU::getCrashCellIndex(SPtr<DataBase> dataBase)
-{
-    return *dataBase->crashCellIndex;
-}
-
-void DataBaseAllocatorCPU::freeMemory(GksGpu::BoundaryCondition& boundaryCondition)
-{
-    delete [] boundaryCondition.ghostCells ;
-    delete [] boundaryCondition.domainCells;
-    delete [] boundaryCondition.secondCells;
-}
-
-void DataBaseAllocatorCPU::allocateMemory(SPtr<GksGpu::BoundaryCondition> boundaryCondition, std::vector<uint> ghostCells, std::vector<uint> domainCells, std::vector<uint> secondCells)
-{
-    boundaryCondition->ghostCells  = new uint[ ghostCells.size()  ];
-    boundaryCondition->domainCells = new uint[ domainCells.size() ];
-    boundaryCondition->secondCells = new uint[ secondCells.size() ];
-
-    memcpy ( boundaryCondition->ghostCells , ghostCells.data() , sizeof(uint) * ghostCells.size()  );
-    memcpy ( boundaryCondition->domainCells, domainCells.data(), sizeof(uint) * domainCells.size() );
-    memcpy ( boundaryCondition->secondCells, secondCells.data(), sizeof(uint) * secondCells.size() );
-}
-
-void DataBaseAllocatorCPU::freeMemory(Communicator & communicator)
-{
-    delete [] communicator.sendIndices;
-    delete [] communicator.recvIndices;
-    
-    delete [] communicator.sendBuffer;
-    delete [] communicator.recvBuffer;
-    
-    delete [] communicator.sendBufferHost;
-    delete [] communicator.recvBufferHost;
-}
-
-void DataBaseAllocatorCPU::allocateMemory(Communicator & communicator, std::vector<uint>& sendIndices, std::vector<uint>& recvIndices)
-{
-    communicator.sendIndices     = new uint[communicator.numberOfSendNodes];
-    communicator.recvIndices     = new uint[communicator.numberOfRecvNodes];
-
-    communicator.sendBuffer      = new real[LENGTH_CELL_DATA * communicator.numberOfSendNodes];
-    communicator.recvBuffer      = new real[LENGTH_CELL_DATA * communicator.numberOfRecvNodes];
-
-    communicator.sendBufferHost  = new real[LENGTH_CELL_DATA * communicator.numberOfSendNodes];
-    communicator.recvBufferHost  = new real[LENGTH_CELL_DATA * communicator.numberOfRecvNodes];
-
-    memcpy ( communicator.sendIndices , sendIndices.data() , sizeof(uint) * communicator.numberOfSendNodes );
-    memcpy ( communicator.recvIndices , recvIndices.data() , sizeof(uint) * communicator.numberOfRecvNodes );
-}
-
-void DataBaseAllocatorCPU::copyDataDeviceToDevice(SPtr<Communicator> dst, SPtr<Communicator> src)
-{
-    memcpy( dst->recvBuffer, src->sendBuffer, LENGTH_CELL_DATA * sizeof(real) * src->numberOfSendNodes );
-}
-
-void DataBaseAllocatorCPU::copyBuffersDeviceToHost(SPtr<Communicator> communicator)
-{
-    memcpy( communicator->sendBufferHost, communicator->sendBuffer, LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfSendNodes );
-}
-
-void DataBaseAllocatorCPU::copyBuffersHostToDevice(SPtr<Communicator> communicator)
-{
-    memcpy( communicator->recvBuffer, communicator->recvBufferHost, LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfRecvNodes );
-}
-
-std::string DataBaseAllocatorCPU::getDeviceType()
-{
-    return std::string("CPU");
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/DataBase/DataBaseAllocatorCPU.h b/src/gpu/GksGpu/DataBase/DataBaseAllocatorCPU.h
deleted file mode 100644
index 420223338912f152a44ed92c7140afb587f4ec95..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBaseAllocatorCPU.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef DataBaseAllocatorCPU_H
-#define DatabaseAllocatorCPU_H
-
-#include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "DataBaseAllocator.h"
-
-
-#include "GksGpu_export.h"
-
-namespace GksGpu {
-
-class GKSGPU_EXPORT DataBaseAllocatorCPU : public DataBaseAllocator {
-
-public:
-
-    virtual void freeMemory( DataBase& dataBase ) override;
-
-    virtual void allocateMemory( SPtr<DataBase> dataBase ) override;
-
-    virtual void copyMesh( SPtr<DataBase> dataBase, GksMeshAdapter& adapter ) override;
-
-    virtual void copyDataHostToDevice( SPtr<DataBase> dataBase ) override;
-    
-    virtual void copyDataDeviceToHost( SPtr<DataBase> dataBase, real* dataHost ) override;
-
-    virtual int  getCrashCellIndex( SPtr<DataBase> dataBase ) override;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual void freeMemory( BoundaryCondition& boundaryCondition ) override;
-
-    virtual void allocateMemory( SPtr<BoundaryCondition> boundaryCondition, std::vector<uint> ghostCells, std::vector<uint> domainCells, std::vector<uint> secondCells ) override;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual void freeMemory( Communicator& communicator ) override;
-
-    virtual void allocateMemory( Communicator& communicator, std::vector<uint>& sendIndices, std::vector<uint>& recvIndices ) override;
-
-    virtual void copyDataDeviceToDevice( SPtr<Communicator> dst, SPtr<Communicator> src ) override;
-
-    virtual void copyBuffersDeviceToHost( SPtr<Communicator> communicator ) override;
-    virtual void copyBuffersHostToDevice( SPtr<Communicator> communicator ) override;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual std::string getDeviceType() override;
-};
-
-} // namespace GksGpu
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/DataBase/DataBaseAllocatorGPU.cpp b/src/gpu/GksGpu/DataBase/DataBaseAllocatorGPU.cpp
deleted file mode 100644
index b2eb7d6f124bcb004ee900228a1631a452b6ff3a..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBaseAllocatorGPU.cpp
+++ /dev/null
@@ -1,305 +0,0 @@
-#include "DataBaseAllocatorGPU.h"
-
-#include <cstring>
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "GksMeshAdapter/GksMeshAdapter.h"
-
-#include "DataBase/DataBase.h"
-
-#include "CellProperties/CellProperties.cuh"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-
-#include "Communication/Communicator.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-
-#include "CudaUtility/CudaUtility.h"
-
-namespace GksGpu {
-
-void DataBaseAllocatorGPU::freeMemory( DataBase& dataBase )
-{
-    dataBase.cellToNode.clear();
-    dataBase.faceToNode.clear();
-
-    dataBase.cellPropertiesHost.clear();
-
-    checkCudaErrors( cudaFree ( dataBase.cellToCell ) );
-
-    checkCudaErrors( cudaFree ( dataBase.faceToCell ) );
-
-    checkCudaErrors( cudaFree ( dataBase.parentCell ) );
-
-    checkCudaErrors( cudaFree ( dataBase.faceCenter ) );
-    checkCudaErrors( cudaFree ( dataBase.cellCenter ) );
-
-    checkCudaErrors( cudaFree ( dataBase.cellProperties ) );
-
-    checkCudaErrors( cudaFree ( dataBase.faceOrientation ) );
-
-    checkCudaErrors( cudaFree ( dataBase.fineToCoarse ) );
-    checkCudaErrors( cudaFree ( dataBase.coarseToFine ) );
-
-    checkCudaErrors( cudaFree ( dataBase.data ) );
-    checkCudaErrors( cudaFree ( dataBase.dataUpdate ) );
-
-    checkCudaErrors( cudaFree ( dataBase.massFlux ) );
-
-    checkCudaErrors( cudaFree ( dataBase.diffusivity ) );
-
-    checkCudaErrors( cudaFree ( dataBase.crashCellIndex ) );
-
-    dataBase.dataHost.clear();
-}
-
-void DataBaseAllocatorGPU::allocateMemory(SPtr<DataBase> dataBase)
-{
-    dataBase->cellToNode.resize( dataBase->numberOfCells );
-    dataBase->faceToNode.resize( dataBase->numberOfFaces );
-
-    dataBase->cellPropertiesHost.resize( dataBase->numberOfCells );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->cellToCell, sizeof(uint) * LENGTH_CELL_TO_CELL * dataBase->numberOfCells ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->faceToCell, sizeof(uint) * LENGTH_FACE_TO_CELL * dataBase->numberOfFaces ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->parentCell, sizeof(uint) * dataBase->numberOfCells ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->faceCenter, sizeof(real) * LENGTH_VECTOR * dataBase->numberOfFaces ) );
-    checkCudaErrors( cudaMalloc ( &dataBase->cellCenter, sizeof(real) * LENGTH_VECTOR * dataBase->numberOfCells ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->cellProperties, sizeof(CellProperties) * dataBase->numberOfCells ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->faceOrientation, sizeof(char) * dataBase->numberOfFaces ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->fineToCoarse, sizeof(uint) * LENGTH_FINE_TO_COARSE * dataBase->numberOfCoarseGhostCells ) );
-    checkCudaErrors( cudaMalloc ( &dataBase->coarseToFine, sizeof(uint) * LENGTH_COARSE_TO_FINE * dataBase->numberOfFineGhostCells   ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->data,       sizeof(real) *            LENGTH_CELL_DATA * dataBase->numberOfCells ) );
-    checkCudaErrors( cudaMalloc ( &dataBase->dataUpdate, sizeof(realAccumulator) * LENGTH_CELL_DATA * dataBase->numberOfCells ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->massFlux ,  sizeof(real) * LENGTH_VECTOR    * dataBase->numberOfCells ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->diffusivity,  sizeof(realAccumulator) * dataBase->numberOfCells ) );
-
-    checkCudaErrors( cudaMalloc ( &dataBase->crashCellIndex,  sizeof(int) ) );
-
-    dataBase->dataHost.resize( LENGTH_CELL_DATA * dataBase->numberOfCells );
-
-    dataBase->diffusivityHost.resize( dataBase->numberOfCells );
-}
-
-void DataBaseAllocatorGPU::copyMesh(SPtr<DataBase> dataBase, GksMeshAdapter & adapter)
-{
-    dataBase->nodeCoordinates = adapter.nodes;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    std::vector<uint> cellToCellBuffer   ( LENGTH_CELL_TO_CELL * dataBase->numberOfCells );
-
-    std::vector<uint> faceToCellBuffer   ( LENGTH_FACE_TO_CELL * dataBase->numberOfFaces );
-
-    std::vector<uint> parentCellBuffer   ( dataBase->numberOfCells );
-
-    std::vector<real> faceCenterBuffer   ( LENGTH_VECTOR * dataBase->numberOfFaces );
-    std::vector<real> cellCenterBuffer   ( LENGTH_VECTOR * dataBase->numberOfCells );
-
-    std::vector<char> faceOrientationBuffer( dataBase->numberOfFaces );
-
-    std::vector<uint> fineToCoarseBuffer ( LENGTH_FINE_TO_COARSE * dataBase->numberOfCoarseGhostCells );
-    std::vector<uint> coarseToFineBuffer ( LENGTH_COARSE_TO_FINE * dataBase->numberOfFineGhostCells   );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    for( uint cellIdx = 0; cellIdx < dataBase->numberOfCells; cellIdx++ )
-    {
-        dataBase->cellToNode[ cellIdx ][ 0 ] = adapter.cells[ cellIdx ].cellToNode[ 7 ];
-        dataBase->cellToNode[ cellIdx ][ 1 ] = adapter.cells[ cellIdx ].cellToNode[ 3 ];
-        dataBase->cellToNode[ cellIdx ][ 2 ] = adapter.cells[ cellIdx ].cellToNode[ 1 ];
-        dataBase->cellToNode[ cellIdx ][ 3 ] = adapter.cells[ cellIdx ].cellToNode[ 5 ];
-        dataBase->cellToNode[ cellIdx ][ 4 ] = adapter.cells[ cellIdx ].cellToNode[ 6 ];
-        dataBase->cellToNode[ cellIdx ][ 5 ] = adapter.cells[ cellIdx ].cellToNode[ 2 ];
-        dataBase->cellToNode[ cellIdx ][ 6 ] = adapter.cells[ cellIdx ].cellToNode[ 0 ];
-        dataBase->cellToNode[ cellIdx ][ 7 ] = adapter.cells[ cellIdx ].cellToNode[ 4 ];
-        
-        for( uint neighbordx = 0; neighbordx < LENGTH_CELL_TO_CELL; neighbordx++ )
-            cellToCellBuffer[ CELL_TO_CELL( cellIdx, neighbordx, dataBase->numberOfCells ) ] 
-                = adapter.cells[ cellIdx ].cellToCell[ neighbordx ];
-
-        parentCellBuffer[ cellIdx ] = adapter.cells[ cellIdx ].parent;
-
-        cellCenterBuffer[ VEC_X( cellIdx, dataBase->numberOfCells ) ] = adapter.cells[ cellIdx ].cellCenter.x;
-        cellCenterBuffer[ VEC_Y( cellIdx, dataBase->numberOfCells ) ] = adapter.cells[ cellIdx ].cellCenter.y;
-        cellCenterBuffer[ VEC_Z( cellIdx, dataBase->numberOfCells ) ] = adapter.cells[ cellIdx ].cellCenter.z;
-
-        dataBase->cellPropertiesHost[ cellIdx ] = CELL_PROPERTIES_DEFAULT;
-
-        if( adapter.cells[ cellIdx ].isWall )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_WALL ); 
-
-        if( adapter.cells[ cellIdx ].isFluxBC )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_IS_FLUX_BC );
-
-        if( adapter.cells[ cellIdx ].isInsulated )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_IS_INSULATED ); 
-
-        if( adapter.cells[ cellIdx ].isGhostCell )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_GHOST ); 
-
-        if( adapter.cells[ cellIdx ].isFineGhostCell() )
-            setCellProperties( dataBase->cellPropertiesHost[ cellIdx ], CELL_PROPERTIES_FINE_GHOST ); 
-    }
-
-    for( uint faceIdx = 0; faceIdx < dataBase->numberOfFaces; faceIdx++ )
-    {
-        for( uint nodeIdx = 0; nodeIdx < 4; nodeIdx++ )
-            dataBase->faceToNode[ faceIdx ][ nodeIdx ]
-                = adapter.faces[ faceIdx ].faceToNode[ nodeIdx ];
-
-        faceToCellBuffer[ NEG_CELL( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].negCell;
-        faceToCellBuffer[ POS_CELL( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].posCell;
-
-        faceCenterBuffer[ VEC_X( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].faceCenter.x;
-        faceCenterBuffer[ VEC_Y( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].faceCenter.y;
-        faceCenterBuffer[ VEC_Z( faceIdx, dataBase->numberOfFaces ) ] = adapter.faces[ faceIdx ].faceCenter.z;
-
-        faceOrientationBuffer[ faceIdx ] = adapter.faces[ faceIdx ].orientation;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    for( uint cellIdx = 0; cellIdx < dataBase->numberOfCoarseGhostCells; cellIdx++ ){
-        for( uint connectivityIdx = 0; connectivityIdx < LENGTH_FINE_TO_COARSE; connectivityIdx++ ){
-            fineToCoarseBuffer[ FINE_TO_COARSE( cellIdx, connectivityIdx, dataBase->numberOfCoarseGhostCells ) ]
-                = adapter.fineToCoarse[cellIdx][connectivityIdx];
-        }
-    }
-
-    for( uint cellIdx = 0; cellIdx < dataBase->numberOfFineGhostCells; cellIdx++ ){
-        for( uint connectivityIdx = 0; connectivityIdx < LENGTH_COARSE_TO_FINE; connectivityIdx++ ){
-            coarseToFineBuffer[ COARSE_TO_FINE( cellIdx, connectivityIdx, dataBase->numberOfFineGhostCells ) ]
-                = adapter.coarseToFine[cellIdx][connectivityIdx];
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    checkCudaErrors( cudaMemcpy ( dataBase->cellToCell,     cellToCellBuffer.data(),     sizeof(uint) * LENGTH_CELL_TO_CELL * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-    
-    checkCudaErrors( cudaMemcpy ( dataBase->faceToCell,     faceToCellBuffer.data(),     sizeof(uint) * LENGTH_FACE_TO_CELL * dataBase->numberOfFaces, cudaMemcpyHostToDevice ) );
-
-    checkCudaErrors( cudaMemcpy ( dataBase->parentCell,     parentCellBuffer.data(),     sizeof(uint) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-
-    checkCudaErrors( cudaMemcpy ( dataBase->faceCenter,     faceCenterBuffer.data(),     sizeof(real) * LENGTH_VECTOR * dataBase->numberOfFaces, cudaMemcpyHostToDevice ) );
-    checkCudaErrors( cudaMemcpy ( dataBase->cellCenter,     cellCenterBuffer.data(),     sizeof(real) * LENGTH_VECTOR * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-
-    checkCudaErrors( cudaMemcpy ( dataBase->cellProperties, dataBase->cellPropertiesHost.data(), sizeof(CellProperties) * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-
-    checkCudaErrors( cudaMemcpy ( dataBase->faceOrientation, faceOrientationBuffer.data(), sizeof(char) * dataBase->numberOfFaces, cudaMemcpyHostToDevice ) );
-
-    checkCudaErrors( cudaMemcpy ( dataBase->fineToCoarse,   fineToCoarseBuffer.data(),   sizeof(uint) * LENGTH_FINE_TO_COARSE * dataBase->numberOfCoarseGhostCells, cudaMemcpyHostToDevice ) );
-    checkCudaErrors( cudaMemcpy ( dataBase->coarseToFine,   coarseToFineBuffer.data(),   sizeof(uint) * LENGTH_COARSE_TO_FINE * dataBase->numberOfFineGhostCells  , cudaMemcpyHostToDevice ) );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    checkCudaErrors( cudaMemset( dataBase->crashCellIndex, -1, sizeof(int) ) );
-
-    //////////////////////////////////////////////////////////////////////////
-}
-
-void DataBaseAllocatorGPU::copyDataHostToDevice(SPtr<DataBase> dataBase)
-{
-    checkCudaErrors( cudaMemcpy( dataBase->data, dataBase->dataHost.data(), sizeof(real) * LENGTH_CELL_DATA * dataBase->numberOfCells, cudaMemcpyHostToDevice ) );
-}
-
-void DataBaseAllocatorGPU::copyDataDeviceToHost(SPtr<DataBase> dataBase,  real* hostData )
-{
-    checkCudaErrors( cudaMemcpy( hostData, dataBase->data, sizeof(real) * LENGTH_CELL_DATA * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-
-    checkCudaErrors( cudaMemcpy( dataBase->diffusivityHost.data(), dataBase->diffusivity, sizeof(real) * dataBase->numberOfCells, cudaMemcpyDeviceToHost ) );
-}
-
-int DataBaseAllocatorGPU::getCrashCellIndex(SPtr<DataBase> dataBase)
-{
-    int crashCellIndex;
-
-    checkCudaErrors( cudaMemcpy( &crashCellIndex, dataBase->crashCellIndex, sizeof(int), cudaMemcpyDeviceToHost ) );
-
-    return crashCellIndex;
-}
-
-void DataBaseAllocatorGPU::freeMemory(GksGpu::BoundaryCondition& boundaryCondition)
-{
-    checkCudaErrors( cudaFree ( boundaryCondition.ghostCells  ) );
-    checkCudaErrors( cudaFree ( boundaryCondition.domainCells ) );
-    checkCudaErrors( cudaFree ( boundaryCondition.secondCells ) );
-}
-
-void DataBaseAllocatorGPU::allocateMemory(SPtr<GksGpu::BoundaryCondition> boundaryCondition, std::vector<uint> ghostCells, std::vector<uint> domainCells, std::vector<uint> secondCells)
-{
-    checkCudaErrors( cudaMalloc ( &boundaryCondition->ghostCells , sizeof(uint) * ghostCells.size()  ) );
-    checkCudaErrors( cudaMalloc ( &boundaryCondition->domainCells, sizeof(uint) * domainCells.size() ) );
-    checkCudaErrors( cudaMalloc ( &boundaryCondition->secondCells, sizeof(uint) * secondCells.size() ) );
-
-    checkCudaErrors( cudaMemcpy ( boundaryCondition->ghostCells , ghostCells.data() , sizeof(uint) * ghostCells.size() , cudaMemcpyHostToDevice ) );
-    checkCudaErrors( cudaMemcpy ( boundaryCondition->domainCells, domainCells.data(), sizeof(uint) * domainCells.size(), cudaMemcpyHostToDevice ) );
-    checkCudaErrors( cudaMemcpy ( boundaryCondition->secondCells, secondCells.data(), sizeof(uint) * secondCells.size(), cudaMemcpyHostToDevice ) );
-}
-
-void DataBaseAllocatorGPU::freeMemory(Communicator & communicator)
-{
-    checkCudaErrors( cudaFree     ( communicator.sendIndices     ) );
-    checkCudaErrors( cudaFree     ( communicator.recvIndices     ) );
-
-    checkCudaErrors( cudaFree     ( communicator.sendBuffer      ) );
-    checkCudaErrors( cudaFree     ( communicator.recvBuffer      ) );
-
-    checkCudaErrors( cudaFreeHost ( communicator.sendBufferHost  ) );
-    checkCudaErrors( cudaFreeHost ( communicator.recvBufferHost  ) );
-}
-
-void DataBaseAllocatorGPU::allocateMemory(Communicator & communicator, std::vector<uint>& sendIndices, std::vector<uint>& recvIndices)
-{
-    checkCudaErrors( cudaMalloc     ( &communicator.sendIndices    , sizeof(uint) * communicator.numberOfSendNodes ) );
-    checkCudaErrors( cudaMalloc     ( &communicator.recvIndices    , sizeof(uint) * communicator.numberOfRecvNodes ) );
-    
-    checkCudaErrors( cudaMalloc     ( &communicator.sendBuffer     , LENGTH_CELL_DATA * sizeof(real) * communicator.numberOfSendNodes ) );
-    checkCudaErrors( cudaMalloc     ( &communicator.recvBuffer     , LENGTH_CELL_DATA * sizeof(real) * communicator.numberOfRecvNodes ) );
-    
-    checkCudaErrors( cudaMallocHost ( &communicator.sendBufferHost , LENGTH_CELL_DATA * sizeof(real) * communicator.numberOfSendNodes ) );
-    checkCudaErrors( cudaMallocHost ( &communicator.recvBufferHost , LENGTH_CELL_DATA * sizeof(real) * communicator.numberOfRecvNodes ) );
-
-    checkCudaErrors( cudaMemcpy     ( communicator.sendIndices , sendIndices.data() , sizeof(uint) * communicator.numberOfSendNodes, cudaMemcpyHostToDevice ) );
-    checkCudaErrors( cudaMemcpy     ( communicator.recvIndices , recvIndices.data() , sizeof(uint) * communicator.numberOfRecvNodes, cudaMemcpyHostToDevice ) );
-}
-
-void DataBaseAllocatorGPU::copyDataDeviceToDevice(SPtr<Communicator> dst, SPtr<Communicator> src)
-{
-    checkCudaErrors( cudaMemcpy ( dst->recvBuffer, src->sendBuffer, LENGTH_CELL_DATA * sizeof(real) * src->numberOfSendNodes, cudaMemcpyDefault ) );
-}
-
-void DataBaseAllocatorGPU::copyBuffersDeviceToHost(SPtr<Communicator> communicator)
-{
-    size_t size = LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfSendNodes;
-    cudaMemcpyAsync ( communicator->sendBufferHost, communicator->sendBuffer, size, cudaMemcpyDeviceToHost, CudaUtility::communicationStream );
-}
-
-void DataBaseAllocatorGPU::copyBuffersHostToDevice(SPtr<Communicator> communicator)
-{
-    size_t size = LENGTH_CELL_DATA * sizeof(real) * communicator->numberOfRecvNodes;
-    cudaMemcpyAsync ( communicator->recvBuffer, communicator->recvBufferHost, size, cudaMemcpyHostToDevice, CudaUtility::communicationStream );
-}
-
-std::string DataBaseAllocatorGPU::getDeviceType()
-{
-    return std::string("GPU");
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/DataBase/DataBaseAllocatorGPU.h b/src/gpu/GksGpu/DataBase/DataBaseAllocatorGPU.h
deleted file mode 100644
index 4ed547e223c36248cd323ced4ecca637fcdce9a8..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBaseAllocatorGPU.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef DataBaseAllocatorGPU_H
-#define DatabaseAllocatorGPU_H
-
-#include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "DataBaseAllocator.h"
-
-
-#include "GksGpu_export.h"
-
-namespace GksGpu {
-
-class GKSGPU_EXPORT DataBaseAllocatorGPU : public DataBaseAllocator {
-
-public:
-
-    virtual void freeMemory( DataBase& dataBase ) override;
-
-    virtual void allocateMemory( SPtr<DataBase> dataBase ) override;
-
-    virtual void copyMesh( SPtr<DataBase> dataBase, GksMeshAdapter& adapter ) override;
-
-    virtual void copyDataHostToDevice( SPtr<DataBase> dataBase ) override;
-    
-    virtual void copyDataDeviceToHost( SPtr<DataBase> dataBase, real* dataHost ) override;
-
-    virtual int  getCrashCellIndex( SPtr<DataBase> dataBase ) override;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual void freeMemory( BoundaryCondition& boundaryCondition ) override;
-
-    virtual void allocateMemory( SPtr<BoundaryCondition> boundaryCondition, std::vector<uint> ghostCells, std::vector<uint> domainCells, std::vector<uint> secondCells ) override;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual void freeMemory( Communicator& communicator ) override;
-
-    virtual void allocateMemory( Communicator& communicator, std::vector<uint>& sendIndices, std::vector<uint>& recvIndices ) override;
-
-    virtual void copyDataDeviceToDevice( SPtr<Communicator> dst, SPtr<Communicator> src ) override;
-
-    virtual void copyBuffersDeviceToHost( SPtr<Communicator> communicator ) override;
-    virtual void copyBuffersHostToDevice( SPtr<Communicator> communicator ) override;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    virtual std::string getDeviceType() override;
-};
-
-} // namespace GksGpu
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/DataBase/DataBaseStruct.h b/src/gpu/GksGpu/DataBase/DataBaseStruct.h
deleted file mode 100644
index 3fb5a50a0c396f92d49ebf29fd197067ccda6500..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/DataBase/DataBaseStruct.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef DataBaseStruct_H
-#define DataBaseStruct_H
-
-#include "Core/DataTypes.h"
-
-
-
-namespace GksGpu{ 
-
-struct GKSGPU_EXPORT DataBaseStruct
-{
-    uint  numberOfCells;
-    uint  numberOfFaces;
-    
-    uint  numberOfCoarseGhostCells;
-    uint  numberOfFineGhostCells;
-
-    uint* cellToCell;
-    uint* faceToCell;
-
-    uint* parentCell;
-
-    uint* fineToCoarse;
-    uint* coarseToFine;
-
-    real* faceCenter;
-    real* cellCenter;
-
-    CellProperties* cellProperties;
-
-    char* faceOrientation;
-
-    real*            data;
-    realAccumulator* dataUpdate;
-
-    real* massFlux;
-
-    realAccumulator* diffusivity;
-
-    int* crashCellIndex;
-};
-
-} // namespace GksGpu
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/Definitions/AccumulatorDataType.h b/src/gpu/GksGpu/Definitions/AccumulatorDataType.h
deleted file mode 100644
index 20807f33a2ad3f892266975395ab980814c8362f..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Definitions/AccumulatorDataType.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef AccumulatorDataType_H
-#define AccumulatorDataType_H
-
-namespace GksGpu {
-
-// This file is used to control the data type of accumulator variables.
-// Accumulator variables are variables, where cell values are written 
-// during the flux computation, which is per face. Since the face evaluation 
-// order on GPUs is arbitrary, the cutoff errors for these accumulators are non 
-// deterministic. This deficiency can be solved for single precision calculations
-// by setting the accumulator data type to double. The deviations are then 
-// so small, that they are cut off during the downcast to single.
-// using double precision accumulators has some performance implications, 
-// especially on consumer hardware.
-
-//typedef float realAccumulator;
-typedef double realAccumulator;
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Definitions/CudaAwareMpi.h b/src/gpu/GksGpu/Definitions/CudaAwareMpi.h
deleted file mode 100644
index b12e5eca17bcf02a1e7a1446cf878484833423fb..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Definitions/CudaAwareMpi.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef CudaAwareMpi_H
-#define CudaAwareMpi_H
-
-//#define USE_CUDA_AWARE_MPI
-
-#endif
diff --git a/src/gpu/GksGpu/Definitions/MemoryAccessPattern.h b/src/gpu/GksGpu/Definitions/MemoryAccessPattern.h
deleted file mode 100644
index 0b1c33861b06986f318860201db0fdec69d1a11d..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Definitions/MemoryAccessPattern.h
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef DataDefinitions_H
-#define DataDefinitions_H
-
-#include "PassiveScalar.h"
-
-#define SOA
-
-//////////////////////////////////////////////////////////////////////////
-
-#define LENGTH_VECTOR       3
-
-#ifdef USE_PASSIVE_SCALAR
-    #define LENGTH_CELL_DATA    7
-#else
-    #define LENGTH_CELL_DATA    5
-#endif
-
-#define LENGTH_CELL_TO_CELL 6
-
-#define LENGTH_FACE_TO_CELL 2
-
-#define LENGTH_FINE_TO_COARSE 9
-
-#define LENGTH_COARSE_TO_FINE 9
-
-//////////////////////////////////////////////////////////////////////////
-
-#ifdef SOA
-
-#define VEC_X(vecIdx, numberOfVectors)  ( 0 * numberOfVectors + vecIdx )
-#define VEC_Y(vecIdx, numberOfVectors)  ( 1 * numberOfVectors + vecIdx )
-#define VEC_Z(vecIdx, numberOfVectors)  ( 2 * numberOfVectors + vecIdx )
-                                                           
-#define RHO__( cellIdx, numberOfCells ) ( 0 * numberOfCells   + cellIdx )
-#define RHO_U( cellIdx, numberOfCells ) ( 1 * numberOfCells   + cellIdx )
-#define RHO_V( cellIdx, numberOfCells ) ( 2 * numberOfCells   + cellIdx )
-#define RHO_W( cellIdx, numberOfCells ) ( 3 * numberOfCells   + cellIdx )
-#define RHO_E( cellIdx, numberOfCells ) ( 4 * numberOfCells   + cellIdx )
-
-#ifdef USE_PASSIVE_SCALAR
-    #define RHO_S_1( cellIdx, numberOfCells ) ( 5 * numberOfCells   + cellIdx )
-    #define RHO_S_2( cellIdx, numberOfCells ) ( 6 * numberOfCells   + cellIdx )
-#endif // USE_PASSIVE_SCALAR
-
-#define CELL_TO_CELL( cellIdx, neighborIdx, numberOfCells ) ( neighborIdx * numberOfCells + cellIdx )
-
-#define NEG_CELL( faceIdx, numberOfFaces ) (                 faceIdx )
-#define POS_CELL( faceIdx, numberOfFaces ) ( numberOfFaces + faceIdx )
-
-#define FINE_TO_COARSE( idx, cellIdx, number ) ( cellIdx * number + idx )
-#define COARSE_TO_FINE( idx, cellIdx, number ) ( cellIdx * number + idx )
-
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-
-#ifdef AOS
-
-#define VEC_X(vecIdx, numberOfVectors)  ( vecIdx * LENGTH_VECTOR     )
-#define VEC_Y(vecIdx, numberOfVectors)  ( vecIdx * LENGTH_VECTOR + 1 )
-#define VEC_Z(vecIdx, numberOfVectors)  ( vecIdx * LENGTH_VECTOR + 2 )
-                                                           
-#define RHO__( cellIdx, numberOfCells ) ( cellIdx * LENGTH_CELL_DATA     )
-#define RHO_U( cellIdx, numberOfCells ) ( cellIdx * LENGTH_CELL_DATA + 1 )
-#define RHO_V( cellIdx, numberOfCells ) ( cellIdx * LENGTH_CELL_DATA + 2 )
-#define RHO_W( cellIdx, numberOfCells ) ( cellIdx * LENGTH_CELL_DATA + 3 )
-#define RHO_E( cellIdx, numberOfCells ) ( cellIdx * LENGTH_CELL_DATA + 4 )
-
-#ifdef USE_PASSIVE_SCALAR
-    #define RHO_S_1( cellIdx, numberOfCells ) ( cellIdx * LENGTH_CELL_DATA + 5 )
-    #define RHO_S_2( cellIdx, numberOfCells ) ( cellIdx * LENGTH_CELL_DATA + 6 )
-#endif // USE_PASSIVE_SCALAR
-                                                                         
-#define CELL_TO_CELL( cellIdx, neighborIdx, numberOfCells ) ( cellIdx * LENGTH_CELL_TO_CELL + neighborIdx )
-
-#define NEG_CELL( faceIdx, numberOfFaces ) ( faceIdx * LENGTH_FACE_TO_CELL     )
-#define POS_CELL( faceIdx, numberOfFaces ) ( faceIdx * LENGTH_FACE_TO_CELL + 1 )
-
-#define FINE_TO_COARSE( idx, cellIdx, number ) ( cellIdx * LENGTH_FINE_TO_COARSE + idx )
-#define COARSE_TO_FINE( idx, cellIdx, number ) ( cellIdx * LENGTH_COARSE_TO_FINE + idx )
-
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-
-#endif
diff --git a/src/gpu/GksGpu/Definitions/PassiveScalar.h b/src/gpu/GksGpu/Definitions/PassiveScalar.h
deleted file mode 100644
index b71ada48a0c05eaa726abae31bf76f7b56a2c0fd..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Definitions/PassiveScalar.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef PassiveScalar_H
-#define PassiveScalar_H
-
-//#define USE_PASSIVE_SCALAR
-
-#endif
diff --git a/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh b/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh
deleted file mode 100644
index 2ad158173970c5bb36637643f621c729a8fcc37a..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef AccessDeviceData_CUH
-#define AccessDeviceData_CUH
-
-#ifdef __CUDACC__
-#include <cuda_runtime.h>
-#else
-#ifndef __host__
-#define __host__
-#endif
-#ifndef __device__
-#define __device__
-#endif
-#endif
-
-#include "Core/DataTypes.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-namespace GksGpu {
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void readCellData(const uint cellIdx, const DataBaseStruct& dataBase, ConservedVariables& cellCons)
-{
-    cellCons.rho  = dataBase.data[ RHO__( cellIdx, dataBase.numberOfCells ) ];
-    cellCons.rhoU = dataBase.data[ RHO_U( cellIdx, dataBase.numberOfCells ) ];
-    cellCons.rhoV = dataBase.data[ RHO_V( cellIdx, dataBase.numberOfCells ) ];
-    cellCons.rhoW = dataBase.data[ RHO_W( cellIdx, dataBase.numberOfCells ) ];
-    cellCons.rhoE = dataBase.data[ RHO_E( cellIdx, dataBase.numberOfCells ) ];
-#ifdef USE_PASSIVE_SCALAR
-	cellCons.rhoS_1 = dataBase.data[ RHO_S_1( cellIdx, dataBase.numberOfCells ) ];
-	cellCons.rhoS_2 = dataBase.data[ RHO_S_2( cellIdx, dataBase.numberOfCells ) ];
-#endif // USE_PASSIVE_SCALAR
-}
-
-__host__ __device__ inline void writeCellData(const uint cellIdx, const DataBaseStruct& dataBase, ConservedVariables& cellCons)
-{
-    dataBase.data[ RHO__( cellIdx, dataBase.numberOfCells ) ] = cellCons.rho ;
-    dataBase.data[ RHO_U( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoU;
-    dataBase.data[ RHO_V( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoV;
-    dataBase.data[ RHO_W( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoW;
-    dataBase.data[ RHO_E( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoE;
-#ifdef USE_PASSIVE_SCALAR
-	dataBase.data[ RHO_S_1( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoS_1;
-	dataBase.data[ RHO_S_2( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoS_2;
-#endif // USE_PASSIVE_SCALAR
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void readCellDataUpdate(const uint cellIdx, const DataBaseStruct& dataBase, ConservedVariables& cellCons)
-{
-    cellCons.rho  = dataBase.dataUpdate[ RHO__( cellIdx, dataBase.numberOfCells ) ];
-    cellCons.rhoU = dataBase.dataUpdate[ RHO_U( cellIdx, dataBase.numberOfCells ) ];
-    cellCons.rhoV = dataBase.dataUpdate[ RHO_V( cellIdx, dataBase.numberOfCells ) ];
-    cellCons.rhoW = dataBase.dataUpdate[ RHO_W( cellIdx, dataBase.numberOfCells ) ];
-    cellCons.rhoE = dataBase.dataUpdate[ RHO_E( cellIdx, dataBase.numberOfCells ) ];
-#ifdef USE_PASSIVE_SCALAR
-	cellCons.rhoS_1 = dataBase.dataUpdate[ RHO_S_1( cellIdx, dataBase.numberOfCells ) ];
-	cellCons.rhoS_2 = dataBase.dataUpdate[ RHO_S_2( cellIdx, dataBase.numberOfCells ) ];
-#endif // USE_PASSIVE_SCALAR
-}
-
-__host__ __device__ inline void writeCellDataUpdate(const uint cellIdx, const DataBaseStruct& dataBase, ConservedVariables& cellCons)
-{
-    dataBase.dataUpdate[ RHO__( cellIdx, dataBase.numberOfCells ) ] = cellCons.rho ;
-    dataBase.dataUpdate[ RHO_U( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoU;
-    dataBase.dataUpdate[ RHO_V( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoV;
-    dataBase.dataUpdate[ RHO_W( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoW;
-    dataBase.dataUpdate[ RHO_E( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoE;
-#ifdef USE_PASSIVE_SCALAR
-	dataBase.dataUpdate[ RHO_S_1( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoS_1;
-	dataBase.dataUpdate[ RHO_S_2( cellIdx, dataBase.numberOfCells ) ] = cellCons.rhoS_2;
-#endif // USE_PASSIVE_SCALAR
-}
-
-} // namespace GksGpu
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh b/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh
deleted file mode 100644
index 3b7929b39b47761624fec7052becc55921990276..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh
+++ /dev/null
@@ -1,264 +0,0 @@
-#ifndef FlowStateData_H
-#define FlowStateData_H
-
-#ifdef __CUDACC__
-#include <cuda_runtime.h>
-#else
-#ifndef __host__
-#define __host__
-#endif
-#ifndef __device__
-#define __device__
-#endif
-#endif
-
-#include "Core/DataTypes.h"
-
-#include "Definitions/PassiveScalar.h"
-
-#include <lbm/constants/NumericConstants.h>
-
-using namespace vf::lbm::constant;
-
-namespace GksGpu {
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-struct PrimitiveVariables
-{
-    real rho;
-    real U;
-    real V;
-    real W;
-    real lambda;
-    #ifdef USE_PASSIVE_SCALAR
-    real S_1;
-    real S_2;
-    #endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-    __host__ __device__ PrimitiveVariables()
-		: rho   (c0o1)
-         ,U     (c0o1)
-         ,V     (c0o1)
-         ,W     (c0o1)
-         ,lambda(c0o1)
-    #ifdef USE_PASSIVE_SCALAR
-         ,S_1   (c0o1)
-         ,S_2   (c0o1)
-    #endif
-    {}
-
-    //////////////////////////////////////////////////////////////////////////
-
-    __host__ __device__ PrimitiveVariables(real rho
-                                          ,real U
-                                          ,real V
-                                          ,real W
-                                          ,real lambda
-    #ifdef USE_PASSIVE_SCALAR
-                                          ,real S_1 = c0o1
-                                          ,real S_2 = c0o1
-    #endif
-    )
-        : rho   (rho   )
-         ,U     (U     )
-         ,V     (V     )
-         ,W     (W     )
-         ,lambda(lambda)
-    #ifdef USE_PASSIVE_SCALAR
-         ,S_1   (S_1   )
-         ,S_2   (S_2   )
-    #endif
-    {}
-
-    //////////////////////////////////////////////////////////////////////////
-};
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-struct ConservedVariables
-{
-    real rho;
-    real rhoU;
-    real rhoV;
-    real rhoW;
-    real rhoE;
-    #ifdef USE_PASSIVE_SCALAR
-    real rhoS_1;
-    real rhoS_2;
-    #endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-    __host__ __device__ ConservedVariables()
-        : rho (c0o1)
-         ,rhoU(c0o1)
-         ,rhoV(c0o1)
-         ,rhoW(c0o1)
-         ,rhoE(c0o1)
-    #ifdef USE_PASSIVE_SCALAR
-         ,rhoS_1(c0o1)
-         ,rhoS_2(c0o1)
-    #endif
-    {}
-
-    //////////////////////////////////////////////////////////////////////////
-		  
-    __host__ __device__ ConservedVariables(real rho
-                                          ,real rhoU
-                                          ,real rhoV
-                                          ,real rhoW
-                                          ,real rhoE
-    #ifdef USE_PASSIVE_SCALAR
-                                          ,real rhoS_1 = c0o1
-                                          ,real rhoS_2 = c0o1
-    #endif
-    )
-        : rho (rho )
-         ,rhoU(rhoU)
-         ,rhoV(rhoV)
-         ,rhoW(rhoW)
-         ,rhoE(rhoE)
-    #ifdef USE_PASSIVE_SCALAR
-         ,rhoS_1(rhoS_1)
-         ,rhoS_2(rhoS_2)
-    #endif
-    {}
-
-    //////////////////////////////////////////////////////////////////////////
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline PrimitiveVariables operator+ ( const PrimitiveVariables& left, const PrimitiveVariables& right )
-{
-    PrimitiveVariables result;
-
-    result.rho    = left.rho    + right.rho   ;
-    result.U      = left.U      + right.U     ;
-    result.V      = left.V      + right.V     ;
-    result.W      = left.W      + right.W     ;
-    result.lambda = left.lambda + right.lambda;
-
-#ifdef USE_PASSIVE_SCALAR
-    result.S_1    = left.S_1    + right.S_1   ;
-    result.S_2    = left.S_2    + right.S_2   ;
-#endif
-
-    return result;
-}
-
-__host__ __device__ inline ConservedVariables operator+ ( const ConservedVariables& left, const ConservedVariables& right )
-{
-    ConservedVariables result;
-
-    result.rho    = left.rho    + right.rho   ;
-    result.rhoU   = left.rhoU   + right.rhoU  ;
-    result.rhoV   = left.rhoV   + right.rhoV  ;
-    result.rhoW   = left.rhoW   + right.rhoW  ;
-    result.rhoE   = left.rhoE   + right.rhoE  ;
-
-#ifdef USE_PASSIVE_SCALAR
-    result.rhoS_1 = left.rhoS_1 + right.rhoS_1;
-    result.rhoS_2 = left.rhoS_2 + right.rhoS_2;
-#endif
-
-    return result;
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline PrimitiveVariables operator- ( const PrimitiveVariables& left, const PrimitiveVariables& right )
-{
-    PrimitiveVariables result;
-
-    result.rho    = left.rho    - right.rho   ;
-    result.U      = left.U      - right.U     ;
-    result.V      = left.V      - right.V     ;
-    result.W      = left.W      - right.W     ;
-    result.lambda = left.lambda - right.lambda;
-
-#ifdef USE_PASSIVE_SCALAR
-    result.S_1    = left.S_1    - right.S_1   ;
-    result.S_2    = left.S_2    - right.S_2   ;
-#endif
-
-    return result;
-}
-
-__host__ __device__ inline ConservedVariables operator- ( const ConservedVariables& left, const ConservedVariables& right )
-{
-    ConservedVariables result;
-
-    result.rho    = left.rho    - right.rho   ;
-    result.rhoU   = left.rhoU   - right.rhoU  ;
-    result.rhoV   = left.rhoV   - right.rhoV  ;
-    result.rhoW   = left.rhoW   - right.rhoW  ;
-    result.rhoE   = left.rhoE   - right.rhoE  ;
-
-#ifdef USE_PASSIVE_SCALAR
-    result.rhoS_1 = left.rhoS_1 - right.rhoS_1;
-    result.rhoS_2 = left.rhoS_2 - right.rhoS_2;
-#endif
-
-    return result;
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline PrimitiveVariables operator* ( const real left, const PrimitiveVariables& right )
-{
-    PrimitiveVariables result;
-
-    result.rho    = left * right.rho   ;
-    result.U      = left * right.U     ;
-    result.V      = left * right.V     ;
-    result.W      = left * right.W     ;
-    result.lambda = left * right.lambda;
-
-#ifdef USE_PASSIVE_SCALAR
-    result.S_1    = left * right.S_1   ;
-    result.S_2    = left * right.S_2   ;
-#endif
-
-    return result;
-}
-
-__host__ __device__ inline ConservedVariables operator* ( const real left, const ConservedVariables& right )
-{
-    ConservedVariables result;
-
-    result.rho    = left * right.rho   ;
-    result.rhoU   = left * right.rhoU  ;
-    result.rhoV   = left * right.rhoV  ;
-    result.rhoW   = left * right.rhoW  ;
-    result.rhoE   = left * right.rhoE  ;
-
-#ifdef USE_PASSIVE_SCALAR
-    result.rhoS_1 = left * right.rhoS_1;
-    result.rhoS_2 = left * right.rhoS_2;
-#endif
-
-    return result;
-}
-
-} // namespace GksGpu
-
-#endif
-
diff --git a/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh b/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh
deleted file mode 100644
index b7b759c99ffec6118a4173af098e0b372caf6ef7..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh
+++ /dev/null
@@ -1,88 +0,0 @@
-#ifndef FlowStateDataConversion_H
-#define FlowStateDataConversion_H
-
-#ifdef __CUDACC__
-#include <cuda_runtime.h>
-#else
-#ifndef __host__
-#define __host__
-#endif
-#ifndef __device__
-#define __device__
-#endif
-#endif
-
-#include "Core/DataTypes.h"
-#include "Core/RealConstants.h"
-
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/ThermalDependencies.cuh"
-
-namespace GksGpu {
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline ConservedVariables toConservedVariables( const PrimitiveVariables& prim, real K, bool overrideK = true )
-{
-    //#ifdef USE_PASSIVE_SCALAR
-    //if( overrideK ) K = getK(prim);
-    //#endif
-
-#ifdef USE_PASSIVE_SCALAR
-    return ConservedVariables(prim.rho
-                             ,prim.U * prim.rho
-                             ,prim.V * prim.rho
-                             ,prim.W * prim.rho
-                             //,getEint(prim) * prim.rho + c1o2 * prim.rho * ( prim.U * prim.U + prim.V * prim.V + prim.W * prim.W )
-                             ,( K + c3o1 ) / ( c4o1 * prim.lambda ) * prim.rho + c1o2 * prim.rho * ( prim.U * prim.U + prim.V * prim.V + prim.W * prim.W )
-                             ,prim.S_1 * prim.rho
-                             ,prim.S_2 * prim.rho
-    );
-#else
-    return ConservedVariables(prim.rho
-                             ,prim.U * prim.rho
-                             ,prim.V * prim.rho
-                             ,prim.W * prim.rho
-                             ,( K + c3o1 ) / ( c4o1 * prim.lambda ) * prim.rho + c1o2 * prim.rho * ( prim.U * prim.U + prim.V * prim.V + prim.W * prim.W )
-    );
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline PrimitiveVariables toPrimitiveVariables( const ConservedVariables& cons, real K, bool overrideK = true )
-{
-    //#ifdef USE_PASSIVE_SCALAR
-    //if( overrideK ) K = getK(cons);
-    //#endif
-
-#ifdef USE_PASSIVE_SCALAR
-	return PrimitiveVariables(cons.rho
-						     ,cons.rhoU / cons.rho
-						     ,cons.rhoV / cons.rho
-						     ,cons.rhoW / cons.rho
-						     //,getlambda(cons)
-						     ,( K + c3o1 ) * cons.rho / ( c4o1 * ( cons.rhoE - c1o2 * ( cons.rhoU * cons.rhoU + cons.rhoV * cons.rhoV + cons.rhoW * cons.rhoW ) / cons.rho ) )
-                             ,cons.rhoS_1 / cons.rho
-                             ,cons.rhoS_2 / cons.rho
-	);
-#else
-	return PrimitiveVariables(cons.rho
-						     ,cons.rhoU / cons.rho
-						     ,cons.rhoV / cons.rho
-						     ,cons.rhoW / cons.rho
-						     ,( K + c3o1 ) * cons.rho / ( c4o1 * ( cons.rhoE - c1o2 * ( cons.rhoU * cons.rhoU + cons.rhoV * cons.rhoV + cons.rhoW * cons.rhoW ) / cons.rho ) )
-	);
-#endif
-}
-
-} // namespace GksGpu
-
-#endif
-
diff --git a/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh b/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh
deleted file mode 100644
index 04a164aa327bed36cca2b8756c87dd1c7d9f0a64..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh
+++ /dev/null
@@ -1,407 +0,0 @@
-//#ifndef HeatCapacities_H
-//#define HeatCapacities_H
-//
-// #ifdef __CUDACC__
-// #include <cuda_runtime.h>
-// #else
-// #ifndef __host__
-// #define __host__
-// #endif
-// #ifndef __device__
-// #define __device__
-// #endif
-// #endif
-//
-//#include "Core/DataTypes.h"
-//#include "Core/RealConstants.h"
-//
-//#include "Definitions/PassiveScalar.h"
-//
-//#include "FlowStateData/FlowStateData.cuh"
-//
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//#ifdef USE_PASSIVE_SCALAR
-//
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//__host__ __device__ inline real getCp( real T, real* CpData )
-//{
-//    real T0 = 100.0;
-//    real dT = 200.0;
-//
-//    int i = int( ( T - T0 ) / dT );
-//
-//    real CpLow  = CpData[i];
-//    real CpHigh = CpData[i+1];
-//
-//    real x = (T - T0 - i * dT) / dT;
-//
-//    return CpLow + x * ( CpHigh - CpLow );
-//}
-//
-////__host__ __device__ inline real getIntegratedCp( real T, real* CpData )
-////{
-////    real T0 = 100.0;
-////    real dT = 200.0;
-////
-////    int i = int( ( T - T0 ) / dT );
-////
-////    real CpLow  = CpData[i];
-////    real CpHigh = CpData[i+1];
-////
-////    real x = (T - T0 - i * dT) / dT;
-////
-////    real CpAtT = CpLow + x * ( CpHigh - CpLow );
-////
-////    real sum = dT * c1o2 *  CpLow;
-////
-////    for( int j = 0; j < i; j++ )
-////    {
-////        sum += dT * CpData[i];
-////    }
-////
-////    sum += (T - T0 - i * dT) * c1o2 * ( CpLow + CpAtT );
-////
-////    return sum;
-////}
-//
-//__host__ __device__ inline real getIntegratedCv( real T, real* CpData )
-//{
-//    real T0 = 100.0;
-//    real dT = 200.0;
-//
-//    int i = int( ( T - T0 ) / dT );
-//
-//    real Cv0    = CpData[0]   - real(8.31445984848);
-//    real CvLow  = CpData[i]   - real(8.31445984848);
-//    real CvHigh = CpData[i+1] - real(8.31445984848);
-//
-//    real deltaT = T - T0 - i * dT;
-//
-//    real x = deltaT / dT;
-//
-//    real CvAtT = CvLow + x * ( CvHigh - CvLow );
-//
-//    //////////////////////////////////////////////////////////////////////////
-//
-//    real sum = c1o2 * deltaT * ( CvLow + CvAtT );
-//
-//    if( i > 0 )
-//    {
-//        sum += dT * c1o2 *  ( Cv0 + CvLow );
-//
-//        for (int j = 1; j < i; j++)
-//        {
-//            sum += dT * ( CpData[j] - real(8.31445984848) );
-//        }
-//    }
-//
-//    return sum;
-//}
-//
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//__host__ __device__ inline real getCpO2( real T )
-//{
-//    real CpData [] = {
-//                        29.106,
-//                        29.385,
-//                        31.091,
-//                        32.981,
-//                        34.355,
-//                        35.300,
-//                        35.988,
-//                        36.544,
-//                        37.040,
-//                        37.510,
-//                        37.969,
-//                        38.419,
-//                        38.856,
-//                        39.276,
-//                        39.674,
-//                        40.048,
-//                        40.395,
-//                        40.716,
-//                        41.013,
-//                        41.289
-//                     };
-//
-//    return getCp( T, CpData);
-//}
-//
-//__host__ __device__ inline real getCpN2( real T )
-//{
-//    real CpData [] = {
-//                        29.104,
-//                        29.125,
-//                        29.580,
-//                        30.754,
-//                        32.090,
-//                        33.241,
-//                        34.147,
-//                        34.843,
-//                        35.378,
-//                        35.796,
-//                        36.126,
-//                        36.395,
-//                        36.616,
-//                        36.801,
-//                        36.959,
-//                        37.096,
-//                        37.216,
-//                        37.323,
-//                        37.420,
-//                        37.508
-//                     };
-//
-//    return getCp( T, CpData);
-//}
-//
-//__host__ __device__ inline real getCpCH4( real T )
-//{
-//
-//    real CpData [] = {
-//                         33.258,
-//                         35.708,
-//                         46.342,
-//                         57.794,
-//                         67.601,
-//                         75.529,
-//                         81.744,
-//                         86.556,
-//                         90.283,
-//                         93.188,
-//                         95.477,
-//                         97.301,
-//                         98.772,
-//                         99.971,
-//                        100.960,
-//                        101.782,
-//                        102.474,
-//                        103.060,
-//                        103.560,
-//                        103.990 
-//                     };
-//
-//    return getCp( T, CpData);
-//}
-//
-//__host__ __device__ inline real getCpH2O( real T )
-//{
-//
-//    real CpData [] = {
-//                        33.299,
-//                        33.596,
-//                        35.226,
-//                        37.495,
-//                        39.987,
-//                        42.536,
-//                        44.945,
-//                        47.090,
-//                        48.935,
-//                        50.496,
-//                        51.823,
-//                        52.947,
-//                        53.904,
-//                        54.723,
-//                        55.430,
-//                        56.044,
-//                        56.583,
-//                        57.058,
-//                        57.480,
-//                        57.859
-//                     };
-//
-//    return getCp( T, CpData);
-//}
-//
-//__host__ __device__ inline real getCpCO2( real T )
-//{
-//
-//    real CpData [] = {
-//                        29.208,
-//                        37.221,
-//                        44.627,
-//                        49.564,
-//                        52.999,
-//                        55.409,
-//                        57.137,
-//                        58.379,
-//                        59.317,
-//                        60.049,
-//                        60.622,
-//                        61.086,
-//                        61.471,
-//                        61.802,
-//                        62.095,
-//                        62.347,
-//                        62.573,
-//                        62.785,
-//                        62.980,
-//                        63.166
-//                     };
-//
-//    return getCp( T, CpData);
-//}
-//
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//__host__ __device__ inline real getIntegratedCvO2( real T )
-//{
-//    real CpData [] = {
-//                        29.106,
-//                        29.385,
-//                        31.091,
-//                        32.981,
-//                        34.355,
-//                        35.300,
-//                        35.988,
-//                        36.544,
-//                        37.040,
-//                        37.510,
-//                        37.969,
-//                        38.419,
-//                        38.856,
-//                        39.276,
-//                        39.674,
-//                        40.048,
-//                        40.395,
-//                        40.716,
-//                        41.013,
-//                        41.289
-//                     };
-//
-//    return getIntegratedCv( T, CpData);
-//}
-//
-//__host__ __device__ inline real getIntegratedCvN2( real T )
-//{
-//    real CpData [] = {
-//                        29.104,
-//                        29.125,
-//                        29.580,
-//                        30.754,
-//                        32.090,
-//                        33.241,
-//                        34.147,
-//                        34.843,
-//                        35.378,
-//                        35.796,
-//                        36.126,
-//                        36.395,
-//                        36.616,
-//                        36.801,
-//                        36.959,
-//                        37.096,
-//                        37.216,
-//                        37.323,
-//                        37.420,
-//                        37.508
-//                     };
-//
-//    return getIntegratedCv( T, CpData);
-//}
-//
-//__host__ __device__ inline real getIntegratedCvCH4( real T )
-//{
-//
-//    real CpData [] = {
-//                         33.258,
-//                         35.708,
-//                         46.342,
-//                         57.794,
-//                         67.601,
-//                         75.529,
-//                         81.744,
-//                         86.556,
-//                         90.283,
-//                         93.188,
-//                         95.477,
-//                         97.301,
-//                         98.772,
-//                         99.971,
-//                        100.960,
-//                        101.782,
-//                        102.474,
-//                        103.060,
-//                        103.560,
-//                        103.990 
-//                     };
-//
-//    return getIntegratedCv( T, CpData);
-//}
-//
-//__host__ __device__ inline real getIntegratedCvH2O( real T )
-//{
-//
-//    real CpData [] = {
-//                        33.299,
-//                        33.596,
-//                        35.226,
-//                        37.495,
-//                        39.987,
-//                        42.536,
-//                        44.945,
-//                        47.090,
-//                        48.935,
-//                        50.496,
-//                        51.823,
-//                        52.947,
-//                        53.904,
-//                        54.723,
-//                        55.430,
-//                        56.044,
-//                        56.583,
-//                        57.058,
-//                        57.480,
-//                        57.859
-//                     };
-//
-//    return getIntegratedCv( T, CpData);
-//}
-//
-//__host__ __device__ inline real getIntegratedCvCO2( real T )
-//{
-//
-//    real CpData [] = {
-//                        29.208,
-//                        37.221,
-//                        44.627,
-//                        49.564,
-//                        52.999,
-//                        55.409,
-//                        57.137,
-//                        58.379,
-//                        59.317,
-//                        60.049,
-//                        60.622,
-//                        61.086,
-//                        61.471,
-//                        61.802,
-//                        62.095,
-//                        62.347,
-//                        62.573,
-//                        62.785,
-//                        62.980,
-//                        63.166
-//                     };
-//
-//    return getIntegratedCv( T, CpData);
-//}
-//
-//
-//#endif
-//
-//
-//
-//#endif
-//
diff --git a/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh b/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh
deleted file mode 100644
index 47eb261a089b9a1c8d7bb14bca864c334887d447..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef ThermalDependencies_H
-#define ThermalDependencies_H
-
-#ifdef __CUDACC__
-#include <cuda_runtime.h>
-#else
-#ifndef __host__
-#define __host__
-#endif
-#ifndef __device__
-#define __device__
-#endif
-#endif
-
-#include <math.h>
-
-#include "Core/DataTypes.h"
-#include "Core/RealConstants.h"
-
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/HeatCapacities.cuh"
-
-namespace GksGpu {
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#define R_U  real( 8.31445984848 )
-
-#define M_A  real( 0.02884 )
-#define M_P  real( 0.0276199095022624 )
-#define M_F  real( 0.016 )
-
-#define M_O2 real( 0.032 )
-
-#define rX   real( 0.21 )
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline real getT( const PrimitiveVariables& prim )
-{
-    real T = M_A / ( c2o1 * prim.lambda * R_U );
-
-    return T;
-}
-
-__host__ __device__ inline void setLambdaFromT( PrimitiveVariables& prim, real T )
-{
-    prim.lambda =  M_A / ( c2o1 * T * R_U );
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-} // namespace GksGpu
-
-
-
-
-#endif
-
diff --git a/src/gpu/GksGpu/FluxComputation/ApplyFlux.cuh b/src/gpu/GksGpu/FluxComputation/ApplyFlux.cuh
deleted file mode 100644
index 0a938799d240d42615f8c478a0c949526adf9463..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/ApplyFlux.cuh
+++ /dev/null
@@ -1,141 +0,0 @@
-#ifndef ApplyFlux_CUH
-#define ApplyFlux_CUH
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-namespace GksGpu {
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void applyFluxToNegCell( const DataBaseStruct& dataBase,
-                                                    const uint& negCellIdx,
-                                                    const ConservedVariables& flux,
-                                                    const char direction,
-                                                    const Parameters& parameters)
-{
-    realAccumulator* dataUpdate = dataBase.dataUpdate;
-
-#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
-    atomicAdd( &( dataUpdate[ RHO__(negCellIdx, dataBase.numberOfCells) ] ), - (realAccumulator)flux.rho  );
-    atomicAdd( &( dataUpdate[ RHO_U(negCellIdx, dataBase.numberOfCells) ] ), - (realAccumulator)flux.rhoU );
-    atomicAdd( &( dataUpdate[ RHO_V(negCellIdx, dataBase.numberOfCells) ] ), - (realAccumulator)flux.rhoV );
-    atomicAdd( &( dataUpdate[ RHO_W(negCellIdx, dataBase.numberOfCells) ] ), - (realAccumulator)flux.rhoW );
-    atomicAdd( &( dataUpdate[ RHO_E(negCellIdx, dataBase.numberOfCells) ] ), - (realAccumulator)flux.rhoE );
-#ifdef USE_PASSIVE_SCALAR
-	atomicAdd( &( dataUpdate[ RHO_S_1(negCellIdx, dataBase.numberOfCells) ] ), - (realAccumulator)flux.rhoS_1 );
-	atomicAdd( &( dataUpdate[ RHO_S_2(negCellIdx, dataBase.numberOfCells) ] ), - (realAccumulator)flux.rhoS_2 );
-#endif // USE_PASSIVE_SCALAR
-    
-    if( parameters.forcingSchemeIdx == 0 || parameters.forcingSchemeIdx == 1 )
-    {
-        if (direction == 'x')
-            atomicAdd(&(dataBase.massFlux[VEC_X(negCellIdx, dataBase.numberOfCells)]), flux.rho);
-        if (direction == 'y')
-            atomicAdd(&(dataBase.massFlux[VEC_Y(negCellIdx, dataBase.numberOfCells)]), flux.rho);
-        if (direction == 'z')
-            atomicAdd(&(dataBase.massFlux[VEC_Z(negCellIdx, dataBase.numberOfCells)]), flux.rho);
-    }
-#else
-#pragma omp atomic
-    dataUpdate[ RHO__(negCellIdx, dataBase.numberOfCells) ] -= flux.rho ;
-#pragma omp atomic
-    dataUpdate[ RHO_U(negCellIdx, dataBase.numberOfCells) ] -= flux.rhoU;
-#pragma omp atomic
-    dataUpdate[ RHO_V(negCellIdx, dataBase.numberOfCells) ] -= flux.rhoV;
-#pragma omp atomic
-    dataUpdate[ RHO_W(negCellIdx, dataBase.numberOfCells) ] -= flux.rhoW;
-#pragma omp atomic
-    dataUpdate[ RHO_E(negCellIdx, dataBase.numberOfCells) ] -= flux.rhoE;
-#ifdef USE_PASSIVE_SCALAR
-#pragma omp atomic
-	dataUpdate[ RHO_S_1(negCellIdx, dataBase.numberOfCells) ] -= flux.rhoS_1;
-	dataUpdate[ RHO_S_2(negCellIdx, dataBase.numberOfCells) ] -= flux.rhoS_2;
-#endif // USE_PASSIVE_SCALAR
-
-    if( parameters.forcingSchemeIdx == 0 || parameters.forcingSchemeIdx == 1 )
-    {
-        if( direction == 'x' )
-    #pragma omp atomic
-            dataBase.massFlux[ VEC_X(negCellIdx, dataBase.numberOfCells) ] += flux.rho;
-        if( direction == 'y' )
-    #pragma omp atomic
-            dataBase.massFlux[ VEC_Y(negCellIdx, dataBase.numberOfCells) ] += flux.rho;
-        if( direction == 'z' )
-    #pragma omp atomic
-            dataBase.massFlux[ VEC_Z(negCellIdx, dataBase.numberOfCells) ] += flux.rho;
-    }
-#endif
-
-}
-
-__host__ __device__ inline void applyFluxToPosCell( const DataBaseStruct& dataBase,
-                                                    const uint& posCellIdx,
-                                                    const ConservedVariables& flux,
-                                                    const char& direction,
-                                                    const Parameters& parameters)
-{
-    realAccumulator* dataUpdate = dataBase.dataUpdate;
-
-#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
-    atomicAdd( &( dataUpdate[ RHO__(posCellIdx, dataBase.numberOfCells) ] ),   (realAccumulator)flux.rho  );
-    atomicAdd( &( dataUpdate[ RHO_U(posCellIdx, dataBase.numberOfCells) ] ),   (realAccumulator)flux.rhoU );
-    atomicAdd( &( dataUpdate[ RHO_V(posCellIdx, dataBase.numberOfCells) ] ),   (realAccumulator)flux.rhoV );
-    atomicAdd( &( dataUpdate[ RHO_W(posCellIdx, dataBase.numberOfCells) ] ),   (realAccumulator)flux.rhoW );
-    atomicAdd( &( dataUpdate[ RHO_E(posCellIdx, dataBase.numberOfCells) ] ),   (realAccumulator)flux.rhoE );
-#ifdef USE_PASSIVE_SCALAR
-	atomicAdd( &( dataUpdate[ RHO_S_1(posCellIdx, dataBase.numberOfCells) ] ),   (realAccumulator)flux.rhoS_1 );
-	atomicAdd( &( dataUpdate[ RHO_S_2(posCellIdx, dataBase.numberOfCells) ] ),   (realAccumulator)flux.rhoS_2 );
-#endif // USE_PASSIVE_SCALAR
-    
-    if( parameters.forcingSchemeIdx == 0 || parameters.forcingSchemeIdx == 1 )
-    {
-        if (direction == 'x')
-            atomicAdd(&(dataBase.massFlux[VEC_X(posCellIdx, dataBase.numberOfCells)]), flux.rho);
-        if (direction == 'y')
-            atomicAdd(&(dataBase.massFlux[VEC_Y(posCellIdx, dataBase.numberOfCells)]), flux.rho);
-        if (direction == 'z')
-            atomicAdd(&(dataBase.massFlux[VEC_Z(posCellIdx, dataBase.numberOfCells)]), flux.rho);
-    }
-#else
-#pragma omp atomic
-    dataUpdate[ RHO__(posCellIdx, dataBase.numberOfCells) ] += flux.rho ;
-#pragma omp atomic
-    dataUpdate[ RHO_U(posCellIdx, dataBase.numberOfCells) ] += flux.rhoU;
-#pragma omp atomic
-    dataUpdate[ RHO_V(posCellIdx, dataBase.numberOfCells) ] += flux.rhoV;
-#pragma omp atomic
-    dataUpdate[ RHO_W(posCellIdx, dataBase.numberOfCells) ] += flux.rhoW;
-#pragma omp atomic
-    dataUpdate[ RHO_E(posCellIdx, dataBase.numberOfCells) ] += flux.rhoE;
-#ifdef USE_PASSIVE_SCALAR
-#pragma omp atomic
-	dataUpdate[ RHO_S_1(posCellIdx, dataBase.numberOfCells) ] += flux.rhoS_1;
-	dataUpdate[ RHO_S_2(posCellIdx, dataBase.numberOfCells) ] += flux.rhoS_2;
-#endif // USE_PASSIVE_SCALAR
-    
-    if( parameters.forcingSchemeIdx == 0 || parameters.forcingSchemeIdx == 1 )
-    {
-        if (direction == 'x')
-    #pragma omp atomic
-            dataBase.massFlux[VEC_X(posCellIdx, dataBase.numberOfCells)] += flux.rho;
-        if (direction == 'y')
-    #pragma omp atomic
-            dataBase.massFlux[VEC_Y(posCellIdx, dataBase.numberOfCells)] += flux.rho;
-        if (direction == 'z')
-    #pragma omp atomic
-            dataBase.massFlux[VEC_Z(posCellIdx, dataBase.numberOfCells)] += flux.rho;
-    }
-#endif
-}
-
-} // namespace GksGpu
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/FluxComputation/AssembleFlux.cuh b/src/gpu/GksGpu/FluxComputation/AssembleFlux.cuh
deleted file mode 100644
index 3e00c3ae33d6083d37d4cf334dd51d575f4fc0db..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/AssembleFlux.cuh
+++ /dev/null
@@ -1,569 +0,0 @@
-#ifndef AssembleFlux_CUH
-#define AssembleFlux_CUH
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBase.h"
-#include "Parameters/Parameters.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "FluxComputation/SutherlandsLaw.cuh"
-#include "FluxComputation/Moments.cuh"
-
-extern __device__ real atomicAdd(real* address, real val);
-
-#define NUMBER_OF_MOMENTS 7
-
-namespace GksGpu {
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void computeTimeDerivative( const PrimitiveVariables& facePrim, 
-                                                       const real momentU [ NUMBER_OF_MOMENTS ], 
-                                                       const real momentV [ NUMBER_OF_MOMENTS ], 
-                                                       const real momentW [ NUMBER_OF_MOMENTS ], 
-                                                       const real momentXi[ NUMBER_OF_MOMENTS ],
-                                                       const real ax[LENGTH_CELL_DATA],
-                                                       const real ay[LENGTH_CELL_DATA],
-                                                       const real az[LENGTH_CELL_DATA],
-                                                       const Vec3& force,
-                                                       ConservedVariables& timeGrad )
-{
-    timeGrad.rho = ax[0]*momentU[1] + ax[1]*momentU[2] + c1o2*ax[4]*momentU[3] + ay[0]*momentV[1] + 
-   ax[2]*momentU[1]*momentV[1] + ay[1]*momentU[1]*momentV[1] + 
-   c1o2*ay[4]*momentU[2]*momentV[1] + ay[2]*momentV[2] + c1o2*ax[4]*momentU[1]*momentV[2] + 
-   c1o2*ay[4]*momentV[3] + az[0]*momentW[1] + ax[3]*momentU[1]*momentW[1] + 
-   az[1]*momentU[1]*momentW[1] + c1o2*az[4]*momentU[2]*momentW[1] + 
-   ay[3]*momentV[1]*momentW[1] + az[2]*momentV[1]*momentW[1] + 
-   c1o2*az[4]*momentV[2]*momentW[1] + az[3]*momentW[2] + c1o2*ax[4]*momentU[1]*momentW[2] + 
-   c1o2*ay[4]*momentV[1]*momentW[2] + c1o2*az[4]*momentW[3] + 
-   c1o2*ax[4]*momentU[1]*momentXi[2] + c1o2*ay[4]*momentV[1]*momentXi[2] + 
-   c1o2*az[4]*momentW[1]*momentXi[2];
-
-    timeGrad.rhoU = ax[0]*momentU[2] + ax[1]*momentU[3] + c1o2*ax[4]*momentU[4] + 
-   ay[0]*momentU[1]*momentV[1] + ax[2]*momentU[2]*momentV[1] + 
-   ay[1]*momentU[2]*momentV[1] + c1o2*ay[4]*momentU[3]*momentV[1] + 
-   ay[2]*momentU[1]*momentV[2] + c1o2*ax[4]*momentU[2]*momentV[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[3] + az[0]*momentU[1]*momentW[1] + 
-   ax[3]*momentU[2]*momentW[1] + az[1]*momentU[2]*momentW[1] + 
-   c1o2*az[4]*momentU[3]*momentW[1] + ay[3]*momentU[1]*momentV[1]*momentW[1] + 
-   az[2]*momentU[1]*momentV[1]*momentW[1] + c1o2*az[4]*momentU[1]*momentV[2]*momentW[1] + 
-   az[3]*momentU[1]*momentW[2] + c1o2*ax[4]*momentU[2]*momentW[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[1]*momentW[2] + c1o2*az[4]*momentU[1]*momentW[3] + 
-   c1o2*ax[4]*momentU[2]*momentXi[2] + c1o2*ay[4]*momentU[1]*momentV[1]*momentXi[2] + 
-   c1o2*az[4]*momentU[1]*momentW[1]*momentXi[2];
-
-    timeGrad.rhoV = ax[0]*momentU[1]*momentV[1] + ax[1]*momentU[2]*momentV[1] + 
-   c1o2*ax[4]*momentU[3]*momentV[1] + ay[0]*momentV[2] + ax[2]*momentU[1]*momentV[2] + 
-   ay[1]*momentU[1]*momentV[2] + c1o2*ay[4]*momentU[2]*momentV[2] + ay[2]*momentV[3] + 
-   c1o2*ax[4]*momentU[1]*momentV[3] + c1o2*ay[4]*momentV[4] + az[0]*momentV[1]*momentW[1] + 
-   ax[3]*momentU[1]*momentV[1]*momentW[1] + az[1]*momentU[1]*momentV[1]*momentW[1] + 
-   c1o2*az[4]*momentU[2]*momentV[1]*momentW[1] + ay[3]*momentV[2]*momentW[1] + 
-   az[2]*momentV[2]*momentW[1] + c1o2*az[4]*momentV[3]*momentW[1] + 
-   az[3]*momentV[1]*momentW[2] + c1o2*ax[4]*momentU[1]*momentV[1]*momentW[2] + 
-   c1o2*ay[4]*momentV[2]*momentW[2] + c1o2*az[4]*momentV[1]*momentW[3] + 
-   c1o2*ax[4]*momentU[1]*momentV[1]*momentXi[2] + c1o2*ay[4]*momentV[2]*momentXi[2] + 
-   c1o2*az[4]*momentV[1]*momentW[1]*momentXi[2];
-
-    timeGrad.rhoW = ax[0]*momentU[1]*momentW[1] + ax[1]*momentU[2]*momentW[1] + 
-   c1o2*ax[4]*momentU[3]*momentW[1] + ay[0]*momentV[1]*momentW[1] + 
-   ax[2]*momentU[1]*momentV[1]*momentW[1] + ay[1]*momentU[1]*momentV[1]*momentW[1] + 
-   c1o2*ay[4]*momentU[2]*momentV[1]*momentW[1] + ay[2]*momentV[2]*momentW[1] + 
-   c1o2*ax[4]*momentU[1]*momentV[2]*momentW[1] + c1o2*ay[4]*momentV[3]*momentW[1] + 
-   az[0]*momentW[2] + ax[3]*momentU[1]*momentW[2] + az[1]*momentU[1]*momentW[2] + 
-   c1o2*az[4]*momentU[2]*momentW[2] + ay[3]*momentV[1]*momentW[2] + 
-   az[2]*momentV[1]*momentW[2] + c1o2*az[4]*momentV[2]*momentW[2] + az[3]*momentW[3] + 
-   c1o2*ax[4]*momentU[1]*momentW[3] + c1o2*ay[4]*momentV[1]*momentW[3] + 
-   c1o2*az[4]*momentW[4] + c1o2*ax[4]*momentU[1]*momentW[1]*momentXi[2] + 
-   c1o2*ay[4]*momentV[1]*momentW[1]*momentXi[2] + c1o2*az[4]*momentW[2]*momentXi[2];
-
-    timeGrad.rhoE = c1o2*ax[0]*momentU[3] + c1o2*ax[1]*momentU[4] + c1o4*ax[4]*momentU[5] + 
-   c1o2*ay[0]*momentU[2]*momentV[1] + c1o2*ax[2]*momentU[3]*momentV[1] + 
-   c1o2*ay[1]*momentU[3]*momentV[1] + c1o4*ay[4]*momentU[4]*momentV[1] + 
-   c1o2*ax[0]*momentU[1]*momentV[2] + c1o2*ax[1]*momentU[2]*momentV[2] + 
-   c1o2*ay[2]*momentU[2]*momentV[2] + c1o2*ax[4]*momentU[3]*momentV[2] + 
-   c1o2*ay[0]*momentV[3] + c1o2*ax[2]*momentU[1]*momentV[3] + 
-   c1o2*ay[1]*momentU[1]*momentV[3] + c1o2*ay[4]*momentU[2]*momentV[3] + 
-   c1o2*ay[2]*momentV[4] + c1o4*ax[4]*momentU[1]*momentV[4] + c1o4*ay[4]*momentV[5] + 
-   c1o2*az[0]*momentU[2]*momentW[1] + c1o2*ax[3]*momentU[3]*momentW[1] + 
-   c1o2*az[1]*momentU[3]*momentW[1] + c1o4*az[4]*momentU[4]*momentW[1] + 
-   c1o2*ay[3]*momentU[2]*momentV[1]*momentW[1] + c1o2*az[2]*momentU[2]*momentV[1]*momentW[1] + 
-   c1o2*az[0]*momentV[2]*momentW[1] + c1o2*ax[3]*momentU[1]*momentV[2]*momentW[1] + 
-   c1o2*az[1]*momentU[1]*momentV[2]*momentW[1] + c1o2*az[4]*momentU[2]*momentV[2]*momentW[1] + 
-   c1o2*ay[3]*momentV[3]*momentW[1] + c1o2*az[2]*momentV[3]*momentW[1] + 
-   c1o4*az[4]*momentV[4]*momentW[1] + c1o2*ax[0]*momentU[1]*momentW[2] + 
-   c1o2*ax[1]*momentU[2]*momentW[2] + c1o2*az[3]*momentU[2]*momentW[2] + 
-   c1o2*ax[4]*momentU[3]*momentW[2] + c1o2*ay[0]*momentV[1]*momentW[2] + 
-   c1o2*ax[2]*momentU[1]*momentV[1]*momentW[2] + c1o2*ay[1]*momentU[1]*momentV[1]*momentW[2] + 
-   c1o2*ay[4]*momentU[2]*momentV[1]*momentW[2] + c1o2*ay[2]*momentV[2]*momentW[2] + 
-   c1o2*az[3]*momentV[2]*momentW[2] + c1o2*ax[4]*momentU[1]*momentV[2]*momentW[2] + 
-   c1o2*ay[4]*momentV[3]*momentW[2] + c1o2*az[0]*momentW[3] + 
-   c1o2*ax[3]*momentU[1]*momentW[3] + c1o2*az[1]*momentU[1]*momentW[3] + 
-   c1o2*az[4]*momentU[2]*momentW[3] + c1o2*ay[3]*momentV[1]*momentW[3] + 
-   c1o2*az[2]*momentV[1]*momentW[3] + c1o2*az[4]*momentV[2]*momentW[3] + 
-   c1o2*az[3]*momentW[4] + c1o4*ax[4]*momentU[1]*momentW[4] + 
-   c1o4*ay[4]*momentV[1]*momentW[4] + c1o4*az[4]*momentW[5] + 
-   c1o2*ax[0]*momentU[1]*momentXi[2] + c1o2*ax[1]*momentU[2]*momentXi[2] + 
-   c1o2*ax[4]*momentU[3]*momentXi[2] + c1o2*ay[0]*momentV[1]*momentXi[2] + 
-   c1o2*ax[2]*momentU[1]*momentV[1]*momentXi[2] + 
-   c1o2*ay[1]*momentU[1]*momentV[1]*momentXi[2] + 
-   c1o2*ay[4]*momentU[2]*momentV[1]*momentXi[2] + c1o2*ay[2]*momentV[2]*momentXi[2] + 
-   c1o2*ax[4]*momentU[1]*momentV[2]*momentXi[2] + c1o2*ay[4]*momentV[3]*momentXi[2] + 
-   c1o2*az[0]*momentW[1]*momentXi[2] + c1o2*ax[3]*momentU[1]*momentW[1]*momentXi[2] + 
-   c1o2*az[1]*momentU[1]*momentW[1]*momentXi[2] + 
-   c1o2*az[4]*momentU[2]*momentW[1]*momentXi[2] + 
-   c1o2*ay[3]*momentV[1]*momentW[1]*momentXi[2] + 
-   c1o2*az[2]*momentV[1]*momentW[1]*momentXi[2] + 
-   c1o2*az[4]*momentV[2]*momentW[1]*momentXi[2] + c1o2*az[3]*momentW[2]*momentXi[2] + 
-   c1o2*ax[4]*momentU[1]*momentW[2]*momentXi[2] + 
-   c1o2*ay[4]*momentV[1]*momentW[2]*momentXi[2] + c1o2*az[4]*momentW[3]*momentXi[2] + 
-   c1o4*ax[4]*momentU[1]*momentXi[4] + c1o4*ay[4]*momentV[1]*momentXi[4] + 
-   c1o4*az[4]*momentW[1]*momentXi[4];
-
-    //////////////////////////////////////////////////////////////////////////
-
-    timeGrad.rho  += c2o1 * facePrim.lambda * (                                                          facePrim.U - momentU[1]                           ) * force.x
-                   + c2o1 * facePrim.lambda * (                                                          facePrim.V -              momentV[1]              ) * force.y
-                   + c2o1 * facePrim.lambda * (                                                          facePrim.W -                           momentW[1] ) * force.z ;
-                                                                                                         
-    timeGrad.rhoU += c2o1 * facePrim.lambda * (   momentU[1] *                                           facePrim.U - momentU[2]                           ) * force.x
-                   + c2o1 * facePrim.lambda * (   momentU[1] *                                           facePrim.V - momentU[1] * momentV[1]              ) * force.y
-                   + c2o1 * facePrim.lambda * (   momentU[1] *                                           facePrim.W - momentU[1] *              momentW[1] ) * force.z ;
-                                                                                                         
-    timeGrad.rhoV += c2o1 * facePrim.lambda * (                momentV[1] *                              facePrim.U - momentU[1] * momentV[1]              ) * force.x
-                   + c2o1 * facePrim.lambda * (                momentV[1] *                              facePrim.V -              momentV[2]              ) * force.y
-                   + c2o1 * facePrim.lambda * (                momentV[1] *                              facePrim.W -              momentV[1] * momentW[1] ) * force.z ;
-                                                                                                         
-    timeGrad.rhoW += c2o1 * facePrim.lambda * (                             momentW[1] *                 facePrim.U - momentU[1] *              momentW[1] ) * force.x
-                   + c2o1 * facePrim.lambda * (                             momentW[1] *                 facePrim.V -              momentV[1] * momentW[1] ) * force.y
-                   + c2o1 * facePrim.lambda * (                             momentW[1] *                 facePrim.W -                           momentW[2] ) * force.z ;
-
-    timeGrad.rhoE +=       facePrim.lambda * ( ( momentU[2] + momentV[2] + momentW[2] + momentXi[2] ) * facePrim.U
-
-                                             - ( momentU[3]
-                                               + momentU[1] * momentV[2] 
-                                               + momentU[1] *              momentW[2] 
-                                               + momentU[1] *                           momentXi[2] )
-                                             ) * force.x
-
-                   +       facePrim.lambda * ( ( momentU[2] + momentV[2] + momentW[2] + momentXi[2] ) * facePrim.V
-
-                                             - ( momentU[2] * momentV[1]
-                                               +              momentV[3]
-                                               +              momentV[1] * momentW[2]
-                                               +              momentV[1] *              momentXi[2] )
-                                             ) * force.y
-
-                   +       facePrim.lambda * ( ( momentU[2] + momentV[2] + momentW[2] + momentXi[2] ) * facePrim.W
-
-                                             - ( momentU[2] *              momentW[1]
-                                               +              momentV[2] * momentW[1]
-                                               +                           momentW[3]
-                                               +                           momentW[1] * momentXi[2] )
-                                             ) * force.z ;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    timeGrad.rho  *= - c1o1;
-    timeGrad.rhoU *= - c1o1;
-    timeGrad.rhoV *= - c1o1;
-    timeGrad.rhoW *= - c1o1;
-    timeGrad.rhoE *= - c1o1;
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef USE_PASSIVE_SCALAR
-	timeGrad.rhoS_1 = timeGrad.rho * facePrim.S_1
-		            + ( ax[5] * momentU[1]  
-		              + ay[5] *              momentV[1]
-		              + az[5] *                           momentW[1] )
-		            / (c1o2 * facePrim.lambda);
-
-	timeGrad.rhoS_2 = timeGrad.rho * facePrim.S_2
-		            + ( ax[6] * momentU[1]  
-		              + ay[6] *              momentV[1]
-		              + az[6] *                           momentW[1] )
-		            / (c1o2 * facePrim.lambda);
-
-    timeGrad.rhoS_1 *= - c1o1;
-    timeGrad.rhoS_2 *= - c1o1;
-#endif // USE_PASSIVE_SCALAR
-
-    //////////////////////////////////////////////////////////////////////////
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void computeTimeCoefficients(const PrimitiveVariables & facePrim, Parameters& parameters, real timeCoefficients[4])
-{
-    real r   = parameters.lambdaRef / facePrim.lambda;
-
-    //if( r < zero ) printf( "ERROR: %f/%f\n", parameters.lambdaRef, facePrim.lambda );
-
-    real mu = getViscosity(parameters, r);
-
-    real tau = c2o1 * facePrim.lambda * mu / facePrim.rho;
-
-    real dt = parameters.dt;
-
-    timeCoefficients[0] =                         dt;
-    timeCoefficients[1] =                 - tau * dt;
-    timeCoefficients[2] =  c1o2 * dt * dt - tau * dt;
-
-    timeCoefficients[3] =                   tau     ;
-}
-
-__host__ __device__ inline void getTau(const PrimitiveVariables & facePrim, Parameters& parameters, real& tau)
-{
-    real r   = parameters.lambdaRef / facePrim.lambda;
-
-    real mu = getViscosity(parameters, r);  mu = sutherlandsLaw2( parameters, r );
-
-    tau = c2o1 * facePrim.lambda * mu / facePrim.rho;
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void assembleFlux( const PrimitiveVariables& facePrim, 
-                                              const real momentU [ NUMBER_OF_MOMENTS ], 
-                                              const real momentV [ NUMBER_OF_MOMENTS ], 
-                                              const real momentW [ NUMBER_OF_MOMENTS ], 
-                                              const real momentXi[ NUMBER_OF_MOMENTS ],
-                                              const real ax[LENGTH_CELL_DATA],
-                                              const real ay[LENGTH_CELL_DATA],
-                                              const real az[LENGTH_CELL_DATA],
-                                              const real at[LENGTH_CELL_DATA],
-                                              const real timeCoefficients[4],
-                                              const Parameters& parameters,
-                                              const Vec3 force,
-                                              ConservedVariables& flux,
-                                              real& heatFlux )
-{
-    ConservedVariables flux_1, flux_2, flux_3;
-
-    flux_1.rho  =           momentU[0+1]                          ;
-    flux_1.rhoU =           momentU[1+1]                          ;
-    flux_1.rhoV =           momentU[0+1] * momentV[1]             ;
-    flux_1.rhoW =           momentU[0+1] *              momentW[1];
-
-    flux_1.rhoE =  c1o2 * ( momentU[2+1]             
-                          + momentU[0+1] * momentV[2]
-                          + momentU[0+1] *              momentW[2]
-                          + momentU[0+1] *                           momentXi[2] );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    flux_2.rho  = ax[0]*momentU[2] + ax[1]*momentU[3] + c1o2*ax[4]*momentU[4] + 
-   ay[0]*momentU[1]*momentV[1] + ax[2]*momentU[2]*momentV[1] + 
-   ay[1]*momentU[2]*momentV[1] + c1o2*ay[4]*momentU[3]*momentV[1] + 
-   ay[2]*momentU[1]*momentV[2] + c1o2*ax[4]*momentU[2]*momentV[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[3] + az[0]*momentU[1]*momentW[1] + 
-   ax[3]*momentU[2]*momentW[1] + az[1]*momentU[2]*momentW[1] + 
-   c1o2*az[4]*momentU[3]*momentW[1] + ay[3]*momentU[1]*momentV[1]*momentW[1] + 
-   az[2]*momentU[1]*momentV[1]*momentW[1] + c1o2*az[4]*momentU[1]*momentV[2]*momentW[1] + 
-   az[3]*momentU[1]*momentW[2] + c1o2*ax[4]*momentU[2]*momentW[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[1]*momentW[2] + c1o2*az[4]*momentU[1]*momentW[3] + 
-   c1o2*ax[4]*momentU[2]*momentXi[2] + c1o2*ay[4]*momentU[1]*momentV[1]*momentXi[2] + 
-   c1o2*az[4]*momentU[1]*momentW[1]*momentXi[2];
-
-    flux_2.rhoU = ax[0]*momentU[3] + ax[1]*momentU[4] + c1o2*ax[4]*momentU[5] + 
-   ay[0]*momentU[2]*momentV[1] + ax[2]*momentU[3]*momentV[1] + 
-   ay[1]*momentU[3]*momentV[1] + c1o2*ay[4]*momentU[4]*momentV[1] + 
-   ay[2]*momentU[2]*momentV[2] + c1o2*ax[4]*momentU[3]*momentV[2] + 
-   c1o2*ay[4]*momentU[2]*momentV[3] + az[0]*momentU[2]*momentW[1] + 
-   ax[3]*momentU[3]*momentW[1] + az[1]*momentU[3]*momentW[1] + 
-   c1o2*az[4]*momentU[4]*momentW[1] + ay[3]*momentU[2]*momentV[1]*momentW[1] + 
-   az[2]*momentU[2]*momentV[1]*momentW[1] + c1o2*az[4]*momentU[2]*momentV[2]*momentW[1] + 
-   az[3]*momentU[2]*momentW[2] + c1o2*ax[4]*momentU[3]*momentW[2] + 
-   c1o2*ay[4]*momentU[2]*momentV[1]*momentW[2] + c1o2*az[4]*momentU[2]*momentW[3] + 
-   c1o2*ax[4]*momentU[3]*momentXi[2] + c1o2*ay[4]*momentU[2]*momentV[1]*momentXi[2] + 
-   c1o2*az[4]*momentU[2]*momentW[1]*momentXi[2];
-
-    flux_2.rhoV = ax[0]*momentU[2]*momentV[1] + ax[1]*momentU[3]*momentV[1] + 
-   c1o2*ax[4]*momentU[4]*momentV[1] + ay[0]*momentU[1]*momentV[2] + 
-   ax[2]*momentU[2]*momentV[2] + ay[1]*momentU[2]*momentV[2] + 
-   c1o2*ay[4]*momentU[3]*momentV[2] + ay[2]*momentU[1]*momentV[3] + 
-   c1o2*ax[4]*momentU[2]*momentV[3] + c1o2*ay[4]*momentU[1]*momentV[4] + 
-   az[0]*momentU[1]*momentV[1]*momentW[1] + ax[3]*momentU[2]*momentV[1]*momentW[1] + 
-   az[1]*momentU[2]*momentV[1]*momentW[1] + c1o2*az[4]*momentU[3]*momentV[1]*momentW[1] + 
-   ay[3]*momentU[1]*momentV[2]*momentW[1] + az[2]*momentU[1]*momentV[2]*momentW[1] + 
-   c1o2*az[4]*momentU[1]*momentV[3]*momentW[1] + az[3]*momentU[1]*momentV[1]*momentW[2] + 
-   c1o2*ax[4]*momentU[2]*momentV[1]*momentW[2] + c1o2*ay[4]*momentU[1]*momentV[2]*momentW[2] + 
-   c1o2*az[4]*momentU[1]*momentV[1]*momentW[3] + 
-   c1o2*ax[4]*momentU[2]*momentV[1]*momentXi[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[2]*momentXi[2] + 
-   c1o2*az[4]*momentU[1]*momentV[1]*momentW[1]*momentXi[2];
-    
-    flux_2.rhoW = ax[0]*momentU[2]*momentW[1] + ax[1]*momentU[3]*momentW[1] + 
-   c1o2*ax[4]*momentU[4]*momentW[1] + ay[0]*momentU[1]*momentV[1]*momentW[1] + 
-   ax[2]*momentU[2]*momentV[1]*momentW[1] + ay[1]*momentU[2]*momentV[1]*momentW[1] + 
-   c1o2*ay[4]*momentU[3]*momentV[1]*momentW[1] + ay[2]*momentU[1]*momentV[2]*momentW[1] + 
-   c1o2*ax[4]*momentU[2]*momentV[2]*momentW[1] + c1o2*ay[4]*momentU[1]*momentV[3]*momentW[1] + 
-   az[0]*momentU[1]*momentW[2] + ax[3]*momentU[2]*momentW[2] + 
-   az[1]*momentU[2]*momentW[2] + c1o2*az[4]*momentU[3]*momentW[2] + 
-   ay[3]*momentU[1]*momentV[1]*momentW[2] + az[2]*momentU[1]*momentV[1]*momentW[2] + 
-   c1o2*az[4]*momentU[1]*momentV[2]*momentW[2] + az[3]*momentU[1]*momentW[3] + 
-   c1o2*ax[4]*momentU[2]*momentW[3] + c1o2*ay[4]*momentU[1]*momentV[1]*momentW[3] + 
-   c1o2*az[4]*momentU[1]*momentW[4] + c1o2*ax[4]*momentU[2]*momentW[1]*momentXi[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[1]*momentW[1]*momentXi[2] + 
-   c1o2*az[4]*momentU[1]*momentW[2]*momentXi[2];
-
-    flux_2.rhoE = c1o2*ax[0]*momentU[4] + c1o2*ax[1]*momentU[5] + c1o4*ax[4]*momentU[6] + 
-   c1o2*ay[0]*momentU[3]*momentV[1] + c1o2*ax[2]*momentU[4]*momentV[1] + 
-   c1o2*ay[1]*momentU[4]*momentV[1] + c1o4*ay[4]*momentU[5]*momentV[1] + 
-   c1o2*ax[0]*momentU[2]*momentV[2] + c1o2*ax[1]*momentU[3]*momentV[2] + 
-   c1o2*ay[2]*momentU[3]*momentV[2] + c1o2*ax[4]*momentU[4]*momentV[2] + 
-   c1o2*ay[0]*momentU[1]*momentV[3] + c1o2*ax[2]*momentU[2]*momentV[3] + 
-   c1o2*ay[1]*momentU[2]*momentV[3] + c1o2*ay[4]*momentU[3]*momentV[3] + 
-   c1o2*ay[2]*momentU[1]*momentV[4] + c1o4*ax[4]*momentU[2]*momentV[4] + 
-   c1o4*ay[4]*momentU[1]*momentV[5] + c1o2*az[0]*momentU[3]*momentW[1] + 
-   c1o2*ax[3]*momentU[4]*momentW[1] + c1o2*az[1]*momentU[4]*momentW[1] + 
-   c1o4*az[4]*momentU[5]*momentW[1] + c1o2*ay[3]*momentU[3]*momentV[1]*momentW[1] + 
-   c1o2*az[2]*momentU[3]*momentV[1]*momentW[1] + 
-   c1o2*az[0]*momentU[1]*momentV[2]*momentW[1] + 
-   c1o2*ax[3]*momentU[2]*momentV[2]*momentW[1] + c1o2*az[1]*momentU[2]*momentV[2]*momentW[1] + 
-   c1o2*az[4]*momentU[3]*momentV[2]*momentW[1] + c1o2*ay[3]*momentU[1]*momentV[3]*momentW[1] + 
-   c1o2*az[2]*momentU[1]*momentV[3]*momentW[1] + c1o4*az[4]*momentU[1]*momentV[4]*momentW[1] + 
-   c1o2*ax[0]*momentU[2]*momentW[2] + c1o2*ax[1]*momentU[3]*momentW[2] + 
-   c1o2*az[3]*momentU[3]*momentW[2] + c1o2*ax[4]*momentU[4]*momentW[2] + 
-   c1o2*ay[0]*momentU[1]*momentV[1]*momentW[2] + 
-   c1o2*ax[2]*momentU[2]*momentV[1]*momentW[2] + c1o2*ay[1]*momentU[2]*momentV[1]*momentW[2] + 
-   c1o2*ay[4]*momentU[3]*momentV[1]*momentW[2] + c1o2*ay[2]*momentU[1]*momentV[2]*momentW[2] + 
-   c1o2*az[3]*momentU[1]*momentV[2]*momentW[2] + c1o2*ax[4]*momentU[2]*momentV[2]*momentW[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[3]*momentW[2] + c1o2*az[0]*momentU[1]*momentW[3] + 
-   c1o2*ax[3]*momentU[2]*momentW[3] + c1o2*az[1]*momentU[2]*momentW[3] + 
-   c1o2*az[4]*momentU[3]*momentW[3] + c1o2*ay[3]*momentU[1]*momentV[1]*momentW[3] + 
-   c1o2*az[2]*momentU[1]*momentV[1]*momentW[3] + c1o2*az[4]*momentU[1]*momentV[2]*momentW[3] + 
-   c1o2*az[3]*momentU[1]*momentW[4] + c1o4*ax[4]*momentU[2]*momentW[4] + 
-   c1o4*ay[4]*momentU[1]*momentV[1]*momentW[4] + c1o4*az[4]*momentU[1]*momentW[5] + 
-   c1o2*ax[0]*momentU[2]*momentXi[2] + c1o2*ax[1]*momentU[3]*momentXi[2] + 
-   c1o2*ax[4]*momentU[4]*momentXi[2] + c1o2*ay[0]*momentU[1]*momentV[1]*momentXi[2] + 
-   c1o2*ax[2]*momentU[2]*momentV[1]*momentXi[2] + 
-   c1o2*ay[1]*momentU[2]*momentV[1]*momentXi[2] + 
-   c1o2*ay[4]*momentU[3]*momentV[1]*momentXi[2] + 
-   c1o2*ay[2]*momentU[1]*momentV[2]*momentXi[2] + 
-   c1o2*ax[4]*momentU[2]*momentV[2]*momentXi[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[3]*momentXi[2] + 
-   c1o2*az[0]*momentU[1]*momentW[1]*momentXi[2] + 
-   c1o2*ax[3]*momentU[2]*momentW[1]*momentXi[2] + 
-   c1o2*az[1]*momentU[2]*momentW[1]*momentXi[2] + 
-   c1o2*az[4]*momentU[3]*momentW[1]*momentXi[2] + 
-   c1o2*ay[3]*momentU[1]*momentV[1]*momentW[1]*momentXi[2] + 
-   c1o2*az[2]*momentU[1]*momentV[1]*momentW[1]*momentXi[2] + 
-   c1o2*az[4]*momentU[1]*momentV[2]*momentW[1]*momentXi[2] + 
-   c1o2*az[3]*momentU[1]*momentW[2]*momentXi[2] + 
-   c1o2*ax[4]*momentU[2]*momentW[2]*momentXi[2] + 
-   c1o2*ay[4]*momentU[1]*momentV[1]*momentW[2]*momentXi[2] + 
-   c1o2*az[4]*momentU[1]*momentW[3]*momentXi[2] + c1o4*ax[4]*momentU[2]*momentXi[4] + 
-   c1o4*ay[4]*momentU[1]*momentV[1]*momentXi[4] + c1o4*az[4]*momentU[1]*momentW[1]*momentXi[4];
-
-    //////////////////////////////////////////////////////////////////////////
-
-    flux_2.rho  += c2o1 * facePrim.lambda * (   momentU[0+1] *                                           facePrim.U - momentU[1+1]                           ) * force.x
-                 + c2o1 * facePrim.lambda * (   momentU[0+1] *                                           facePrim.V - momentU[0+1] * momentV[1]              ) * force.y
-                 + c2o1 * facePrim.lambda * (   momentU[0+1] *                                           facePrim.W - momentU[0+1] *              momentW[1] ) * force.z ;
-                                                                                                         
-    flux_2.rhoU += c2o1 * facePrim.lambda * (   momentU[1+1] *                                           facePrim.U - momentU[2+1]                           ) * force.x
-                 + c2o1 * facePrim.lambda * (   momentU[1+1] *                                           facePrim.V - momentU[1+1] * momentV[1]              ) * force.y
-                 + c2o1 * facePrim.lambda * (   momentU[1+1] *                                           facePrim.W - momentU[1+1] *              momentW[1] ) * force.z ;
-                                                                                                         
-    flux_2.rhoV += c2o1 * facePrim.lambda * (   momentU[0+1] * momentV[1] *                              facePrim.U - momentU[1+1] * momentV[1]              ) * force.x
-                 + c2o1 * facePrim.lambda * (   momentU[0+1] * momentV[1] *                              facePrim.V - momentU[0+1] * momentV[2]              ) * force.y
-                 + c2o1 * facePrim.lambda * (   momentU[0+1] * momentV[1] *                              facePrim.W - momentU[0+1] * momentV[1] * momentW[1] ) * force.z ;
-                                                                                                         
-    flux_2.rhoW += c2o1 * facePrim.lambda * (   momentU[0+1] *              momentW[1] *                 facePrim.U - momentU[1+1] *              momentW[1] ) * force.x
-                 + c2o1 * facePrim.lambda * (   momentU[0+1] *              momentW[1] *                 facePrim.V - momentU[0+1] * momentV[1] * momentW[1] ) * force.y
-                 + c2o1 * facePrim.lambda * (   momentU[0+1] *              momentW[1] *                 facePrim.W - momentU[0+1] *              momentW[2] ) * force.z ;
-
-    flux_2.rhoE +=       facePrim.lambda * ( ( momentU[2+1] + momentV[2] + momentW[2] + momentXi[2] ) * facePrim.U
-
-                                           - ( momentU[3+1]
-                                             + momentU[1+1] * momentV[2] 
-                                             + momentU[1+1] *              momentW[2] 
-                                             + momentU[1+1] *                           momentXi[2] )
-                                           ) * force.x
-
-                 +       facePrim.lambda * ( ( momentU[2+1] + momentV[2] + momentW[2] + momentXi[2] ) * facePrim.V
-
-                                           - ( momentU[2+1] * momentV[1]
-                                             + momentU[0+1] * momentV[3]
-                                             + momentU[0+1] * momentV[1] * momentW[2]
-                                             + momentU[0+1] * momentV[1] *              momentXi[2] )
-                                           ) * force.y
-
-                 +       facePrim.lambda * ( ( momentU[2+1] + momentV[2] + momentW[2] + momentXi[2] ) * facePrim.W
-
-                                           - ( momentU[2+1] *              momentW[1]
-                                             + momentU[0+1] * momentV[2] * momentW[1]
-                                             + momentU[0+1] *              momentW[3]
-                                             + momentU[0+1] *              momentW[1] * momentXi[2] )
-                                           ) * force.z ;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    flux_3.rho  = at[0]*momentU[1] + at[1]*momentU[2] + c1o2*at[4]*momentU[3] + at[2]*momentU[1]*momentV[1] + 
-   c1o2*at[4]*momentU[1]*momentV[2] + at[3]*momentU[1]*momentW[1] + 
-   c1o2*at[4]*momentU[1]*momentW[2] + c1o2*at[4]*momentU[1]*momentXi[2];
-
-    flux_3.rhoU = at[0]*momentU[2] + at[1]*momentU[3] + c1o2*at[4]*momentU[4] + at[2]*momentU[2]*momentV[1] + 
-   c1o2*at[4]*momentU[2]*momentV[2] + at[3]*momentU[2]*momentW[1] + 
-   c1o2*at[4]*momentU[2]*momentW[2] + c1o2*at[4]*momentU[2]*momentXi[2];
-
-    flux_3.rhoV = at[0]*momentU[1]*momentV[1] + at[1]*momentU[2]*momentV[1] + 
-   c1o2*at[4]*momentU[3]*momentV[1] + at[2]*momentU[1]*momentV[2] + 
-   c1o2*at[4]*momentU[1]*momentV[3] + at[3]*momentU[1]*momentV[1]*momentW[1] + 
-   c1o2*at[4]*momentU[1]*momentV[1]*momentW[2] + c1o2*at[4]*momentU[1]*momentV[1]*momentXi[2];
-
-    flux_3.rhoW = at[0]*momentU[1]*momentW[1] + at[1]*momentU[2]*momentW[1] + 
-   c1o2*at[4]*momentU[3]*momentW[1] + at[2]*momentU[1]*momentV[1]*momentW[1] + 
-   c1o2*at[4]*momentU[1]*momentV[2]*momentW[1] + at[3]*momentU[1]*momentW[2] + 
-   c1o2*at[4]*momentU[1]*momentW[3] + c1o2*at[4]*momentU[1]*momentW[1]*momentXi[2];
-
-    flux_3.rhoE = c1o2*at[0]*momentU[3] + c1o2*at[1]*momentU[4] + c1o4*at[4]*momentU[5] + 
-   c1o2*at[2]*momentU[3]*momentV[1] + c1o2*at[0]*momentU[1]*momentV[2] + 
-   c1o2*at[1]*momentU[2]*momentV[2] + c1o2*at[4]*momentU[3]*momentV[2] + 
-   c1o2*at[2]*momentU[1]*momentV[3] + c1o4*at[4]*momentU[1]*momentV[4] + 
-   c1o2*at[3]*momentU[3]*momentW[1] + c1o2*at[3]*momentU[1]*momentV[2]*momentW[1] + 
-   c1o2*at[0]*momentU[1]*momentW[2] + c1o2*at[1]*momentU[2]*momentW[2] + 
-   c1o2*at[4]*momentU[3]*momentW[2] + c1o2*at[2]*momentU[1]*momentV[1]*momentW[2] + 
-   c1o2*at[4]*momentU[1]*momentV[2]*momentW[2] + c1o2*at[3]*momentU[1]*momentW[3] + 
-   c1o4*at[4]*momentU[1]*momentW[4] + c1o2*at[0]*momentU[1]*momentXi[2] + 
-   c1o2*at[1]*momentU[2]*momentXi[2] + c1o2*at[4]*momentU[3]*momentXi[2] + 
-   c1o2*at[2]*momentU[1]*momentV[1]*momentXi[2] + 
-   c1o2*at[4]*momentU[1]*momentV[2]*momentXi[2] + 
-   c1o2*at[3]*momentU[1]*momentW[1]*momentXi[2] + 
-   c1o2*at[4]*momentU[1]*momentW[2]*momentXi[2] + c1o4*at[4]*momentU[1]*momentXi[4];
-
-    //////////////////////////////////////////////////////////////////////////
-
-    flux.rho  = ( timeCoefficients[0] * flux_1.rho  + timeCoefficients[1] * flux_2.rho  + timeCoefficients[2] * flux_3.rho  ) * parameters.dx * parameters.dx * facePrim.rho;
-    flux.rhoU = ( timeCoefficients[0] * flux_1.rhoU + timeCoefficients[1] * flux_2.rhoU + timeCoefficients[2] * flux_3.rhoU ) * parameters.dx * parameters.dx * facePrim.rho;
-    flux.rhoV = ( timeCoefficients[0] * flux_1.rhoV + timeCoefficients[1] * flux_2.rhoV + timeCoefficients[2] * flux_3.rhoV ) * parameters.dx * parameters.dx * facePrim.rho;
-    flux.rhoW = ( timeCoefficients[0] * flux_1.rhoW + timeCoefficients[1] * flux_2.rhoW + timeCoefficients[2] * flux_3.rhoW ) * parameters.dx * parameters.dx * facePrim.rho;
-    flux.rhoE = ( timeCoefficients[0] * flux_1.rhoE + timeCoefficients[1] * flux_2.rhoE + timeCoefficients[2] * flux_3.rhoE ) * parameters.dx * parameters.dx * facePrim.rho;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real qConstPr = timeCoefficients[1] * ( ( flux_2.rhoE + flux_3.rhoE ) 
-                                          - facePrim.U * ( flux_2.rhoU + flux_3.rhoU ) 
-                                          - facePrim.V * ( flux_2.rhoV + flux_3.rhoV ) 
-                                          - facePrim.W * ( flux_2.rhoW + flux_3.rhoW )
-                                          ) * parameters.dx * parameters.dx * facePrim.rho;
-
-    flux.rhoE += ( c1o1 / parameters.Pr - c1o1 ) * qConstPr;
-
-    heatFlux   = qConstPr / parameters.Pr;
-
-    //////////////////////////////////////////////////////////////////////////
-
-//#ifdef USE_PASSIVE_SCALAR
-//	flux_1.rhoS_1 = flux_1.rho * facePrim.S_1;
-//	flux_1.rhoS_2 = flux_1.rho * facePrim.S_2;
-//
-//	flux_2.rhoS_1 = flux_2.rho * facePrim.S_1 
-//				  + ( ax[5] * momentU[1+1]                          
-//				    + ay[5] * momentU[0+1] * momentV[1]             
-//				    + az[5] * momentU[0+1] *              momentW[1]
-//				    ) / (two * facePrim.lambda);
-//
-//	flux_2.rhoS_2 = flux_2.rho * facePrim.S_2 
-//				  + ( ax[6] * momentU[1+1]                          
-//				    + ay[6] * momentU[0+1] * momentV[1]             
-//				    + az[6] * momentU[0+1] *              momentW[1]
-//				    ) / (two * facePrim.lambda);
-//
-//	flux_3.rhoS_1 = flux_3.rho * facePrim.S_1
-//				  + at[5] * momentU[0 + 1]
-//				  / ( two * facePrim.lambda );
-//
-//	flux_3.rhoS_2 = flux_3.rho * facePrim.S_2
-//				  + at[6] * momentU[0 + 1]
-//				  / ( two * facePrim.lambda );
-//
-//    
-//	real tauS = parameters.D * two * facePrim.lambda;
-//
-//	real dt = parameters.dt;
-//
-//    real timeCoefficientsScalar [3];
-//
-//	timeCoefficientsScalar[0] =							dt;
-//	timeCoefficientsScalar[1] =					-tauS * dt;
-//	timeCoefficientsScalar[2] = c1o2 * dt * dt - tauS * dt;
-//
-//    flux.rhoS_1 = ( timeCoefficientsScalar[0] * flux_1.rhoS_1 + timeCoefficientsScalar[1] * flux_2.rhoS_1 + timeCoefficientsScalar[2] * flux_3.rhoS_1 ) * parameters.dx * parameters.dx * facePrim.rho;
-//    flux.rhoS_2 = ( timeCoefficientsScalar[0] * flux_1.rhoS_2 + timeCoefficientsScalar[1] * flux_2.rhoS_2 + timeCoefficientsScalar[2] * flux_3.rhoS_2 ) * parameters.dx * parameters.dx * facePrim.rho;
-//
-//#endif // USE_PASSIVE_SCALAR
-
-    //////////////////////////////////////////////////////////////////////////
-
-#ifdef USE_PASSIVE_SCALAR
-
-	flux_2.rhoS_1 = ( ax[5] * momentU[1+1]                          
-				    + ay[5] * momentU[0+1] * momentV[1]             
-				    + az[5] * momentU[0+1] *              momentW[1]
-				    ) / (c2o1 * facePrim.lambda);
-
-	flux_2.rhoS_2 = ( ax[6] * momentU[1+1]                          
-				    + ay[6] * momentU[0+1] * momentV[1]             
-				    + az[6] * momentU[0+1] *              momentW[1]
-				    ) / (c2o1 * facePrim.lambda);
-
-	flux_3.rhoS_1 = at[5] * momentU[0 + 1]
-				  / ( c2o1 * facePrim.lambda );
-
-	flux_3.rhoS_2 = at[6] * momentU[0 + 1]
-				  / ( c2o1 * facePrim.lambda );
-
-    //////////////////////////////////////////////////////////////////////////
-
-	real dt = parameters.dt;
-    real timeCoefficientsScalar [3];
-
-    {
-        real tauS = parameters.D1 * c2o1 * facePrim.lambda;
-        timeCoefficientsScalar[0] = dt;
-        timeCoefficientsScalar[1] = -tauS * dt;
-        timeCoefficientsScalar[2] = c1o2 * dt * dt - tauS * dt;
-
-        flux.rhoS_1 = flux.rho * facePrim.S_1 + ( timeCoefficientsScalar[1] * flux_2.rhoS_1 + timeCoefficientsScalar[2] * flux_3.rhoS_1 ) * parameters.dx * parameters.dx * facePrim.rho;
-    }
-
-    {
-        real tauS = parameters.D2 * c2o1 * facePrim.lambda;
-        timeCoefficientsScalar[0] = dt;
-        timeCoefficientsScalar[1] = -tauS * dt;
-        timeCoefficientsScalar[2] = c1o2 * dt * dt - tauS * dt;
-
-        flux.rhoS_2 = flux.rho * facePrim.S_2 + ( timeCoefficientsScalar[1] * flux_2.rhoS_2 + timeCoefficientsScalar[2] * flux_3.rhoS_2 ) * parameters.dx * parameters.dx * facePrim.rho;
-    }
-
-#endif // USE_PASSIVE_SCALAR
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-} // namespace GksGpu
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/FluxComputation/ExpansionCoefficients.cuh b/src/gpu/GksGpu/FluxComputation/ExpansionCoefficients.cuh
deleted file mode 100644
index 69c00acdda7d0f35ac3433af177471db083853b9..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/ExpansionCoefficients.cuh
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef ExpansionCoefficients_CUH
-#define ExpansionCoefficients_CUH
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-namespace GksGpu {
-
-__host__ __device__ inline void computeExpansionCoefficients(const PrimitiveVariables & facePrim, 
-                                                             const ConservedVariables & gradient,
-                                                             const real K, 
-                                                             real expansionCoefficient[LENGTH_CELL_DATA])
-{
-    real two_E, 
-             rho_dU_dx, 
-             rho_dV_dx, 
-             rho_dW_dx, 
-         two_rho_dE_dx;
-
-    two_E = facePrim.U * facePrim.U 
-          + facePrim.V * facePrim.V 
-          + facePrim.W * facePrim.W 
-          + c1o2 * ( K + c3o1 ) / facePrim.lambda;
-
-    rho_dU_dx     =       gradient.rhoU - facePrim.U  * gradient.rho;
-    rho_dV_dx     =       gradient.rhoV - facePrim.V  * gradient.rho;
-    rho_dW_dx     =       gradient.rhoW - facePrim.W  * gradient.rho;
-    two_rho_dE_dx = c2o1 * gradient.rhoE -      two_E  * gradient.rho;
-
-    expansionCoefficient[4] = ( c4o1 * facePrim.lambda * facePrim.lambda ) / ( K + c3o1 )
-                            * ( two_rho_dE_dx - c2o1 * facePrim.U * rho_dU_dx 
-                                              - c2o1 * facePrim.V * rho_dV_dx 
-                                              - c2o1 * facePrim.W * rho_dW_dx );
-
-    expansionCoefficient[3] = c2o1 * facePrim.lambda * rho_dW_dx - facePrim.W * expansionCoefficient[4];
-
-    expansionCoefficient[2] = c2o1 * facePrim.lambda * rho_dV_dx - facePrim.V * expansionCoefficient[4];
-
-    expansionCoefficient[1] = c2o1 * facePrim.lambda * rho_dU_dx - facePrim.U * expansionCoefficient[4];
-
-    expansionCoefficient[0] = gradient.rho -   facePrim.U * expansionCoefficient[1] 
-                                           -   facePrim.V * expansionCoefficient[2] 
-                                           -   facePrim.W * expansionCoefficient[3] 
-                                           - c1o2 * two_E * expansionCoefficient[4];
-
-#ifdef USE_PASSIVE_SCALAR
-	expansionCoefficient[5] = c2o1 * facePrim.lambda * (gradient.rhoS_1 - facePrim.S_1 * gradient.rho);
-	expansionCoefficient[6] = c2o1 * facePrim.lambda * (gradient.rhoS_2 - facePrim.S_2 * gradient.rho);
-#endif // USE_PASSIVE_SCALAR
-}
-
-} // namespace GksGpu
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/FluxComputation/FluxComputation.cu b/src/gpu/GksGpu/FluxComputation/FluxComputation.cu
deleted file mode 100644
index 25ba5726bfd505518bf82b88accea1c3549c5b96..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/FluxComputation.cu
+++ /dev/null
@@ -1,478 +0,0 @@
-#include "FluxComputation.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/PassiveScalar.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-
-#include "CellProperties/CellProperties.cuh"
-
-#include "FluxComputation/Moments.cuh"
-#include "FluxComputation/Reconstruction.cuh"
-#include "FluxComputation/Transformation.cuh"
-#include "FluxComputation/ExpansionCoefficients.cuh"
-#include "FluxComputation/AssembleFlux.cuh"
-#include "FluxComputation/ApplyFlux.cuh"
-#include "FluxComputation/Smagorinsky.cuh"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-__global__                 void fluxKernel  ( DataBaseStruct dataBase, Parameters parameters, char direction, uint startIndex, uint numberOfEntities );
-
-__host__ __device__ inline void fluxFunction( DataBaseStruct dataBase, Parameters parameters, char direction, uint startIndex, uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void FluxComputation::run( SPtr<DataBase> dataBase, Parameters parameters, uint level, bool evaluateCommFaces )
-{
-    //{
-    //    CudaUtility::CudaGrid grid(dataBase->perLevelCount[level].numberOfFacesX, 128);
-
-    //    runKernel(fluxKernel,
-    //              fluxFunction,
-    //              dataBase->getDeviceType(), grid,
-    //              dataBase->toStruct(),
-    //              parameters,
-    //              'x',
-    //              dataBase->perLevelCount[level].startOfFacesX);
-
-    //    cudaDeviceSynchronize();
-
-    //    getLastCudaError("FluxComputation::run( SPtr<DataBase> dataBase, Parameters parameters, 'x', uint level )");
-    //}
-    //{
-    //    CudaUtility::CudaGrid grid(dataBase->perLevelCount[level].numberOfFacesY, 128);
-
-    //    runKernel(fluxKernel,
-    //              fluxFunction,
-    //              dataBase->getDeviceType(), grid,
-    //              dataBase->toStruct(),
-    //              parameters,
-    //              'y',
-    //              dataBase->perLevelCount[level].startOfFacesY);
-
-    //    cudaDeviceSynchronize();
-
-    //    getLastCudaError("FluxComputation::run( SPtr<DataBase> dataBase, Parameters parameters, 'y', uint level )");
-    //}
-    //{
-    //    CudaUtility::CudaGrid grid(dataBase->perLevelCount[level].numberOfFacesZ, 128);
-
-    //    runKernel(fluxKernel,
-    //              fluxFunction,
-    //              dataBase->getDeviceType(), grid,
-    //              dataBase->toStruct(),
-    //              parameters,
-    //              'z',
-    //              dataBase->perLevelCount[level].startOfFacesZ);
-
-    //    cudaDeviceSynchronize();
-
-    //    getLastCudaError("FluxComputation::run( SPtr<DataBase> dataBase, Parameters parameters, 'z', uint level )");
-    //}
-    //////////////////////////////////////////////////////////////////////////
-    //{
-    //    CudaUtility::CudaGrid grid(dataBase->perLevelCount[level].numberOfFaces, 64);
-
-    //    runKernel(fluxKernel,
-    //              fluxFunction,
-    //              dataBase->getDeviceType(), grid,
-    //              dataBase->toStruct(),
-    //              parameters,
-    //              'x',
-    //              dataBase->perLevelCount[level].startOfFacesX);
-
-    //    cudaDeviceSynchronize();
-
-    //    getLastCudaError("FluxComputation::run( SPtr<DataBase> dataBase, Parameters parameters, 'x', uint level )");
-    //}
-    //////////////////////////////////////////////////////////////////////////
-    if( evaluateCommFaces )
-    {
-        CudaUtility::CudaGrid grid(dataBase->perLevelCount[level].numberOfFaces - dataBase->perLevelCount[level].numberOfInnerFaces, 64, CudaUtility::communicationStream);
-
-        if( grid.numberOfEntities <= 0 ) return;
-
-        runKernel(fluxKernel,
-                  fluxFunction,
-                  dataBase->getDeviceType(), grid,
-                  dataBase->toStruct(),
-                  parameters,
-                  'x',
-                  dataBase->perLevelCount[level].startOfFacesX + dataBase->perLevelCount[level].numberOfInnerFaces);
-    }
-    else
-    {
-        CudaUtility::CudaGrid grid(dataBase->perLevelCount[level].numberOfInnerFaces, 64, CudaUtility::computeStream);
-
-        runKernel(fluxKernel,
-                  fluxFunction,
-                  dataBase->getDeviceType(), grid,
-                  dataBase->toStruct(),
-                  parameters,
-                  'x',
-                  dataBase->perLevelCount[level].startOfFacesX);
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void fluxKernel(DataBaseStruct dataBase, Parameters parameters, char direction, uint startIndex, uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    fluxFunction( dataBase, parameters, direction, startIndex, index );
-}
-
-__host__ __device__ inline void fluxFunction(DataBaseStruct dataBase, Parameters parameters, char direction, uint startIndex, uint index)
-{
-    uint faceIndex = startIndex + index;
-
-    real K = parameters.K;
-
-    direction = dataBase.faceOrientation[ faceIndex ];
-
-    parameters.D1 = parameters.D;
-    parameters.D2 = parameters.D;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( parameters.useSpongeLayer )
-    {
-        if( parameters.spongeLayerIdx == 0 )
-        {
-            // real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
-            real z = dataBase.faceCenter[VEC_Z(faceIndex, dataBase.numberOfFaces)];
-
-            real muNew = parameters.mu;
-
-            real zStart = real(0.35);
-
-            if (fabsf(z) > zStart)
-            {
-                muNew += (fabs(z) - zStart) * c10o1 * c10o1 * c10o1 * parameters.mu;
-            }
-
-            parameters.mu = muNew;
-        }
-        if( parameters.spongeLayerIdx == 1 )
-        {
-            // real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
-            real z = dataBase.faceCenter[VEC_Z(faceIndex, dataBase.numberOfFaces)];
-
-            real muNew = parameters.mu;
-
-            real zStart = real(3.5);
-
-            if (fabsf(z) > zStart)
-            {
-                muNew += (fabs(z) - zStart) * c10o1 * c10o1 * c10o1 * parameters.mu;
-            }
-
-            parameters.mu = muNew;
-        }
-        if( parameters.spongeLayerIdx == 2 )
-        {
-            real y = dataBase.faceCenter[VEC_Y(faceIndex, dataBase.numberOfFaces)];
-
-            real muNew = parameters.mu;
-
-            real yStart = real(3.0);
-
-            if (fabsf(y) > yStart)
-            {
-                muNew += (fabs(y) - yStart) * c10o1 * c10o1 * parameters.mu;
-            }
-
-            parameters.mu = muNew;
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    PrimitiveVariables facePrim;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    real ax[LENGTH_CELL_DATA];
-    real ay[LENGTH_CELL_DATA];
-    real az[LENGTH_CELL_DATA];
-    real at[LENGTH_CELL_DATA];
-
-#pragma unroll
-    for( uint i = 0; i < LENGTH_CELL_DATA; i++ )
-    { 
-        ax[i] = c0o1; 
-        ay[i] = c0o1; 
-        az[i] = c0o1; 
-        at[i] = c0o1;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    {
-        ConservedVariables gradN, gradT1, gradT2;
-
-        reconstructFiniteDifferences(faceIndex,
-                                     dataBase,
-                                     parameters,
-                                     direction,
-                                     gradN,
-                                     gradT1,
-                                     gradT2,
-                                     facePrim,
-                                     K);
-
-        transformGlobalToLocal( gradN , direction );
-        transformGlobalToLocal( gradT1, direction );
-        transformGlobalToLocal( gradT2, direction );
-
-        transformGlobalToLocal( facePrim, direction );
-
-        computeExpansionCoefficients(facePrim, gradN , K, ax);
-        computeExpansionCoefficients(facePrim, gradT1, K, ay);
-        computeExpansionCoefficients(facePrim, gradT2, K, az);
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if(parameters.useSmagorinsky)
-        {
-            real muTurb = getTurbulentViscositySmagorinsky( parameters, facePrim, gradN, gradT1, gradT2 );
-
-            if( muTurb > parameters.mu )
-            {
-                real turbSc = real(0.3);
-                real turbPr = real(0.5);
-
-                parameters.mu = muTurb;
-
-                parameters.D  = muTurb / turbSc;
-                parameters.Pr = turbPr;
-            }
-        }
-
-        //////////////////////////////////////////////////////////////////////////
-
-        if(parameters.useTemperatureLimiter){
-            real k = parameters.mu / parameters.Pr;
-
-            real dUdx1 = ( gradN.rhoU  - facePrim.U * gradN.rho  );
-            real dUdx2 = ( gradT1.rhoU - facePrim.U * gradT1.rho );
-            real dUdx3 = ( gradT2.rhoU - facePrim.U * gradT2.rho );
-    
-            real dVdx1 = ( gradN.rhoV  - facePrim.V * gradN.rho  );
-            real dVdx2 = ( gradT1.rhoV - facePrim.V * gradT1.rho );
-            real dVdx3 = ( gradT2.rhoV - facePrim.V * gradT2.rho );
-    
-            real dWdx1 = ( gradN.rhoW  - facePrim.W * gradN.rho  );
-            real dWdx2 = ( gradT1.rhoW - facePrim.W * gradT1.rho );
-            real dWdx3 = ( gradT2.rhoW - facePrim.W * gradT2.rho );
-    
-            real dEdx1 = ( gradN.rhoE  - facePrim.W * gradN.rho  );
-            real dEdx2 = ( gradT1.rhoE - facePrim.W * gradT1.rho );
-            real dEdx3 = ( gradT2.rhoE - facePrim.W * gradT2.rho );
-
-            real dTdx1 = dEdx1 - c2o1 * facePrim.U * dUdx1 - c2o1 * facePrim.V * dVdx1 - c2o1 * facePrim.W * dWdx1;
-            real dTdx2 = dEdx2 - c2o1 * facePrim.U * dUdx2 - c2o1 * facePrim.V * dVdx2 - c2o1 * facePrim.W * dWdx2;
-            real dTdx3 = dEdx3 - c2o1 * facePrim.U * dUdx3 - c2o1 * facePrim.V * dVdx3 - c2o1 * facePrim.W * dWdx3;
-    
-            //real E = c1o2 * ( facePrim.U * facePrim.U 
-            //                + facePrim.V * facePrim.V 
-            //                + facePrim.W * facePrim.W 
-            //                + ( parameters.K + c3o1 ) / ( c4o1 * facePrim.lambda ) );
-
-            //real dEdx1 = ( gradN.rhoE  - E * gradN.rho  );
-            //real dEdx2 = ( gradT1.rhoE - E * gradT1.rho );
-            //real dEdx3 = ( gradT2.rhoE - E * gradT2.rho );
-
-            //real dTdx1 = dEdx1 - facePrim.U * dUdx1 - facePrim.V * dVdx1 - facePrim.W * dWdx1;
-            //real dTdx2 = dEdx2 - facePrim.U * dUdx2 - facePrim.V * dVdx2 - facePrim.W * dWdx2;
-            //real dTdx3 = dEdx3 - facePrim.U * dUdx3 - facePrim.V * dVdx3 - facePrim.W * dWdx3;
-
-            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            // this one works for some time
-            //real S = parameters.dx * parameters.dx * ( fabsf(dTdx1) + fabsf(dTdx2) + fabsf(dTdx3) );
-            //k += real(0.00002) / real(0.015625) * S;
-
-            //real T = getT(facePrim);
-            //if( T > 20 )
-                //k += parameters.temperatureLimiter * S;
-
-            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            
-            //real S = parameters.dx * ( fabsf(dTdx1) + fabsf(dTdx2) + fabsf(dTdx3) );
-            //k += real(0.00001) * real(0.0025) * S * S;
-            
-            //real kMax = real(0.01) * c1o2 * parameters.dx * parameters.dx / parameters.dt;
-            //real kMax = real(0.01);
-
-            real S = parameters.dx * parameters.dx * ( dTdx1 * dTdx1 + dTdx2 * dTdx2 + dTdx3 * dTdx3 );
-
-            k += fminf(parameters.temperatureLimiterUpperLimit, parameters.temperatureLimiter * S);
-
-            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            // this one works for some time
-            //real S = ( fabsf(dTdx1) + fabsf(dTdx2) + fabsf(dTdx3) );
-            //k += real(0.00002) / real(0.015625) * S;
-            //k += real(1.28e-4) * parameters.dx * parameters.dx * parameters.dx * S * S;
-            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-            parameters.Pr = parameters.mu / k;
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    parameters.D1 = parameters.D;
-    parameters.D2 = parameters.D;
-
-    if(parameters.usePassiveScalarLimiter){
-    #ifdef USE_PASSIVE_SCALAR
-
-        if( facePrim.S_1 < c0o1 ) parameters.D1 += - parameters.passiveScalarLimiter *   facePrim.S_1;
-        if( facePrim.S_1 > c1o1  ) parameters.D1 +=   parameters.passiveScalarLimiter * ( facePrim.S_1 - c1o1 );
-        
-        parameters.D2 = parameters.D1;
-
-        if( facePrim.S_2 < c0o1 ) parameters.D2 += - real(0.1)*parameters.passiveScalarLimiter *   facePrim.S_2;
-        if( facePrim.S_2 > c1o1  ) parameters.D2 +=   real(0.1)*parameters.passiveScalarLimiter * ( facePrim.S_2 - c1o1 );
-    #endif // USE_PASSIVE_SCALAR
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    //{
-    //#ifdef USE_PASSIVE_SCALAR
-    //    if( facePrim.S_1 < zero )
-    //    {
-    //        parameters.D += - real(0.1) * facePrim.S_1;
-    //    }
-    //    if( facePrim.S_1 > one )
-    //    {
-    //        parameters.D +=   real(0.1) * ( facePrim.S_1 - one );
-    //    }
-
-    //#endif // USE_PASSIVE_SCALAR
-    //}
-
-    //////////////////////////////////////////////////////////////////////////
-
-    {
-        ConservedVariables flux;
-
-        {
-            real momentU [ NUMBER_OF_MOMENTS ]; 
-            real momentV [ NUMBER_OF_MOMENTS ]; 
-            real momentW [ NUMBER_OF_MOMENTS ]; 
-            real momentXi[ NUMBER_OF_MOMENTS ];
-
-            computeMoments( facePrim, K, momentU, momentV, momentW, momentXi );
-
-            Vec3 force = parameters.force;
-
-            transformGlobalToLocal(force, direction);
-
-            {
-                ConservedVariables timeGrad;
-                computeTimeDerivative( facePrim, 
-                                       momentU, 
-                                       momentV, 
-                                       momentW, 
-                                       momentXi, 
-                                       ax, ay, az,
-                                       force,
-                                       timeGrad );
-
-                computeExpansionCoefficients( facePrim, timeGrad, K, at );
-            }
-            {
-                real timeCoefficients[4];
-                computeTimeCoefficients( facePrim, parameters, timeCoefficients );
-
-                real heatFlux;
-                assembleFlux( facePrim, 
-                              momentU, momentV, momentW, momentXi,
-                              ax, ay, az, at, 
-                              timeCoefficients, 
-                              parameters,
-                              force,
-                              flux,
-                              heatFlux );
-
-                transformLocalToGlobal( flux, direction );
-            }
-        }
-
-        //////////////////////////////////////////////////////////////////////////
-
-        {
-            uint negCellIdx = dataBase.faceToCell[ NEG_CELL(faceIndex, dataBase.numberOfFaces) ];
-            uint posCellIdx = dataBase.faceToCell[ POS_CELL(faceIndex, dataBase.numberOfFaces) ];
-
-        #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
-            atomicAdd( &( dataBase.diffusivity[ negCellIdx ] ), (realAccumulator)( parameters.D * parameters.dx * parameters.dx * parameters.dt ) );
-            atomicAdd( &( dataBase.diffusivity[ posCellIdx ] ), (realAccumulator)( parameters.D * parameters.dx * parameters.dx * parameters.dt ) );
-        #endif
-
-            CellProperties negCellProperties = dataBase.cellProperties[ negCellIdx ];
-            CellProperties posCellProperties = dataBase.cellProperties[ posCellIdx ];
-
-            //if( isCellProperties( negCellProperties, CELL_PROPERTIES_IS_FLUX_BC ) || 
-            //    isCellProperties( posCellProperties, CELL_PROPERTIES_IS_FLUX_BC ) )
-            //    return;
-
-            if( isCellProperties( negCellProperties, CELL_PROPERTIES_WALL ) || 
-                isCellProperties( posCellProperties, CELL_PROPERTIES_WALL ) )
-            {
-                flux.rho    = c0o1;
-            #ifdef USE_PASSIVE_SCALAR
-                flux.rhoS_1 = c0o1;
-                flux.rhoS_2 = c0o1;
-            #endif //USE_PASSIVE_SCALAR
-            }
-
-            if( isCellProperties( negCellProperties, CELL_PROPERTIES_IS_INSULATED ) || 
-                isCellProperties( posCellProperties, CELL_PROPERTIES_IS_INSULATED ) )
-            {
-                flux.rhoE   = c0o1;
-            }
-
-            uint negCellParentIdx = dataBase.parentCell[ negCellIdx ];
-            uint posCellParentIdx = dataBase.parentCell[ posCellIdx ];
-
-            //if( !( negCellParentIdx != INVALID_INDEX ) != !( posCellParentIdx != INVALID_INDEX ) ) // XOR
-            if( ( negCellParentIdx == INVALID_INDEX ) != ( posCellParentIdx == INVALID_INDEX ) ) // XOR
-            {
-                if( !isCellProperties( negCellProperties, CELL_PROPERTIES_GHOST ) && 
-                    !isCellProperties( posCellProperties, CELL_PROPERTIES_GHOST ) )
-                {
-                    if (negCellParentIdx != INVALID_INDEX)
-                    {
-                        applyFluxToNegCell(dataBase, negCellParentIdx, flux, direction, parameters);
-                    }
-
-                    if (posCellParentIdx != INVALID_INDEX)
-                    {
-                        applyFluxToPosCell(dataBase, posCellParentIdx, flux, direction, parameters);
-                    }
-                }
-            }
-
-            applyFluxToNegCell(dataBase, negCellIdx, flux, direction, parameters);
-            applyFluxToPosCell(dataBase, posCellIdx, flux, direction, parameters);
-        }
-    }
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/FluxComputation/FluxComputation.h b/src/gpu/GksGpu/FluxComputation/FluxComputation.h
deleted file mode 100644
index 0fe86577dbe9901ac513cce802667db4b3db2ee7..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/FluxComputation.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef  FluxComputation_H
-#define  FluxComputation_H
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-#include "Parameters/Parameters.h"
-
-namespace GksGpu {
-
-class GKSGPU_EXPORT FluxComputation
-{
-public:
-
-    static void run( SPtr<DataBase> dataBase, 
-                     Parameters parameters, 
-                     uint level,
-                     bool evaluateCommFaces = false);
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/FluxComputation/Moments.cuh b/src/gpu/GksGpu/FluxComputation/Moments.cuh
deleted file mode 100644
index 8ba25dbef215bdff85c8618cff5e9da047b364f2..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/Moments.cuh
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef Moments_CUH
-#define Moments_CUH
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#define NUMBER_OF_MOMENTS 7
-
-namespace GksGpu {
-
-__host__ __device__ inline void computeMoments( const PrimitiveVariables & facePrim,
-                                                const real K,
-                                                real momentU [NUMBER_OF_MOMENTS], 
-                                                real momentV [NUMBER_OF_MOMENTS], 
-                                                real momentW [NUMBER_OF_MOMENTS], 
-                                                real momentXi[NUMBER_OF_MOMENTS] )
-{
-    momentU[0] = c1o1;
-    momentU[1] = facePrim.U;
-#pragma unroll
-    for ( uint i = 2; i < NUMBER_OF_MOMENTS; i++ )
-        momentU[i] = facePrim.U * momentU[i - 1] + ( real(i - 1) * momentU[i - 2] )/( c2o1 * facePrim.lambda );
-
-    momentV[0] = c1o1;
-    momentV[1] = facePrim.V;
-#pragma unroll
-    for ( uint i = 2; i < NUMBER_OF_MOMENTS; i++ )
-        momentV[i] = facePrim.V * momentV[i - 1] + ( real(i - 1) * momentV[i - 2] )/( c2o1 * facePrim.lambda );
-
-    momentW[0] = c1o1;
-    momentW[1] = facePrim.W;
-#pragma unroll
-    for ( uint i = 2; i < NUMBER_OF_MOMENTS; i++ )
-        momentW[i] = facePrim.W * momentW[i - 1] + ( real(i - 1) * momentW[i - 2] )/( c2o1 * facePrim.lambda );
-
-    momentXi[0] = c1o1;
-    momentXi[1] = c0o1;
-    momentXi[2] = K / ( c2o1 * facePrim.lambda );
-    momentXi[3] = c0o1;
-    momentXi[4] = K * ( c2o1 + K ) / ( c4o1 * facePrim.lambda * facePrim.lambda );
-    momentXi[5] = c0o1;
-    momentXi[6] = ( K + c4o1 ) / ( c2o1 * facePrim.lambda ) * momentXi[4];
-}
-
-} // namespace GksGpu
-
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/FluxComputation/Reconstruction.cuh b/src/gpu/GksGpu/FluxComputation/Reconstruction.cuh
deleted file mode 100644
index 74134aba8b2f6e453b6e6095d887ec086836c1e3..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/Reconstruction.cuh
+++ /dev/null
@@ -1,263 +0,0 @@
-#ifndef Reconstruction_CUH
-#define Reconstruction_CUH
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-#include "Parameters/Parameters.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-#include "FlowStateData/ThermalDependencies.cuh"
-
-namespace GksGpu {
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void getCellIndicesN ( const uint faceIndex,
-                                                  const DataBaseStruct& dataBase,
-                                                  uint& posCellIndexN,
-                                                  uint& negCellIndexN )
-{
-    posCellIndexN = dataBase.faceToCell[ POS_CELL( faceIndex, dataBase.numberOfFaces ) ];
-    negCellIndexN = dataBase.faceToCell[ NEG_CELL( faceIndex, dataBase.numberOfFaces ) ];
-}
-
-__host__ __device__ inline void getCellIndicesTX( const uint faceIndex,
-                                                  const DataBaseStruct& dataBase,
-                                                  const uint posCellIndexN,
-                                                  const uint negCellIndexN,
-                                                  uint* posCellIndexTX,
-                                                  uint* negCellIndexTX )
-{
-    posCellIndexTX[0] = dataBase.cellToCell[ CELL_TO_CELL( posCellIndexN, 0, dataBase.numberOfCells ) ];
-    posCellIndexTX[1] = dataBase.cellToCell[ CELL_TO_CELL( negCellIndexN, 0, dataBase.numberOfCells ) ];
-
-    negCellIndexTX[0] = dataBase.cellToCell[ CELL_TO_CELL( posCellIndexN, 1, dataBase.numberOfCells ) ];
-    negCellIndexTX[1] = dataBase.cellToCell[ CELL_TO_CELL( negCellIndexN, 1, dataBase.numberOfCells ) ];
-}
-
-__host__ __device__ inline void getCellIndicesTY( const uint faceIndex,
-                                                  const DataBaseStruct& dataBase,
-                                                  const uint posCellIndexN,
-                                                  const uint negCellIndexN,
-                                                  uint* posCellIndexTY,
-                                                  uint* negCellIndexTY )
-{
-    posCellIndexTY[0] = dataBase.cellToCell[ CELL_TO_CELL( posCellIndexN, 2, dataBase.numberOfCells ) ];
-    posCellIndexTY[1] = dataBase.cellToCell[ CELL_TO_CELL( negCellIndexN, 2, dataBase.numberOfCells ) ];
-
-    negCellIndexTY[0] = dataBase.cellToCell[ CELL_TO_CELL( posCellIndexN, 3, dataBase.numberOfCells ) ];
-    negCellIndexTY[1] = dataBase.cellToCell[ CELL_TO_CELL( negCellIndexN, 3, dataBase.numberOfCells ) ];
-}
-
-__host__ __device__ inline void getCellIndicesTZ( const uint faceIndex,
-                                                  const DataBaseStruct& dataBase,
-                                                  const uint posCellIndexN,
-                                                  const uint negCellIndexN,
-                                                  uint* posCellIndexTZ,
-                                                  uint* negCellIndexTZ )
-{
-    posCellIndexTZ[0] = dataBase.cellToCell[ CELL_TO_CELL( posCellIndexN, 4, dataBase.numberOfCells ) ];
-    posCellIndexTZ[1] = dataBase.cellToCell[ CELL_TO_CELL( negCellIndexN, 4, dataBase.numberOfCells ) ];
-
-    negCellIndexTZ[0] = dataBase.cellToCell[ CELL_TO_CELL( posCellIndexN, 5, dataBase.numberOfCells ) ];
-    negCellIndexTZ[1] = dataBase.cellToCell[ CELL_TO_CELL( negCellIndexN, 5, dataBase.numberOfCells ) ];
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void computeFaceCons( const ConservedVariables& posCons,
-                                                 const ConservedVariables& negCons,
-                                                 ConservedVariables& faceCons )
-{
-    faceCons.rho  = c1o2 * ( negCons.rho  + posCons.rho  );
-    faceCons.rhoU = c1o2 * ( negCons.rhoU + posCons.rhoU );
-    faceCons.rhoV = c1o2 * ( negCons.rhoV + posCons.rhoV );
-    faceCons.rhoW = c1o2 * ( negCons.rhoW + posCons.rhoW );
-    faceCons.rhoE = c1o2 * ( negCons.rhoE + posCons.rhoE );
-#ifdef USE_PASSIVE_SCALAR
-	faceCons.rhoS_1 = c1o2 * ( negCons.rhoS_1 + posCons.rhoS_1 );
-	faceCons.rhoS_2 = c1o2 * ( negCons.rhoS_2 + posCons.rhoS_2 );
-#endif // USE_PASSIVE_SCALAR
-}
-
-__host__ __device__ inline void computeGradN( const Parameters& parameters,
-                                              const ConservedVariables& posCons,
-                                              const ConservedVariables& negCons,
-                                              const PrimitiveVariables& facePrim,
-                                              ConservedVariables& gradN )
-{
-    gradN.rho  = ( posCons.rho  - negCons.rho  ) / ( parameters.dx * facePrim.rho );
-    gradN.rhoU = ( posCons.rhoU - negCons.rhoU ) / ( parameters.dx * facePrim.rho );
-    gradN.rhoV = ( posCons.rhoV - negCons.rhoV ) / ( parameters.dx * facePrim.rho );
-    gradN.rhoW = ( posCons.rhoW - negCons.rhoW ) / ( parameters.dx * facePrim.rho );
-    gradN.rhoE = ( posCons.rhoE - negCons.rhoE ) / ( parameters.dx * facePrim.rho );
-#ifdef USE_PASSIVE_SCALAR
-	gradN.rhoS_1 = ( posCons.rhoS_1 - negCons.rhoS_1 ) / ( parameters.dx * facePrim.rho );
-	gradN.rhoS_2 = ( posCons.rhoS_2 - negCons.rhoS_2 ) / ( parameters.dx * facePrim.rho );
-#endif // USE_PASSIVE_SCALAR
-}
-
-__host__ __device__ inline void computeGradT( const DataBaseStruct& dataBase,
-                                              const Parameters& parameters,
-                                              const uint posCellIndexT[2],
-                                              const uint negCellIndexT[2],
-                                              const PrimitiveVariables& facePrim,
-                                              ConservedVariables& gradN )
-{
-    ConservedVariables cons;
-
-    //////////////////////////////////////////////////////////////////////////
-    {
-        readCellData(posCellIndexT[0], dataBase, cons);
-
-        gradN.rho  += c1o2 * cons.rho;
-        gradN.rhoU += c1o2 * cons.rhoU;
-        gradN.rhoV += c1o2 * cons.rhoV;
-        gradN.rhoW += c1o2 * cons.rhoW;
-        gradN.rhoE += c1o2 * cons.rhoE;
-    #ifdef USE_PASSIVE_SCALAR
-        gradN.rhoS_1 += c1o2 * cons.rhoS_1;
-        gradN.rhoS_2 += c1o2 * cons.rhoS_2;
-    #endif // USE_PASSIVE_SCALAR
-    }
-    {
-        readCellData(posCellIndexT[1], dataBase, cons);
-
-        gradN.rho  += c1o2 * cons.rho;
-        gradN.rhoU += c1o2 * cons.rhoU;
-        gradN.rhoV += c1o2 * cons.rhoV;
-        gradN.rhoW += c1o2 * cons.rhoW;
-        gradN.rhoE += c1o2 * cons.rhoE;
-    #ifdef USE_PASSIVE_SCALAR
-        gradN.rhoS_1 += c1o2 * cons.rhoS_1;
-        gradN.rhoS_2 += c1o2 * cons.rhoS_2;
-    #endif // USE_PASSIVE_SCALAR
-    }
-    //////////////////////////////////////////////////////////////////////////
-    {
-        readCellData(negCellIndexT[0], dataBase, cons);
-
-        gradN.rho  -= c1o2 * cons.rho;
-        gradN.rhoU -= c1o2 * cons.rhoU;
-        gradN.rhoV -= c1o2 * cons.rhoV;
-        gradN.rhoW -= c1o2 * cons.rhoW;
-        gradN.rhoE -= c1o2 * cons.rhoE;
-    #ifdef USE_PASSIVE_SCALAR
-        gradN.rhoS_1 -= c1o2 * cons.rhoS_1;
-        gradN.rhoS_2 -= c1o2 * cons.rhoS_2;
-    #endif // USE_PASSIVE_SCALAR
-    }
-    {
-        readCellData(negCellIndexT[1], dataBase, cons);
-
-        gradN.rho  -= c1o2 * cons.rho;
-        gradN.rhoU -= c1o2 * cons.rhoU;
-        gradN.rhoV -= c1o2 * cons.rhoV;
-        gradN.rhoW -= c1o2 * cons.rhoW;
-        gradN.rhoE -= c1o2 * cons.rhoE;
-    #ifdef USE_PASSIVE_SCALAR
-        gradN.rhoS_1 -= c1o2 * cons.rhoS_1;
-        gradN.rhoS_2 -= c1o2 * cons.rhoS_2;
-    #endif // USE_PASSIVE_SCALAR
-    }
-    //////////////////////////////////////////////////////////////////////////
-    {
-        gradN.rho  /= c2o1 * parameters.dx * facePrim.rho;
-        gradN.rhoU /= c2o1 * parameters.dx * facePrim.rho;
-        gradN.rhoV /= c2o1 * parameters.dx * facePrim.rho;
-        gradN.rhoW /= c2o1 * parameters.dx * facePrim.rho;
-        gradN.rhoE /= c2o1 * parameters.dx * facePrim.rho;
-    #ifdef USE_PASSIVE_SCALAR
-        gradN.rhoS_1 /= c2o1 * parameters.dx * facePrim.rho;
-        gradN.rhoS_2 /= c2o1 * parameters.dx * facePrim.rho;
-    #endif // USE_PASSIVE_SCALAR
-    }
-    //////////////////////////////////////////////////////////////////////////
-}
-
-
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void reconstructFiniteDifferences( const uint faceIndex,
-                                                              const DataBaseStruct& dataBase,
-                                                              const Parameters& parameters,
-                                                              const char direction,
-                                                              ConservedVariables& gradN,
-                                                              ConservedVariables& gradT1,
-                                                              ConservedVariables& gradT2,
-                                                              PrimitiveVariables& facePrim,
-                                                              real& K )
-{
-    uint posCellIndexN, negCellIndexN;
-
-    getCellIndicesN( faceIndex, dataBase, posCellIndexN, negCellIndexN );
-    
-    {
-        ConservedVariables posCons, negCons, faceCons;
-
-        readCellData(posCellIndexN, dataBase, posCons);
-        readCellData(negCellIndexN, dataBase, negCons);
-        
-        computeFaceCons(posCons, negCons, faceCons);
-
-    #ifdef USE_PASSIVE_SCALAR
-        {
-            //K = getK(faceCons);
-        }
-    #endif
-
-        facePrim = toPrimitiveVariables( faceCons, K, false );
-
-        computeGradN( parameters, posCons, negCons, facePrim, gradN );
-    }
-
-    {
-        uint posCellIndexT1[2];
-        uint negCellIndexT1[2];
-    
-        if( direction == 'x' ) getCellIndicesTY(faceIndex, dataBase, posCellIndexN, negCellIndexN, posCellIndexT1, negCellIndexT1);
-        if( direction == 'y' ) getCellIndicesTZ(faceIndex, dataBase, posCellIndexN, negCellIndexN, posCellIndexT1, negCellIndexT1);
-        if( direction == 'z' ) getCellIndicesTX(faceIndex, dataBase, posCellIndexN, negCellIndexN, posCellIndexT1, negCellIndexT1);
-
-        computeGradT( dataBase, parameters, posCellIndexT1, negCellIndexT1, facePrim, gradT1 );
-    }
-
-    {
-        uint posCellIndexT2[2];
-        uint negCellIndexT2[2];
-    
-        if( direction == 'x' ) getCellIndicesTZ(faceIndex, dataBase, posCellIndexN, negCellIndexN, posCellIndexT2, negCellIndexT2);
-        if( direction == 'y' ) getCellIndicesTX(faceIndex, dataBase, posCellIndexN, negCellIndexN, posCellIndexT2, negCellIndexT2);
-        if( direction == 'z' ) getCellIndicesTY(faceIndex, dataBase, posCellIndexN, negCellIndexN, posCellIndexT2, negCellIndexT2);
-
-        computeGradT( dataBase, parameters, posCellIndexT2, negCellIndexT2, facePrim, gradT2 );
-    }
-}
-
-} // namespace GksGpu
-
-
-
-
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/FluxComputation/Smagorinsky.cuh b/src/gpu/GksGpu/FluxComputation/Smagorinsky.cuh
deleted file mode 100644
index 8a03ae722e62f338eed7672d362cf0aaa28d6d7a..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/Smagorinsky.cuh
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef Smagorinsky_CUH
-#define Smagorinsky_CUH
-
-#include <cmath>
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-#include "Core/RealConstants.h"
-
-#include "Parameters/Parameters.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-namespace GksGpu {
-
-inline __host__ __device__ real getTurbulentViscositySmagorinsky(const Parameters & parameters, 
-                                                                 const PrimitiveVariables& facePrim, 
-                                                                 const ConservedVariables gradX1, 
-                                                                 const ConservedVariables gradX2, 
-                                                                 const ConservedVariables gradX3 )
-{
-    // See FDS 6 Technical Reference Guide, Section 4.2.8
-
-    real dUdx1 = ( gradX1.rhoU - facePrim.U * gradX1.rho )/* / facePrim.rho*/;
-    real dUdx2 = ( gradX2.rhoU - facePrim.U * gradX2.rho )/* / facePrim.rho*/;
-    real dUdx3 = ( gradX3.rhoU - facePrim.U * gradX3.rho )/* / facePrim.rho*/;
-    real dVdx1 = ( gradX1.rhoV - facePrim.V * gradX1.rho )/* / facePrim.rho*/;
-    real dVdx2 = ( gradX2.rhoV - facePrim.V * gradX2.rho )/* / facePrim.rho*/;
-    real dVdx3 = ( gradX3.rhoV - facePrim.V * gradX3.rho )/* / facePrim.rho*/;
-    real dWdx1 = ( gradX1.rhoW - facePrim.W * gradX1.rho )/* / facePrim.rho*/;
-    real dWdx2 = ( gradX2.rhoW - facePrim.W * gradX2.rho )/* / facePrim.rho*/;
-    real dWdx3 = ( gradX3.rhoW - facePrim.W * gradX3.rho )/* / facePrim.rho*/;
-
-    real S11sq = dUdx1*dUdx1;
-    real S22sq = dVdx2*dVdx2;
-    real S33sq = dWdx3*dWdx3;
-
-    real S12sq = c1o4 * ( dUdx2 + dVdx1 ) * ( dUdx2 + dVdx1 );
-    real S13sq = c1o4 * ( dUdx3 + dWdx1 ) * ( dUdx3 + dWdx1 );
-    real S23sq = c1o4 * ( dVdx3 + dWdx2 ) * ( dVdx3 + dWdx2 );
-
-    real divergence = dUdx1 + dVdx2 + dWdx3;
-
-    real S = sqrt( c2o1 * ( S11sq + S22sq + S33sq + c2o1 * ( S12sq + S13sq + S23sq ) ) - c2o3 * divergence * divergence );
-
-    real Cs = parameters.smagorinskyConstant;
-
-    return facePrim.rho * Cs*Cs * parameters.dx*parameters.dx * S;
-}
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/FluxComputation/SutherlandsLaw.cuh b/src/gpu/GksGpu/FluxComputation/SutherlandsLaw.cuh
deleted file mode 100644
index 5af3ca3889fc73f4230bdcfb18a7cf13a3c0d82b..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/SutherlandsLaw.cuh
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef SutherlandsLaw_CUH
-#define SutherlandsLaw_CUH
-
-#include <cmath>
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-#include "Core/RealConstants.h"
-
-#include "Parameters/Parameters.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-namespace GksGpu {
-
-inline __host__ __device__ real sutherlandsLaw(Parameters & parameters, const real r)
-{
-    real S  = real( 110.5 );
-
-    real T0 = real( 600.0 );
-
-    real C = S / T0;
-
-    return parameters.mu * sqrt( r * r * r ) * ( C  + c1o1 ) / ( r  + C );
-}
-
-inline __host__ __device__ real sutherlandsLaw2(Parameters & parameters, const real r)
-{
-    real Smu = real( 0.648 );
-
-    real Sk  = real( 0.368 );
-
-    parameters.Pr *= ( ( Smu  + c1o1 ) / ( Sk  + c1o1 ) ) * ( ( r  + Sk ) / ( r  + Smu ) );
-
-    return parameters.mu * sqrt( r * r * r ) * ( Smu  + c1o1 ) / ( r  + Smu );
-}
-
-inline __host__ __device__ real getViscosity(Parameters & parameters, const real r)
-{
-    if ( parameters.viscosityModel == ViscosityModel::sutherlandsLaw ){
-        return sutherlandsLaw( parameters, r );
-    }
-    else if ( parameters.viscosityModel == ViscosityModel::sutherlandsLaw2 ){
-        return sutherlandsLaw2( parameters, r );
-    }
-
-    return parameters.mu;
-}
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/FluxComputation/Transformation.cuh b/src/gpu/GksGpu/FluxComputation/Transformation.cuh
deleted file mode 100644
index d8ffb91fab35c23b8925fc55f3531c4a83c53dbe..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/FluxComputation/Transformation.cuh
+++ /dev/null
@@ -1,128 +0,0 @@
-#ifndef Transformation_CUH
-#define Transformation_CUH
-
-
-#include "GksGpu_export.h"
-
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-namespace GksGpu {
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void transformGlobalToLocal(Vec3& vector, const char direction)
-{
-    if( direction == 'x' ) return;
-
-    if( direction == 'y' )
-    {
-        Vec3 tmp = vector;
-    
-        vector.x = tmp.y;
-        vector.y = tmp.z;
-        vector.z = tmp.x;
-
-        return;
-    }
-
-    if( direction == 'z' )
-    {
-        Vec3 tmp = vector;
-    
-        vector.x = tmp.z;
-        vector.y = tmp.x;
-        vector.z = tmp.y;
-
-        return;
-    }
-}
-
-__host__ __device__ inline void transformLocalToGlobal(Vec3& vector, const char direction)
-{
-    if( direction == 'x' ) return;
-
-    if( direction == 'y' )
-    {
-        Vec3 tmp;
-    
-        tmp.y = vector.x;
-        tmp.z = vector.y;
-        tmp.x = vector.z;
-
-        vector = tmp;
-
-        return;
-    }
-
-    if( direction == 'z' )
-    {
-        Vec3 tmp;
-    
-        tmp.z = vector.x;
-        tmp.x = vector.y;
-        tmp.y = vector.z;
-
-        vector = tmp;
-
-        return;
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void transformGlobalToLocal(ConservedVariables& cons, const char direction)
-{
-    Vec3 vector( cons.rhoU, cons.rhoV, cons.rhoW );
-
-    transformGlobalToLocal( vector, direction );
-
-    cons.rhoU = vector.x;
-    cons.rhoV = vector.y;
-    cons.rhoW = vector.z;
-}
-
-__host__ __device__ inline void transformGlobalToLocal(PrimitiveVariables& prim, const char direction)
-{
-    Vec3 vector( prim.U, prim.V, prim.W );
-
-    transformGlobalToLocal( vector, direction );
-
-    prim.U = vector.x;
-    prim.V = vector.y;
-    prim.W = vector.z;
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void transformLocalToGlobal(ConservedVariables& cons, const char direction)
-{
-    Vec3 vector( cons.rhoU, cons.rhoV, cons.rhoW );
-
-    transformLocalToGlobal( vector, direction );
-
-    cons.rhoU = vector.x;
-    cons.rhoV = vector.y;
-    cons.rhoW = vector.z;
-}
-
-__host__ __device__ inline void transformLocalToGlobal(PrimitiveVariables& prim, const char direction)
-{
-    Vec3 vector( prim.U, prim.V, prim.W );
-
-    transformLocalToGlobal( vector, direction );
-
-    prim.U = vector.x;
-    prim.V = vector.y;
-    prim.W = vector.z;
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Initializer/Initializer.cpp b/src/gpu/GksGpu/Initializer/Initializer.cpp
deleted file mode 100644
index f5c2cf535aefaa4e5662ae8dc588859ccebdc008..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Initializer/Initializer.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "Initializer.h"
-
-#include <sstream>
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-namespace GksGpu {
-
-void Initializer::interpret(SPtr<DataBase> dataBase, std::function<ConservedVariables(Vec3)> initialCondition)
-{
-    for( uint cellIdx = 0; cellIdx < dataBase->numberOfCells; cellIdx++ ){
-
-        Vec3 cellCenter = dataBase->getCellCenter( cellIdx );
-
-        ConservedVariables cellCons = initialCondition(cellCenter);
-
-        dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells) ] = cellCons.rho ;
-        dataBase->dataHost[ RHO_U(cellIdx, dataBase->numberOfCells) ] = cellCons.rhoU;
-        dataBase->dataHost[ RHO_V(cellIdx, dataBase->numberOfCells) ] = cellCons.rhoV;
-        dataBase->dataHost[ RHO_W(cellIdx, dataBase->numberOfCells) ] = cellCons.rhoW;
-        dataBase->dataHost[ RHO_E(cellIdx, dataBase->numberOfCells) ] = cellCons.rhoE;
-    #ifdef USE_PASSIVE_SCALAR
-	    dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ] = cellCons.rhoS_1;
-	    dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ] = cellCons.rhoS_2;
-    #endif // USE_PASSIVE_SCALAR
-    }
-
-    return;
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/Initializer/Initializer.h b/src/gpu/GksGpu/Initializer/Initializer.h
deleted file mode 100644
index badac795fedf7bc6df82c6a34139c7c427c051aa..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Initializer/Initializer.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef  Initializer_H
-#define  Initializer_H
-
-#include <string>
-#include <memory>
-#include <functional>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-#include "DataBase/DataBase.h"
-#include "FlowStateData/FlowStateData.cuh"
-
-namespace GksGpu {
-
-class GKSGPU_EXPORT Initializer
-{
-public:
-
-    static void interpret( SPtr<DataBase> dataBase, std::function<ConservedVariables(Vec3)> initialCondition );
-
-    static void initializeDataUpdate( SPtr<DataBase> dataBase );
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Initializer/InitializerKernel.cu b/src/gpu/GksGpu/Initializer/InitializerKernel.cu
deleted file mode 100644
index 8b456e9be744b0d7999ff0d5e585c55460b7dbcf..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Initializer/InitializerKernel.cu
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "Initializer.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-
-#include "DataBase/DataBaseStruct.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-#include "Definitions/PassiveScalar.h"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-__global__                 void initializeDataUpdateKernel  ( DataBaseStruct dataBase, uint numberOfEntities );
-
-__host__ __device__ inline void initializeDataUpdateFunction( DataBaseStruct dataBase, uint index );
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void Initializer::initializeDataUpdate( SPtr<DataBase> dataBase )
-{
-    CudaUtility::CudaGrid grid( dataBase->numberOfCells, 32 );
-
-    runKernel( initializeDataUpdateKernel,
-               initializeDataUpdateFunction,
-               dataBase->getDeviceType(), grid, 
-               dataBase->toStruct() );
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("Initializer::initializeDataUpdate( SPtr<DataBase> dataBase )");
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__global__ void initializeDataUpdateKernel(DataBaseStruct dataBase, uint numberOfEntities)
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    initializeDataUpdateFunction( dataBase, index );
-}
-
-__host__ __device__ inline void initializeDataUpdateFunction(DataBaseStruct dataBase, uint index)
-{
-    dataBase.dataUpdate[ RHO__(index, dataBase.numberOfCells) ] = c0o1;
-    dataBase.dataUpdate[ RHO_U(index, dataBase.numberOfCells) ] = c0o1;
-    dataBase.dataUpdate[ RHO_V(index, dataBase.numberOfCells) ] = c0o1;
-    dataBase.dataUpdate[ RHO_W(index, dataBase.numberOfCells) ] = c0o1;
-    dataBase.dataUpdate[ RHO_E(index, dataBase.numberOfCells) ] = c0o1;
-#ifdef USE_PASSIVE_SCALAR
-	dataBase.dataUpdate[ RHO_S_1(index, dataBase.numberOfCells) ] = c0o1;
-	dataBase.dataUpdate[ RHO_S_2(index, dataBase.numberOfCells) ] = c0o1;
-#endif // USE_PASSIVE_SCALAR
-
-    dataBase.massFlux[ VEC_X(index, dataBase.numberOfCells) ]   = c0o1;
-    dataBase.massFlux[ VEC_Y(index, dataBase.numberOfCells) ]   = c0o1;
-    dataBase.massFlux[ VEC_Z(index, dataBase.numberOfCells) ]   = c0o1;
-
-    dataBase.diffusivity[ index ] = c1o1;
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/Interface/CoarseToFineKernel.cu b/src/gpu/GksGpu/Interface/CoarseToFineKernel.cu
deleted file mode 100644
index d1451db3c2595fc65a9f052033822459bb07000e..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Interface/CoarseToFineKernel.cu
+++ /dev/null
@@ -1,427 +0,0 @@
-#include "Interface.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-
-#include "DataBase/DataBaseStruct.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "Definitions/PassiveScalar.h"
-#include "Definitions/MemoryAccessPattern.h"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void coarseToFineKernel  ( DataBaseStruct dataBase, uint startIndex, uint numberOfEntities );
-
-__host__ __device__ inline void coarseToFineFunction                      ( DataBaseStruct dataBase, uint startIndex, uint index );
-__host__ __device__ inline void coarseToFineFunctionPrimitiveInterpolation( DataBaseStruct dataBase, uint startIndex, uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Interface::runCoarseToFine( SPtr<DataBase> dataBase, uint level )
-{
-    CudaUtility::CudaGrid grid(dataBase->perLevelCount[level].numberOfCoarseToFine, 128);
-
-    runKernel(coarseToFineKernel,
-              coarseToFineFunction,
-              dataBase->getDeviceType(), grid,
-              dataBase->toStruct(),
-              dataBase->perLevelCount[level].startOfCoarseToFine);
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("void Interface::runCoarseToFine( SPtr<DataBase> dataBase, uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void coarseToFineKernel( DataBaseStruct dataBase, uint startIndex, uint numberOfEntities )
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    coarseToFineFunction( dataBase, startIndex, index );
-    //coarseToFineFunctionPrimitiveInterpolation( dataBase, startIndex, index );
-}
-
-//__host__ __device__ inline void coarseToFineFunction( DataBaseStruct dataBase, uint startIndex, uint index )
-//{
-//    index += startIndex;
-//
-//    uint cellIndex = dataBase.coarseToFine[ COARSE_TO_FINE( index, 0, dataBase.numberOfFineGhostCells ) ];
-//
-//    uint cellToCell [6];
-//
-//    cellToCell[0] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 0, dataBase.numberOfCells ) ];
-//    cellToCell[1] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 1, dataBase.numberOfCells ) ];
-//    cellToCell[2] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 2, dataBase.numberOfCells ) ];
-//    cellToCell[3] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 3, dataBase.numberOfCells ) ];
-//    cellToCell[4] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 4, dataBase.numberOfCells ) ];
-//    cellToCell[5] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 5, dataBase.numberOfCells ) ];
-//
-//    ConservedVariables childCons [8];
-//
-//    {
-//        real data [7];
-//
-//        data[0] = dataBase.data[ RHO__(cellToCell[0], dataBase.numberOfCells) ];
-//        data[1] = dataBase.data[ RHO__(cellToCell[1], dataBase.numberOfCells) ];
-//        data[2] = dataBase.data[ RHO__(cellToCell[2], dataBase.numberOfCells) ];
-//        data[3] = dataBase.data[ RHO__(cellToCell[3], dataBase.numberOfCells) ];
-//        data[4] = dataBase.data[ RHO__(cellToCell[4], dataBase.numberOfCells) ];
-//        data[5] = dataBase.data[ RHO__(cellToCell[5], dataBase.numberOfCells) ];
-//        data[6] = dataBase.data[ RHO__(cellIndex    , dataBase.numberOfCells) ];
-//
-//        //                                      PX        PY        PZ            MX        MY        MZ
-//        childCons[0].rho  = data[6] + c1o8 * ( + data[0] + data[2] + data[4]     - data[1] - data[3] - data[5] ); // PX PY PZ
-//        childCons[1].rho  = data[6] + c1o8 * ( + data[0] + data[2] - data[4]     - data[1] - data[3] + data[5] ); // PX PY MZ
-//        childCons[2].rho  = data[6] + c1o8 * ( + data[0] - data[2] + data[4]     - data[1] + data[3] - data[5] ); // PX MY PZ
-//        childCons[3].rho  = data[6] + c1o8 * ( + data[0] - data[2] - data[4]     - data[1] + data[3] + data[5] ); // PX MY MZ
-//        childCons[4].rho  = data[6] + c1o8 * ( - data[0] + data[2] + data[4]     + data[1] - data[3] - data[5] ); // MX PY PZ
-//        childCons[5].rho  = data[6] + c1o8 * ( - data[0] + data[2] - data[4]     + data[1] - data[3] + data[5] ); // MX PY MZ
-//        childCons[6].rho  = data[6] + c1o8 * ( - data[0] - data[2] + data[4]     + data[1] + data[3] - data[5] ); // MX MY PZ
-//        childCons[7].rho  = data[6] + c1o8 * ( - data[0] - data[2] - data[4]     + data[1] + data[3] + data[5] ); // MX MY MZ
-//    }
-//
-//    {
-//        real data [7];
-//
-//        data[0] = dataBase.data[ RHO_U(cellToCell[0], dataBase.numberOfCells) ];
-//        data[1] = dataBase.data[ RHO_U(cellToCell[1], dataBase.numberOfCells) ];
-//        data[2] = dataBase.data[ RHO_U(cellToCell[2], dataBase.numberOfCells) ];
-//        data[3] = dataBase.data[ RHO_U(cellToCell[3], dataBase.numberOfCells) ];
-//        data[4] = dataBase.data[ RHO_U(cellToCell[4], dataBase.numberOfCells) ];
-//        data[5] = dataBase.data[ RHO_U(cellToCell[5], dataBase.numberOfCells) ];
-//        data[6] = dataBase.data[ RHO_U(cellIndex    , dataBase.numberOfCells) ];
-//
-//        //                                      PX        PY        PZ            MX        MY        MZ
-//        childCons[0].rhoU = data[6] + c1o8 * ( + data[0] + data[2] + data[4]     - data[1] - data[3] - data[5] ); // PX PY PZ
-//        childCons[1].rhoU = data[6] + c1o8 * ( + data[0] + data[2] - data[4]     - data[1] - data[3] + data[5] ); // PX PY MZ
-//        childCons[2].rhoU = data[6] + c1o8 * ( + data[0] - data[2] + data[4]     - data[1] + data[3] - data[5] ); // PX MY PZ
-//        childCons[3].rhoU = data[6] + c1o8 * ( + data[0] - data[2] - data[4]     - data[1] + data[3] + data[5] ); // PX MY MZ
-//        childCons[4].rhoU = data[6] + c1o8 * ( - data[0] + data[2] + data[4]     + data[1] - data[3] - data[5] ); // MX PY PZ
-//        childCons[5].rhoU = data[6] + c1o8 * ( - data[0] + data[2] - data[4]     + data[1] - data[3] + data[5] ); // MX PY MZ
-//        childCons[6].rhoU = data[6] + c1o8 * ( - data[0] - data[2] + data[4]     + data[1] + data[3] - data[5] ); // MX MY PZ
-//        childCons[7].rhoU = data[6] + c1o8 * ( - data[0] - data[2] - data[4]     + data[1] + data[3] + data[5] ); // MX MY MZ
-//    }
-//
-//    {
-//        real data [7];
-//
-//        data[0] = dataBase.data[ RHO_V(cellToCell[0], dataBase.numberOfCells) ];
-//        data[1] = dataBase.data[ RHO_V(cellToCell[1], dataBase.numberOfCells) ];
-//        data[2] = dataBase.data[ RHO_V(cellToCell[2], dataBase.numberOfCells) ];
-//        data[3] = dataBase.data[ RHO_V(cellToCell[3], dataBase.numberOfCells) ];
-//        data[4] = dataBase.data[ RHO_V(cellToCell[4], dataBase.numberOfCells) ];
-//        data[5] = dataBase.data[ RHO_V(cellToCell[5], dataBase.numberOfCells) ];
-//        data[6] = dataBase.data[ RHO_V(cellIndex    , dataBase.numberOfCells) ];
-//
-//        //                                      PX        PY        PZ            MX        MY        MZ
-//        childCons[0].rhoV = data[6] + c1o8 * ( + data[0] + data[2] + data[4]     - data[1] - data[3] - data[5] ); // PX PY PZ
-//        childCons[1].rhoV = data[6] + c1o8 * ( + data[0] + data[2] - data[4]     - data[1] - data[3] + data[5] ); // PX PY MZ
-//        childCons[2].rhoV = data[6] + c1o8 * ( + data[0] - data[2] + data[4]     - data[1] + data[3] - data[5] ); // PX MY PZ
-//        childCons[3].rhoV = data[6] + c1o8 * ( + data[0] - data[2] - data[4]     - data[1] + data[3] + data[5] ); // PX MY MZ
-//        childCons[4].rhoV = data[6] + c1o8 * ( - data[0] + data[2] + data[4]     + data[1] - data[3] - data[5] ); // MX PY PZ
-//        childCons[5].rhoV = data[6] + c1o8 * ( - data[0] + data[2] - data[4]     + data[1] - data[3] + data[5] ); // MX PY MZ
-//        childCons[6].rhoV = data[6] + c1o8 * ( - data[0] - data[2] + data[4]     + data[1] + data[3] - data[5] ); // MX MY PZ
-//        childCons[7].rhoV = data[6] + c1o8 * ( - data[0] - data[2] - data[4]     + data[1] + data[3] + data[5] ); // MX MY MZ
-//    }
-//
-//    {
-//        real data [7];
-//
-//        data[0] = dataBase.data[ RHO_W(cellToCell[0], dataBase.numberOfCells) ];
-//        data[1] = dataBase.data[ RHO_W(cellToCell[1], dataBase.numberOfCells) ];
-//        data[2] = dataBase.data[ RHO_W(cellToCell[2], dataBase.numberOfCells) ];
-//        data[3] = dataBase.data[ RHO_W(cellToCell[3], dataBase.numberOfCells) ];
-//        data[4] = dataBase.data[ RHO_W(cellToCell[4], dataBase.numberOfCells) ];
-//        data[5] = dataBase.data[ RHO_W(cellToCell[5], dataBase.numberOfCells) ];
-//        data[6] = dataBase.data[ RHO_W(cellIndex    , dataBase.numberOfCells) ];
-//
-//        //                                      PX        PY        PZ            MX        MY        MZ
-//        childCons[0].rhoW = data[6] + c1o8 * ( + data[0] + data[2] + data[4]     - data[1] - data[3] - data[5] ); // PX PY PZ
-//        childCons[1].rhoW = data[6] + c1o8 * ( + data[0] + data[2] - data[4]     - data[1] - data[3] + data[5] ); // PX PY MZ
-//        childCons[2].rhoW = data[6] + c1o8 * ( + data[0] - data[2] + data[4]     - data[1] + data[3] - data[5] ); // PX MY PZ
-//        childCons[3].rhoW = data[6] + c1o8 * ( + data[0] - data[2] - data[4]     - data[1] + data[3] + data[5] ); // PX MY MZ
-//        childCons[4].rhoW = data[6] + c1o8 * ( - data[0] + data[2] + data[4]     + data[1] - data[3] - data[5] ); // MX PY PZ
-//        childCons[5].rhoW = data[6] + c1o8 * ( - data[0] + data[2] - data[4]     + data[1] - data[3] + data[5] ); // MX PY MZ
-//        childCons[6].rhoW = data[6] + c1o8 * ( - data[0] - data[2] + data[4]     + data[1] + data[3] - data[5] ); // MX MY PZ
-//        childCons[7].rhoW = data[6] + c1o8 * ( - data[0] - data[2] - data[4]     + data[1] + data[3] + data[5] ); // MX MY MZ
-//    }
-//
-//    {
-//        real data [7];
-//
-//        data[0] = dataBase.data[ RHO_E(cellToCell[0], dataBase.numberOfCells) ];
-//        data[1] = dataBase.data[ RHO_E(cellToCell[1], dataBase.numberOfCells) ];
-//        data[2] = dataBase.data[ RHO_E(cellToCell[2], dataBase.numberOfCells) ];
-//        data[3] = dataBase.data[ RHO_E(cellToCell[3], dataBase.numberOfCells) ];
-//        data[4] = dataBase.data[ RHO_E(cellToCell[4], dataBase.numberOfCells) ];
-//        data[5] = dataBase.data[ RHO_E(cellToCell[5], dataBase.numberOfCells) ];
-//        data[6] = dataBase.data[ RHO_E(cellIndex    , dataBase.numberOfCells) ];
-//
-//        //                                      PX        PY        PZ            MX        MY        MZ
-//        childCons[0].rhoE = data[6] + c1o8 * ( + data[0] + data[2] + data[4]     - data[1] - data[3] - data[5] ); // PX PY PZ
-//        childCons[1].rhoE = data[6] + c1o8 * ( + data[0] + data[2] - data[4]     - data[1] - data[3] + data[5] ); // PX PY MZ
-//        childCons[2].rhoE = data[6] + c1o8 * ( + data[0] - data[2] + data[4]     - data[1] + data[3] - data[5] ); // PX MY PZ
-//        childCons[3].rhoE = data[6] + c1o8 * ( + data[0] - data[2] - data[4]     - data[1] + data[3] + data[5] ); // PX MY MZ
-//        childCons[4].rhoE = data[6] + c1o8 * ( - data[0] + data[2] + data[4]     + data[1] - data[3] - data[5] ); // MX PY PZ
-//        childCons[5].rhoE = data[6] + c1o8 * ( - data[0] + data[2] - data[4]     + data[1] - data[3] + data[5] ); // MX PY MZ
-//        childCons[6].rhoE = data[6] + c1o8 * ( - data[0] - data[2] + data[4]     + data[1] + data[3] - data[5] ); // MX MY PZ
-//        childCons[7].rhoE = data[6] + c1o8 * ( - data[0] - data[2] - data[4]     + data[1] + data[3] + data[5] ); // MX MY MZ
-//    }
-//
-//    #ifdef USE_PASSIVE_SCALAR
-//    {
-//        {
-//            real data[7];
-//
-//            data[0] = dataBase.data[RHO_S_1(cellToCell[0], dataBase.numberOfCells)];
-//            data[1] = dataBase.data[RHO_S_1(cellToCell[1], dataBase.numberOfCells)];
-//            data[2] = dataBase.data[RHO_S_1(cellToCell[2], dataBase.numberOfCells)];
-//            data[3] = dataBase.data[RHO_S_1(cellToCell[3], dataBase.numberOfCells)];
-//            data[4] = dataBase.data[RHO_S_1(cellToCell[4], dataBase.numberOfCells)];
-//            data[5] = dataBase.data[RHO_S_1(cellToCell[5], dataBase.numberOfCells)];
-//            data[6] = dataBase.data[RHO_S_1(cellIndex, dataBase.numberOfCells)];
-//
-//            //                                      PX        PY        PZ            MX        MY        MZ
-//            childCons[0].rhoS_1 = data[6] + c1o8 * (+data[0] + data[2] + data[4] - data[1] - data[3] - data[5]); // PX PY PZ
-//            childCons[1].rhoS_1 = data[6] + c1o8 * (+data[0] + data[2] - data[4] - data[1] - data[3] + data[5]); // PX PY MZ
-//            childCons[2].rhoS_1 = data[6] + c1o8 * (+data[0] - data[2] + data[4] - data[1] + data[3] - data[5]); // PX MY PZ
-//            childCons[3].rhoS_1 = data[6] + c1o8 * (+data[0] - data[2] - data[4] - data[1] + data[3] + data[5]); // PX MY MZ
-//            childCons[4].rhoS_1 = data[6] + c1o8 * (-data[0] + data[2] + data[4] + data[1] - data[3] - data[5]); // MX PY PZ
-//            childCons[5].rhoS_1 = data[6] + c1o8 * (-data[0] + data[2] - data[4] + data[1] - data[3] + data[5]); // MX PY MZ
-//            childCons[6].rhoS_1 = data[6] + c1o8 * (-data[0] - data[2] + data[4] + data[1] + data[3] - data[5]); // MX MY PZ
-//            childCons[7].rhoS_1 = data[6] + c1o8 * (-data[0] - data[2] - data[4] + data[1] + data[3] + data[5]); // MX MY MZ
-//        }
-//
-//        {
-//            real data[7];
-//
-//            data[0] = dataBase.data[RHO_S_2(cellToCell[0], dataBase.numberOfCells)];
-//            data[1] = dataBase.data[RHO_S_2(cellToCell[1], dataBase.numberOfCells)];
-//            data[2] = dataBase.data[RHO_S_2(cellToCell[2], dataBase.numberOfCells)];
-//            data[3] = dataBase.data[RHO_S_2(cellToCell[3], dataBase.numberOfCells)];
-//            data[4] = dataBase.data[RHO_S_2(cellToCell[4], dataBase.numberOfCells)];
-//            data[5] = dataBase.data[RHO_S_2(cellToCell[5], dataBase.numberOfCells)];
-//            data[6] = dataBase.data[RHO_S_2(cellIndex, dataBase.numberOfCells)];
-//
-//            //                                      PX        PY        PZ            MX        MY        MZ
-//            childCons[0].rhoS_2 = data[6] + c1o8 * (+data[0] + data[2] + data[4] - data[1] - data[3] - data[5]); // PX PY PZ
-//            childCons[1].rhoS_2 = data[6] + c1o8 * (+data[0] + data[2] - data[4] - data[1] - data[3] + data[5]); // PX PY MZ
-//            childCons[2].rhoS_2 = data[6] + c1o8 * (+data[0] - data[2] + data[4] - data[1] + data[3] - data[5]); // PX MY PZ
-//            childCons[3].rhoS_2 = data[6] + c1o8 * (+data[0] - data[2] - data[4] - data[1] + data[3] + data[5]); // PX MY MZ
-//            childCons[4].rhoS_2 = data[6] + c1o8 * (-data[0] + data[2] + data[4] + data[1] - data[3] - data[5]); // MX PY PZ
-//            childCons[5].rhoS_2 = data[6] + c1o8 * (-data[0] + data[2] - data[4] + data[1] - data[3] + data[5]); // MX PY MZ
-//            childCons[6].rhoS_2 = data[6] + c1o8 * (-data[0] - data[2] + data[4] + data[1] + data[3] - data[5]); // MX MY PZ
-//            childCons[7].rhoS_2 = data[6] + c1o8 * (-data[0] - data[2] - data[4] + data[1] + data[3] + data[5]); // MX MY MZ
-//        }
-//    }
-//    #endif // USE_PASSIVE_SCALAR
-//
-//#pragma unroll
-//    for( uint childIndex = 0; childIndex < 8; childIndex++ ){
-//
-//        uint childCellIndex = dataBase.coarseToFine[ COARSE_TO_FINE( index, ( 1 + childIndex ), dataBase.numberOfFineGhostCells ) ];
-//
-//        dataBase.data[ RHO__(childCellIndex, dataBase.numberOfCells) ] = childCons[childIndex].rho ;
-//        dataBase.data[ RHO_U(childCellIndex, dataBase.numberOfCells) ] = childCons[childIndex].rhoU;
-//        dataBase.data[ RHO_V(childCellIndex, dataBase.numberOfCells) ] = childCons[childIndex].rhoV;
-//        dataBase.data[ RHO_W(childCellIndex, dataBase.numberOfCells) ] = childCons[childIndex].rhoW;
-//        dataBase.data[ RHO_E(childCellIndex, dataBase.numberOfCells) ] = childCons[childIndex].rhoE;
-//    #ifdef USE_PASSIVE_SCALAR
-//	    dataBase.data[ RHO_S_1(childCellIndex, dataBase.numberOfCells) ] = childCons[childIndex].rhoS_1;
-//	    dataBase.data[ RHO_S_2(childCellIndex, dataBase.numberOfCells) ] = childCons[childIndex].rhoS_2;
-//    #endif // USE_PASSIVE_SCALAR
-//    }
-//}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-__host__ __device__ inline void coarseToFineFunction( DataBaseStruct dataBase, uint startIndex, uint index )
-{
-    index += startIndex;
-
-    uint cellIndex = dataBase.coarseToFine[ COARSE_TO_FINE( index, 0, dataBase.numberOfFineGhostCells ) ];
-
-    uint cellToCell [6];
-
-    cellToCell[0] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 0, dataBase.numberOfCells ) ];
-    cellToCell[1] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 1, dataBase.numberOfCells ) ];
-    cellToCell[2] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 2, dataBase.numberOfCells ) ];
-    cellToCell[3] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 3, dataBase.numberOfCells ) ];
-    cellToCell[4] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 4, dataBase.numberOfCells ) ];
-    cellToCell[5] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 5, dataBase.numberOfCells ) ];
-
-    ConservedVariables cons[7];
-
-    readCellData(cellToCell[0], dataBase, cons[0]);
-    readCellData(cellToCell[1], dataBase, cons[1]);
-    readCellData(cellToCell[2], dataBase, cons[2]);
-    readCellData(cellToCell[3], dataBase, cons[3]);
-    readCellData(cellToCell[4], dataBase, cons[4]);
-    readCellData(cellToCell[5], dataBase, cons[5]);
-    readCellData(cellIndex, dataBase, cons[6]);
-
-    ConservedVariables childCons [8];
-    ConservedVariables zeroCons;
-
-    //                                                 PX           PY           PZ               MX           MY           MZ
-    childCons[0]    = cons[6]    + c1o8 * ( zeroCons + cons[0]    + cons[2]    + cons[4]        - cons[1]    - cons[3]    - cons[5]    ); // PX PY PZ
-    childCons[1]    = cons[6]    + c1o8 * ( zeroCons + cons[0]    + cons[2]    - cons[4]        - cons[1]    - cons[3]    + cons[5]    ); // PX PY MZ
-    childCons[2]    = cons[6]    + c1o8 * ( zeroCons + cons[0]    - cons[2]    + cons[4]        - cons[1]    + cons[3]    - cons[5]    ); // PX MY PZ
-    childCons[3]    = cons[6]    + c1o8 * ( zeroCons + cons[0]    - cons[2]    - cons[4]        - cons[1]    + cons[3]    + cons[5]    ); // PX MY MZ
-    childCons[4]    = cons[6]    + c1o8 * ( zeroCons - cons[0]    + cons[2]    + cons[4]        + cons[1]    - cons[3]    - cons[5]    ); // MX PY PZ
-    childCons[5]    = cons[6]    + c1o8 * ( zeroCons - cons[0]    + cons[2]    - cons[4]        + cons[1]    - cons[3]    + cons[5]    ); // MX PY MZ
-    childCons[6]    = cons[6]    + c1o8 * ( zeroCons - cons[0]    - cons[2]    + cons[4]        + cons[1]    + cons[3]    - cons[5]    ); // MX MY PZ
-    childCons[7]    = cons[6]    + c1o8 * ( zeroCons - cons[0]    - cons[2]    - cons[4]        + cons[1]    + cons[3]    + cons[5]    ); // MX MY MZ
-    
-#ifdef USE_PASSIVE_SCALAR
-    ConservedVariables min(  1.0e99,  1.0e99,  1.0e99,  1.0e99,  1.0e99,  1.0e99,  1.0e99 );
-    ConservedVariables max( -1.0e99, -1.0e99, -1.0e99, -1.0e99, -1.0e99, -1.0e99, -1.0e99 );
-#else
-    ConservedVariables min(  1.0e99,  1.0e99,  1.0e99,  1.0e99,  1.0e99 );
-    ConservedVariables max( -1.0e99, -1.0e99, -1.0e99, -1.0e99, -1.0e99 );
-#endif
-
-    for( uint index = 0; index < 7; index++ )
-    {
-        if( cons[ index ].rho    < min.rho    ) min.rho    = cons[ index ].rho   ;
-        if( cons[ index ].rhoU   < min.rhoU   ) min.rhoU   = cons[ index ].rhoU  ;
-        if( cons[ index ].rhoV   < min.rhoV   ) min.rhoV   = cons[ index ].rhoV  ;
-        if( cons[ index ].rhoW   < min.rhoW   ) min.rhoW   = cons[ index ].rhoW  ;
-        if( cons[ index ].rhoE   < min.rhoE   ) min.rhoE   = cons[ index ].rhoE  ;
-    #ifdef USE_PASSIVE_SCALAR
-        if( cons[ index ].rhoS_1 < min.rhoS_1 ) min.rhoS_1 = cons[ index ].rhoS_1;
-        if( cons[ index ].rhoS_2 < min.rhoS_2 ) min.rhoS_2 = cons[ index ].rhoS_2;
-    #endif
-
-        if( cons[ index ].rho    > max.rho    ) max.rho    = cons[ index ].rho   ;
-        if( cons[ index ].rhoU   > max.rhoU   ) max.rhoU   = cons[ index ].rhoU  ;
-        if( cons[ index ].rhoV   > max.rhoV   ) max.rhoV   = cons[ index ].rhoV  ;
-        if( cons[ index ].rhoW   > max.rhoW   ) max.rhoW   = cons[ index ].rhoW  ;
-        if( cons[ index ].rhoE   > max.rhoE   ) max.rhoE   = cons[ index ].rhoE  ;
-    #ifdef USE_PASSIVE_SCALAR
-        if( cons[ index ].rhoS_1 > max.rhoS_1 ) max.rhoS_1 = cons[ index ].rhoS_1;
-        if( cons[ index ].rhoS_2 > max.rhoS_2 ) max.rhoS_2 = cons[ index ].rhoS_2;
-    #endif
-    }
-
-#pragma unroll
-    for( uint index = 0; index < 8; index++ )
-    {
-        if( childCons[ index ].rho    < min.rho    ) childCons[ index ].rho    = min.rho    ;
-        if( childCons[ index ].rhoU   < min.rhoU   ) childCons[ index ].rhoU   = min.rhoU   ;
-        if( childCons[ index ].rhoV   < min.rhoV   ) childCons[ index ].rhoV   = min.rhoV   ;
-        if( childCons[ index ].rhoW   < min.rhoW   ) childCons[ index ].rhoW   = min.rhoW   ;
-        if( childCons[ index ].rhoE   < min.rhoE   ) childCons[ index ].rhoE   = min.rhoE   ;
-    #ifdef USE_PASSIVE_SCALAR
-        if( childCons[ index ].rhoS_1 < min.rhoS_1 ) childCons[ index ].rhoS_1 = min.rhoS_1 ;
-        if( childCons[ index ].rhoS_2 < min.rhoS_2 ) childCons[ index ].rhoS_2 = min.rhoS_2 ;
-    #endif
-        
-        if( childCons[ index ].rho    > max.rho    ) childCons[ index ].rho    = max.rho    ;
-        if( childCons[ index ].rhoU   > max.rhoU   ) childCons[ index ].rhoU   = max.rhoU   ;
-        if( childCons[ index ].rhoV   > max.rhoV   ) childCons[ index ].rhoV   = max.rhoV   ;
-        if( childCons[ index ].rhoW   > max.rhoW   ) childCons[ index ].rhoW   = max.rhoW   ;
-        if( childCons[ index ].rhoE   > max.rhoE   ) childCons[ index ].rhoE   = max.rhoE   ;
-    #ifdef USE_PASSIVE_SCALAR
-        if( childCons[ index ].rhoS_1 > max.rhoS_1 ) childCons[ index ].rhoS_1 = max.rhoS_1 ;
-        if( childCons[ index ].rhoS_2 > max.rhoS_2 ) childCons[ index ].rhoS_2 = max.rhoS_2 ;
-    #endif
-    }
-
-#pragma unroll
-    for( uint childIndex = 0; childIndex < 8; childIndex++ ){
-
-        uint childCellIndex = dataBase.coarseToFine[ COARSE_TO_FINE( index, ( 1 + childIndex ), dataBase.numberOfFineGhostCells ) ];
-
-        writeCellData(childCellIndex, dataBase, childCons[childIndex]);
-    }
-}
-
-__host__ __device__ inline void coarseToFineFunctionPrimitiveInterpolation( DataBaseStruct dataBase, uint startIndex, uint index )
-{
-    index += startIndex;
-
-    uint cellIndex = dataBase.coarseToFine[ COARSE_TO_FINE( index, 0, dataBase.numberOfFineGhostCells ) ];
-
-    uint cellToCell [6];
-
-    cellToCell[0] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 0, dataBase.numberOfCells ) ];
-    cellToCell[1] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 1, dataBase.numberOfCells ) ];
-    cellToCell[2] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 2, dataBase.numberOfCells ) ];
-    cellToCell[3] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 3, dataBase.numberOfCells ) ];
-    cellToCell[4] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 4, dataBase.numberOfCells ) ];
-    cellToCell[5] = dataBase.cellToCell[ CELL_TO_CELL( cellIndex, 5, dataBase.numberOfCells ) ];
-
-    PrimitiveVariables prim [7];
-    ConservedVariables cons[7];
-
-    readCellData(cellToCell[0], dataBase, cons[0]);
-    readCellData(cellToCell[1], dataBase, cons[1]);
-    readCellData(cellToCell[2], dataBase, cons[2]);
-    readCellData(cellToCell[3], dataBase, cons[3]);
-    readCellData(cellToCell[4], dataBase, cons[4]);
-    readCellData(cellToCell[5], dataBase, cons[5]);
-    readCellData(cellIndex, dataBase, cons[6]);
-
-    prim[0] = toPrimitiveVariables(cons[0], c2o1);
-    prim[1] = toPrimitiveVariables(cons[1], c2o1);
-    prim[2] = toPrimitiveVariables(cons[2], c2o1);
-    prim[3] = toPrimitiveVariables(cons[3], c2o1);
-    prim[4] = toPrimitiveVariables(cons[4], c2o1);
-    prim[5] = toPrimitiveVariables(cons[5], c2o1);
-    prim[6] = toPrimitiveVariables(cons[6], c2o1);
-
-    PrimitiveVariables childPrim [8];
-    PrimitiveVariables zeroPrim;
-
-    //                                                     PX           PY           PZ               MX           MY           MZ
-        childPrim[0]    = prim[6]    + c1o8 * ( zeroPrim + prim[0]    + prim[2]    + prim[4]        - prim[1]    - prim[3]    - prim[5]    ); // PX PY PZ
-        childPrim[1]    = prim[6]    + c1o8 * ( zeroPrim + prim[0]    + prim[2]    - prim[4]        - prim[1]    - prim[3]    + prim[5]    ); // PX PY MZ
-        childPrim[2]    = prim[6]    + c1o8 * ( zeroPrim + prim[0]    - prim[2]    + prim[4]        - prim[1]    + prim[3]    - prim[5]    ); // PX MY PZ
-        childPrim[3]    = prim[6]    + c1o8 * ( zeroPrim + prim[0]    - prim[2]    - prim[4]        - prim[1]    + prim[3]    + prim[5]    ); // PX MY MZ
-        childPrim[4]    = prim[6]    + c1o8 * ( zeroPrim - prim[0]    + prim[2]    + prim[4]        + prim[1]    - prim[3]    - prim[5]    ); // MX PY PZ
-        childPrim[5]    = prim[6]    + c1o8 * ( zeroPrim - prim[0]    + prim[2]    - prim[4]        + prim[1]    - prim[3]    + prim[5]    ); // MX PY MZ
-        childPrim[6]    = prim[6]    + c1o8 * ( zeroPrim - prim[0]    - prim[2]    + prim[4]        + prim[1]    + prim[3]    - prim[5]    ); // MX MY PZ
-        childPrim[7]    = prim[6]    + c1o8 * ( zeroPrim - prim[0]    - prim[2]    - prim[4]        + prim[1]    + prim[3]    + prim[5]    ); // MX MY MZ
-
-#pragma unroll
-    for( uint childIndex = 0; childIndex < 8; childIndex++ ){
-
-        uint childCellIndex = dataBase.coarseToFine[ COARSE_TO_FINE( index, ( 1 + childIndex ), dataBase.numberOfFineGhostCells ) ];
-
-        ConservedVariables childCons = toConservedVariables(childPrim[childIndex], c2o1);
-
-        writeCellData(childCellIndex, dataBase, childCons);
-    }
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/Interface/FineToCoarseKernel.cu b/src/gpu/GksGpu/Interface/FineToCoarseKernel.cu
deleted file mode 100644
index e31fdb912d735669297027b4f15808869d17abf2..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Interface/FineToCoarseKernel.cu
+++ /dev/null
@@ -1,115 +0,0 @@
-#include "Interface.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include "PointerDefinitions.h"
-
-#include "DataBase/DataBaseStruct.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-
-#include "FlowStateData/AccessDeviceData.cuh"
-
-#include "Definitions/PassiveScalar.h"
-#include "Definitions/MemoryAccessPattern.h"
-
-#include "CudaUtility/CudaRunKernel.hpp"
-
-namespace GksGpu {
-
-//////////////////////////////////////////////////////////////////////////
-
-__global__                 void fineToCoarseKernel  ( DataBaseStruct dataBase, uint startIndex, uint numberOfEntities );
-
-__host__ __device__ inline void fineToCoarseFunction                      ( DataBaseStruct dataBase, uint startIndex, uint index );
-__host__ __device__ inline void fineToCoarseFunctionPrimitiveInterpolation( DataBaseStruct dataBase, uint startIndex, uint index );
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-void Interface::runFineToCoarse( SPtr<DataBase> dataBase, uint level )
-{    
-    CudaUtility::CudaGrid grid(dataBase->perLevelCount[level].numberOfFineToCoarse, 128);
-
-    runKernel(fineToCoarseKernel,
-              fineToCoarseFunction,
-              dataBase->getDeviceType(), grid,
-              dataBase->toStruct(),
-              dataBase->perLevelCount[level].startOfFineToCoarse);
-
-    cudaDeviceSynchronize();
-
-    getLastCudaError("Interface::runFineToCoarse( SPtr<DataBase> dataBase, uint level )");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////
-
-__global__ void fineToCoarseKernel( DataBaseStruct dataBase, uint startIndex, uint numberOfEntities )
-{
-    uint index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if( index >= numberOfEntities ) return;
-
-    fineToCoarseFunction( dataBase, startIndex, index );
-    //fineToCoarseFunctionPrimitiveInterpolation( dataBase, startIndex, index );
-}
-
-__host__ __device__ inline void fineToCoarseFunction( DataBaseStruct dataBase, uint startIndex, uint index )
-{
-    index += startIndex;
-
-    ConservedVariables parentCons;
-
-#pragma unroll
-    for( uint childIdx = 1; childIdx < LENGTH_FINE_TO_COARSE; childIdx++ ){
-
-        uint cellIdx = dataBase.fineToCoarse[ FINE_TO_COARSE( index, childIdx, dataBase.numberOfCoarseGhostCells ) ];
-
-        ConservedVariables cons;
-
-        readCellData( cellIdx, dataBase, cons );
-
-        parentCons = parentCons + c1o8 * cons;
-    }
-
-    {
-        uint cellIdx = dataBase.fineToCoarse[FINE_TO_COARSE(index, 0, dataBase.numberOfCoarseGhostCells)];
-
-        writeCellData(cellIdx, dataBase, parentCons);
-    }
-}
-
-__host__ __device__ inline void fineToCoarseFunctionPrimitiveInterpolation( DataBaseStruct dataBase, uint startIndex, uint index )
-{
-    index += startIndex;
-
-    PrimitiveVariables parentPrim;
-
-#pragma unroll
-    for( uint childIdx = 1; childIdx < LENGTH_FINE_TO_COARSE; childIdx++ ){
-
-        uint cellIdx = dataBase.fineToCoarse[ FINE_TO_COARSE( index, childIdx, dataBase.numberOfCoarseGhostCells ) ];
-
-        ConservedVariables cons;
-
-        readCellData( cellIdx, dataBase, cons );
-
-        parentPrim = parentPrim + c1o8 * toPrimitiveVariables(cons, c2o1);
-    }
-
-    {
-        uint cellIdx = dataBase.fineToCoarse[FINE_TO_COARSE(index, 0, dataBase.numberOfCoarseGhostCells)];
-
-        ConservedVariables parentCons = toConservedVariables(parentPrim, c2o1);
-
-        writeCellData(cellIdx, dataBase, parentCons);
-    }
-}
-
-} // namespace GksGpu
diff --git a/src/gpu/GksGpu/Interface/Interface.h b/src/gpu/GksGpu/Interface/Interface.h
deleted file mode 100644
index 5aa99bd99db520818a18aa638c8061995b58aa6e..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Interface/Interface.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef  FineToCoarse_H
-#define  FineToCoarse_H
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-
-namespace GksGpu {
-
-class GKSGPU_EXPORT Interface
-{
-public:
-    static void runFineToCoarse( SPtr<DataBase> dataBase, uint level );
-
-    static void runCoarseToFine( SPtr<DataBase> dataBase, uint level );
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Output/VtkWriter.cpp b/src/gpu/GksGpu/Output/VtkWriter.cpp
deleted file mode 100644
index 234151c7df481e81e5dd68c9a4692831f7271f54..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Output/VtkWriter.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file VtkWriter.cpp
-//! \ingroup Output
-//! \author Stephan Lenz
-//=======================================================================================
-#include "VtkWriter.h"
-
-#include <vector>
-#include <memory>
-
-#include "Core/Logger/Logger.h"
-
-#include "basics/utilities/UbTuple.h"
-#include "basics/writer/WbWriterVtkXmlBinary.h"
-
-#include "DataBase/DataBase.h"
-#include "Parameters/Parameters.h"
-
-#include "FlowStateData/FlowStateData.cuh"
-#include "FlowStateData/FlowStateDataConversion.cuh"
-#include "FlowStateData/AccessDeviceData.cuh"
-
-namespace GksGpu {
-
-void VtkWriter::write(std::shared_ptr<DataBase> dataBase, Parameters parameters, std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << filename << ".vtu" << " ... \n";
-
-    //////////////////////////////////////////////////////////////////////////
-
-    std::vector< UbTupleFloat3 > nodes;
-    std::vector< UbTupleInt8  > cells;
-
-    nodes.resize( dataBase->numberOfNodes );
-    cells.resize( dataBase->numberOfCells );
-
-    for( uint nodeIdx = 0; nodeIdx < dataBase->numberOfNodes; nodeIdx++ )
-    {
-        Vec3& node = dataBase->nodeCoordinates[ nodeIdx ];
-
-        nodes[nodeIdx] = makeUbTuple( node.x, node.y, node.z );
-    }
-    
-    for( uint cellIdx = 0; cellIdx < dataBase->numberOfCells; cellIdx++ )
-    {
-        cells[cellIdx] = makeUbTuple( (int)dataBase->cellToNode[ cellIdx ][ 0 ],
-                                      (int)dataBase->cellToNode[ cellIdx ][ 1 ],
-                                      (int)dataBase->cellToNode[ cellIdx ][ 2 ],
-                                      (int)dataBase->cellToNode[ cellIdx ][ 3 ],
-                                      (int)dataBase->cellToNode[ cellIdx ][ 4 ],
-                                      (int)dataBase->cellToNode[ cellIdx ][ 5 ],
-                                      (int)dataBase->cellToNode[ cellIdx ][ 6 ],
-                                      (int)dataBase->cellToNode[ cellIdx ][ 7 ] );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    std::vector< std::string > cellDataNames;
-    cellDataNames.push_back("Press");       // 0
-    cellDataNames.push_back("Rho");         // 1
-    cellDataNames.push_back("Vx");          // 2
-    cellDataNames.push_back("Vy");          // 3
-    cellDataNames.push_back("Vz");          // 4
-    cellDataNames.push_back("Temperature"); // 5
-    cellDataNames.push_back("Geometry");    // 6
-#ifdef USE_PASSIVE_SCALAR
-    cellDataNames.push_back("S_1");         // 7
-    cellDataNames.push_back("S_2");         // 8
-#endif
-
-    //////////////////////////////////////////////////////////////////////////
-
-    std::vector< std::vector< double > > cellData(cellDataNames.size());
-
-    for( auto& i : cellData ) i.resize( dataBase->numberOfCells );
-
-    for( uint cellIdx = 0; cellIdx < dataBase->numberOfCells; cellIdx++ )
-    {
-        ConservedVariables cons;
-
-        cons.rho  = dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoU = dataBase->dataHost[ RHO_U(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoV = dataBase->dataHost[ RHO_V(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoW = dataBase->dataHost[ RHO_W(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoE = dataBase->dataHost[ RHO_E(cellIdx, dataBase->numberOfCells) ];
-#ifdef USE_PASSIVE_SCALAR
-        cons.rhoS_1 = dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoS_2 = dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ];
-#endif // USE_PASSIVE_SCALAR
-
-        PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-
-        real p = 0.5 * prim.rho / prim.lambda;
-
-#ifdef USE_PASSIVE_SCALAR
-        real T = getT(prim);
-#else // USE_PASSIVE_SCALAR
-        real T = 1.0 / prim.lambda;
-#endif // USE_PASSIVE_SCALAR
-
-        cellData[0][cellIdx] = p;
-        cellData[1][cellIdx] = prim.rho;
-        cellData[2][cellIdx] = prim.U;
-        cellData[3][cellIdx] = prim.V;
-        cellData[4][cellIdx] = prim.W;
-        cellData[5][cellIdx] = T;
-        cellData[6][cellIdx] = dataBase->isGhostCell(cellIdx);
-#ifdef USE_PASSIVE_SCALAR
-        cellData[7][cellIdx] = prim.S_1;
-        cellData[8][cellIdx] = prim.S_2;
-#endif
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    WbWriterVtkXmlBinary::getInstance()->writeOctsWithCellData(filename, nodes, cells, cellDataNames, cellData);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-}
diff --git a/src/gpu/GksGpu/Output/VtkWriter.h b/src/gpu/GksGpu/Output/VtkWriter.h
deleted file mode 100644
index 679fae55b2db5ec418b389ca0840961ab8f80dde..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Output/VtkWriter.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file VtkWriter.h
-//! \ingroup Output
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef VTK_WRITER_H
-#define VTK_WRITER_H
-
-#include <memory>
-#include <string>
-
-#include "GksGpu_export.h"
-
-namespace GksGpu {
-
-struct DataBase;
-struct Parameters;
-
-
-class GKSGPU_EXPORT VtkWriter
-{
-public:
-    static void write( std::shared_ptr<DataBase> dataBase, 
-                       Parameters parameters, 
-                       std::string filename );
-};
-
-}
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksGpu/Parameters/Parameters.h b/src/gpu/GksGpu/Parameters/Parameters.h
deleted file mode 100644
index ab7bdb4c61909f3f08b2857e62b458d14d589280..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Parameters/Parameters.h
+++ /dev/null
@@ -1,78 +0,0 @@
-#ifndef Parameters_H
-#define Parameters_H
-
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-
-
-#include "GksGpu_export.h"
-
-namespace GksGpu {
-
-enum class GKSGPU_EXPORT ViscosityModel{
-    constant,
-    sutherlandsLaw,
-    sutherlandsLaw2
-};
-
-struct  GKSGPU_EXPORT Parameters
-{
-
-    real mu = real(0.01);
-    real K  = real(2.0);
-    real Pr = real(1.0);
-    real D  = real(0.01);
-    real D1 = real(0.01);
-    real D2 = real(0.01);
-
-    real dt = real(0.01);
-    real dx = real(0.01);
-
-    Vec3 force;
-
-    real lambdaRef = real(1.0);
-
-    real rhoRef = real(1.0);
-
-    ViscosityModel viscosityModel = ViscosityModel::constant;
-
-    real boussinesqT0   = real(1.0);
-    real boussinesqBeta = real(1.0);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    bool useSmagorinsky = false;
-    real smagorinskyConstant = real(0.2);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    bool useSpongeLayer = false;
-    uint spongeLayerIdx = 0;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    uint forcingSchemeIdx = 0;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    bool enableReaction = false;
-
-    real heatOfReaction = real(8000.0); // kJ / kmol  
-
-    bool useHeatReleaseRateLimiter = false;
-    bool useTemperatureLimiter     = false;
-    bool usePassiveScalarLimiter   = false;
-
-    real heatReleaseRateLimiter       = real(20000.0);
-    real temperatureLimiter           = real(1.0e-3);
-    real temperatureLimiterUpperLimit = real(1.0e-3);
-    real passiveScalarLimiter         = real(0.1);
-
-    // deprecated limiters
-    bool useReactionLimiter = false;
-    real reactionLimiter    = real(1.005);
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksGpu/Restart/Restart.cpp b/src/gpu/GksGpu/Restart/Restart.cpp
deleted file mode 100644
index 5f593c22662f2a6c471d59d432b8694d9bdefc6d..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Restart/Restart.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "Restart.h"
-
-#include <iostream>
-#include <fstream>
-
-#include "PointerDefinitions.h"
-#include "Core/RealConstants.h"
-#include "Core/Logger/Logger.h"
-
-#include "DataBase/DataBase.h"
-
-#include "Definitions/MemoryAccessPattern.h"
-
-namespace GksGpu {
-
-void Restart::writeRestart( SPtr<DataBase> dataBase, std::string filename, uint iter )
-{
-    filename += ".rst";
-
-    *logging::out << logging::Logger::INFO_HIGH << "Writing restart file " << filename << " ... ";
-	
-    std::ofstream file;
-
-	file.open( filename.c_str(), std::ios::binary );
-
-	if (!file.is_open()) {
-		throw std::runtime_error("\nFile cannot be opened.\n\nERROR!\n\n\n");
-        return;
-	}
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.write( (char*) &iter, sizeof( uint ) );
-
-    file.write( (char*) &dataBase->numberOfLevels, sizeof( uint ) );
-    file.write( (char*) &dataBase->numberOfCells,  sizeof( uint ) );
-    file.write( (char*) &dataBase->numberOfFaces,  sizeof( uint ) );
-
-    file.write( (char*) dataBase->dataHost.data(), LENGTH_CELL_DATA * dataBase->numberOfCells * sizeof( real ) );
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_HIGH << "done!\n";
-}
-
-bool Restart::readRestart( SPtr<DataBase> dataBase, std::string filename, uint& iter )
-{
-    filename += ".rst";
-
-    *logging::out << logging::Logger::INFO_HIGH << "Reading restart file " << filename << " ... ";
-	
-    std::ifstream file;
-
-	file.open( filename.c_str(), std::ios::binary );
-
-	if (!file.is_open()) {
-		throw std::runtime_error("\nFile cannot be opened.\n\nERROR!\n\n\n");
-        return false;
-	}
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.read( (char*) &iter, sizeof( uint ) );
-
-    uint numberOfLevelsRead;
-    uint numberOfCellsRead;
-    uint numberOfFacesRead;
-    
-    file.read( (char*) &numberOfLevelsRead, sizeof( uint ) );
-    file.read( (char*) &numberOfCellsRead,  sizeof( uint ) );
-    file.read( (char*) &numberOfFacesRead,  sizeof( uint ) );
-
-    if( numberOfLevelsRead != dataBase->numberOfLevels ||
-        numberOfCellsRead  != dataBase->numberOfCells  ||
-        numberOfFacesRead  != dataBase->numberOfFaces  ){
-    
-        *logging::out << logging::Logger::INFO_HIGH << "\n";
-        *logging::out << logging::Logger::INFO_HIGH << "Levels: " << numberOfLevelsRead << " vs. " << dataBase->numberOfLevels << "\n";
-        *logging::out << logging::Logger::INFO_HIGH << "Cells:  " << numberOfCellsRead  << " vs. " << dataBase->numberOfCells  << "\n";
-        *logging::out << logging::Logger::INFO_HIGH << "Faces:  " << numberOfFacesRead  << " vs. " << dataBase->numberOfFaces  << "\n";
-
-        file.close();
-
-        throw std::runtime_error("\nERROR: Restart file does not match current setup");
-    }
-
-    file.read( (char*) dataBase->dataHost.data(), LENGTH_CELL_DATA * dataBase->numberOfCells * sizeof( real ) );
-
-    file.close();
-
-    *logging::out << logging::Logger::INFO_HIGH << "done!\n";
-
-    return true;
-}
-
-} // namespace GksGpu
\ No newline at end of file
diff --git a/src/gpu/GksGpu/Restart/Restart.h b/src/gpu/GksGpu/Restart/Restart.h
deleted file mode 100644
index 235a4334373be3f0b6c788ba5f89d226e5c77f17..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/Restart/Restart.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef Restart_h
-#define Restart_h
-
-#include <string>
-#include <memory>
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-namespace GksGpu {
-
-struct DataBase;
-
-class GKSGPU_EXPORT Restart
-{
-
-public:
-    static void writeRestart( SPtr<DataBase> dataBase, std::string filename, uint  iter );
-
-    static bool readRestart ( SPtr<DataBase> dataBase, std::string filename, uint& iter );
-
-private:
-    Restart(){}
-    ~Restart(){}
-};
-
-} // namespace GksGpu
-
-
-#endif
diff --git a/src/gpu/GksGpu/TimeStepping/NestedTimeStep.cpp b/src/gpu/GksGpu/TimeStepping/NestedTimeStep.cpp
deleted file mode 100644
index 516349ff67bc9fa6618ab40aee32a94148633c5d..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/TimeStepping/NestedTimeStep.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-#include "NestedTimeStep.h"
-
-#include <iostream>
-
-#include "Core/RealConstants.h"
-
-#include "BoundaryConditions/BoundaryCondition.h"
-#include "Communication/Communicator.h"
-#include "CellUpdate/CellUpdate.h"
-#include "FluxComputation/FluxComputation.h"
-#include "Interface/Interface.h"
-#include "Initializer/Initializer.h"
-#include "CudaUtility/CudaUtility.h"
-
-namespace GksGpu {
-
-void TimeStepping::nestedTimeStep( SPtr<DataBase> dataBase, 
-                                   Parameters parameters,
-                                   uint level )
-{
-    //////////////////////////////////////////////////////////////////////////
-
-    if( level != 0 ) parameters.dt /= c2o1;
-    if( level != 0 ) parameters.dx /= c2o1;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( level != dataBase->numberOfLevels - 1 )
-    {
-        Interface::runFineToCoarse( dataBase, level );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    for( SPtr<BoundaryCondition> bc : dataBase->boundaryConditions )
-    {
-        bc->runBoundaryConditionKernel( dataBase, parameters, level );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::synchronizeCudaDevice();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    FluxComputation::run( dataBase, parameters, level ); // comment out to disable commhiding
-    
-    //////////////////////////////////////////////////////////////////////////
-    
-    if( !dataBase->communicators.empty() )
-    {
-        //////////////////////////////////////////////////////////////////////////
-        // X
-        //////////////////////////////////////////////////////////////////////////
-        if( dataBase->communicators[level][0] != nullptr ) dataBase->communicators[level][0]->sendData(dataBase, Communicator::tagSendNegative);
-        if( dataBase->communicators[level][1] != nullptr ) dataBase->communicators[level][1]->sendData(dataBase, Communicator::tagSendPositive);
-
-        if( dataBase->communicators[level][0] != nullptr ) dataBase->communicators[level][0]->recvData(dataBase, Communicator::tagSendPositive);
-        if( dataBase->communicators[level][1] != nullptr ) dataBase->communicators[level][1]->recvData(dataBase, Communicator::tagSendNegative);
-        //////////////////////////////////////////////////////////////////////////
-        // Y
-        //////////////////////////////////////////////////////////////////////////
-        if( dataBase->communicators[level][2] != nullptr ) dataBase->communicators[level][2]->sendData(dataBase, Communicator::tagSendNegative);
-        if( dataBase->communicators[level][3] != nullptr ) dataBase->communicators[level][3]->sendData(dataBase, Communicator::tagSendPositive);
-        
-        if( dataBase->communicators[level][2] != nullptr ) dataBase->communicators[level][2]->recvData(dataBase, Communicator::tagSendPositive);
-        if( dataBase->communicators[level][3] != nullptr ) dataBase->communicators[level][3]->recvData(dataBase, Communicator::tagSendNegative);
-        //////////////////////////////////////////////////////////////////////////
-        // Z
-        //////////////////////////////////////////////////////////////////////////
-        if( dataBase->communicators[level][4] != nullptr ) dataBase->communicators[level][4]->sendData(dataBase, Communicator::tagSendNegative);
-        if( dataBase->communicators[level][5] != nullptr ) dataBase->communicators[level][5]->sendData(dataBase, Communicator::tagSendPositive);
-        
-        if( dataBase->communicators[level][4] != nullptr ) dataBase->communicators[level][4]->recvData(dataBase, Communicator::tagSendPositive);
-        if( dataBase->communicators[level][5] != nullptr ) dataBase->communicators[level][5]->recvData(dataBase, Communicator::tagSendNegative);
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    FluxComputation::run( dataBase, parameters, level, true ); // comment out to disable commhiding
-    
-    //CudaUtility::synchronizeCudaDevice();                   // comment in to disable commhiding
-    //FluxComputation::run( dataBase, parameters, level );    // comment in to disable commhiding
-
-    //////////////////////////////////////////////////////////////////////////
-
-    CudaUtility::synchronizeCudaDevice();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( level != dataBase->numberOfLevels - 1 )
-    {
-        Interface::runCoarseToFine( dataBase, level );
-
-        nestedTimeStep( dataBase, parameters, level + 1 );
-        nestedTimeStep( dataBase, parameters, level + 1 );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    CellUpdate::run( dataBase, parameters, level );
-
-    //////////////////////////////////////////////////////////////////////////
-}
-
-} // namespace GksGpu
-
diff --git a/src/gpu/GksGpu/TimeStepping/NestedTimeStep.h b/src/gpu/GksGpu/TimeStepping/NestedTimeStep.h
deleted file mode 100644
index 315db1778b79237cafa9f52ba53b26065d5f97a5..0000000000000000000000000000000000000000
--- a/src/gpu/GksGpu/TimeStepping/NestedTimeStep.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef  NestedTimeStep_H
-#define  NestedTimeStep_H
-
-
-#include "GksGpu_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-#include "DataBase/DataBase.h"
-#include "Parameters/Parameters.h"
-namespace GksGpu{ 
-
-class GKSGPU_EXPORT TimeStepping
-{
-public:
-
-    static void nestedTimeStep( SPtr<DataBase> dataBase, 
-                                Parameters parameters, 
-                                uint level );
-
-};
-
-} // namespace GksGpu
-
-#endif
diff --git a/src/gpu/GksMeshAdapter/CMakeLists.txt b/src/gpu/GksMeshAdapter/CMakeLists.txt
deleted file mode 100644
index b9a2d12df4d0bee9396a706c6636b5f4056b2d3a..0000000000000000000000000000000000000000
--- a/src/gpu/GksMeshAdapter/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-project(GksMeshAdapter LANGUAGES CUDA CXX)
-
-vf_add_library(PRIVATE_LINK basics GridGenerator lbmCuda)
diff --git a/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp b/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp
deleted file mode 100644
index ca6b223c90859656231360142b08d543b019b7ad..0000000000000000000000000000000000000000
--- a/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp
+++ /dev/null
@@ -1,1328 +0,0 @@
-#include "GksMeshAdapter.h"
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include <fstream>
-#include <algorithm>
-#include <numeric>
-#include <functional>
-#include <iostream>
-#include <iomanip>
-#include <sstream>
-#include <mpi.h>
-
-#include "Core/Logger/Logger.h"
-
-#include "GridGenerator/grid/distributions/D3Q27.h"
-#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/NodeValues.h"
-#include "GridGenerator/utilities/math/Math.h"
-
-#include "MeshCell.h"
-#include "MeshFace.h"
-
-#include <lbm/constants/NumericConstants.h>
-
-using namespace vf::lbm::constant;
-
-using namespace vf::gpu;
-
-GksMeshAdapter::GksMeshAdapter(SPtr<MultipleGridBuilder> gridBuilder)
-    : gridBuilder(gridBuilder)
-{}
-
-void GksMeshAdapter::inputGrid()
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "inputGrid()" << "\n";
-
-    this->numberOfLevels = this->gridBuilder->getNumberOfGridLevels();
-
-    std::vector< SPtr<Grid> > grids = this->gridBuilder->getGrids();
-
-    this->dxCoarse = grids[0]->getDelta();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Allocate gridToMesh[][]" << "\n";
-
-    this->gridToMesh.resize( this->gridBuilder->getNumberOfGridLevels() );
-
-    for( uint level = 0; level < this->gridBuilder->getNumberOfGridLevels(); level++ ){
-        this->gridToMesh[level].resize( grids[level]->getSize() );
-
-        for( auto& cellIdx : this->gridToMesh[level] ) cellIdx = INVALID_INDEX;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    //
-    //    I d e n t i f y    C e l l s    i n    L B - G r i d
-    //
-    //////////////////////////////////////////////////////////////////////////
-
-    uint numberOfCells = 0;
-
-    for( uint level = 0; level < this->gridBuilder->getNumberOfGridLevels(); level++ ){
-        for( uint gridIdx = 0; gridIdx < grids[level]->getSize(); gridIdx++ ){
-            if (grids[level]->getFieldEntry(gridIdx)  != STOPPER_COARSE_UNDER_FINE &&
-                //grids[level]->getFieldEntry(gridIdx)  != STOPPER_SOLID &&
-                grids[level]->getFieldEntry(gridIdx)  != INVALID_COARSE_UNDER_FINE &&
-                grids[level]->getFieldEntry(gridIdx)  != INVALID_OUT_OF_GRID &&
-                grids[level]->getFieldEntry(gridIdx)  != INVALID_SOLID )
-            {
-                this->gridToMesh[level][gridIdx] = numberOfCells++;
-            }
-        }
-    }
-    
-    //////////////////////////////////////////////////////////////////////////
-    //
-    //    S e t    M e s h    t o    G r i d    i n f o r m a t i o n
-    //
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Allocate " << numberOfCells << " cells" << "\n";
-
-    this->cells.resize( numberOfCells );
-
-    for( uint level = 0; level < this->gridBuilder->getNumberOfGridLevels(); level++ ){
-        for( uint gridIdx = 0; gridIdx < grids[level]->getSize(); gridIdx++ ){
-            if ( this->gridToMesh[level][gridIdx] != INVALID_INDEX ){
-
-                uint cellIdx = gridToMesh[level][gridIdx];
-
-                MeshCell& cell = this->cells[ cellIdx ];
-
-                cell.level   = level;
-                cell.gridIdx = gridIdx;
-
-                cell.type = grids[level]->getFieldEntry(gridIdx);
-            }
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    this->findQuadtreeConnectivity();
-    this->findCellToCellConnectivity();
-    this->countCells();
-    this->partitionCells();
-    this->generateNodes();
-    this->computeCellGeometry();
-
-    this->getCommunicationIndices();
-
-    this->generateFaces();
-    this->sortFaces();
-    this->countFaces();
-    this->generateInterfaceConnectivity();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "inputGrid() finished!" << "\n";
-}
-
-void GksMeshAdapter::findQuadtreeConnectivity()
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "findQuadtreeConnectivity()" << "\n";
-
-    std::vector< SPtr<Grid> > grids = this->gridBuilder->getGrids();
-
-    Distribution dirs = DistributionHelper::getDistribution27();
-
-    for( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-    
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        if( cell.type == FLUID_FCC || cell.type == FLUID_CFC ){
-
-            real x, y, z;
-            grids[cell.level]->transIndexToCoords(cell.gridIdx, x, y, z);
-
-            real d = 0.25 * grids[cell.level]->getDelta();
-
-            for( uint idx = 0; idx < 8; idx++ )
-            {
-
-                real xSign = dirs.directions[idx + 19][0];
-                real ySign = dirs.directions[idx + 19][1];
-                real zSign = dirs.directions[idx + 19][2];
-
-                cell.children[ idx ] = this->gridToMesh[cell.level+1][ grids[cell.level+1]->transCoordToIndex( x + xSign * d, 
-                                                                                                               y + ySign * d, 
-                                                                                                               z + zSign * d ) ];
-            }
-
-            // register parent
-            if( cell.type == FLUID_CFC )
-                for (uint child = 0; child < 8; child++)
-                    this->cells[cell.children[child]].parent = cellIdx;
-
-            // set correct type for CFF cells
-            if( cell.type == FLUID_CFC )
-                for( uint child = 0; child < 8; child++ )
-                    this->cells[ cell.children[child] ].type = FLUID_CFF;
-
-        }
-    }
-}
-
-void GksMeshAdapter::findCellToCellConnectivity()
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "findCellToCellConnectivity()" << "\n";
-
-    std::vector< SPtr<Grid> > grids = this->gridBuilder->getGrids();
-
-    Distribution dirs = DistributionHelper::getDistribution27();
-
-    for( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-    
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        real x, y, z;
-        grids[cell.level]->transIndexToCoords(cell.gridIdx, x, y, z);
-
-        real d = grids[cell.level]->getDelta();
-
-        for( uint idx = 0; idx < 27; idx++ )
-        {
-            if( idx == DIR_27_REST ) continue;
-
-            int xSign = dirs.directions[idx][0];
-            int ySign = dirs.directions[idx][1];
-            int zSign = dirs.directions[idx][2];
-
-            uint neighborGridIdx = grids[cell.level]->transCoordToIndex( x + xSign * d, 
-                                                                         y + ySign * d, 
-                                                                         z + zSign * d );
-
-            if( neighborGridIdx == INVALID_INDEX || this->gridToMesh[cell.level][neighborGridIdx] == INVALID_INDEX ){
-                if( !cell.isCoarseGhostCell() && cell.type != BC_SOLID )
-                    cell.isGhostCell = true;
-
-                continue;
-            }
-
-            cell.cellToCell[ idx ] = this->gridToMesh[cell.level][neighborGridIdx];
-        }
-    }
-}
-
-void GksMeshAdapter::countCells()
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "countCells()" << "\n";
-
-    this->numberOfCellsPerLevel    .resize( this->numberOfLevels );
-    this->numberOfBulkCellsPerLevel.resize( this->numberOfLevels );
-    this->startOfCellsPerLevel     .resize( this->numberOfLevels );
-
-    for( auto& i : this->numberOfCellsPerLevel     ) i = 0;
-    for( auto& i : this->numberOfBulkCellsPerLevel ) i = 0;
-    for( auto& i : this->startOfCellsPerLevel      ) i = 0;
-
-    uint level = 0;
-    for( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        if( cell.level != level ) level++;
-
-        this->numberOfCellsPerLevel[ level ]++; 
-
-        if( ! ( cell.isGhostCell || cell.isCoarseGhostCell() ) )
-            this->numberOfBulkCellsPerLevel[ level ]++;
-    }
-
-    for( uint level = 1; level < this->numberOfLevels; level++ )
-        this->startOfCellsPerLevel[ level ] = this->startOfCellsPerLevel[ level-1 ] + this->numberOfCellsPerLevel[ level-1 ];
-}
-
-void GksMeshAdapter::partitionCells()
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "partitionCells()" << "\n";
-
-    for( uint level = 0; level < this->numberOfLevels; level++ ){
-
-        std::vector<uint> idxMap( this->cells.size() );
-        std::iota( idxMap.begin(), idxMap.end(), 0 );
-
-        // partition idxMap
-        std::stable_partition(  idxMap.begin() + this->startOfCellsPerLevel[level], 
-                                idxMap.begin() + this->startOfCellsPerLevel[level] 
-                                               + this->numberOfCellsPerLevel[level], 
-                                [this](int lhs){ 
-                                    return ! ( this->cells[ lhs ].isGhostCell || this->cells[ lhs ].isCoarseGhostCell() );
-                                }
-                             );
-
-        // invert idxMap
-        {
-            std::vector<uint> buffer = idxMap;
-            for( uint idx = 0; idx < idxMap.size(); idx ++ )
-                idxMap[ buffer[idx] ] = idx;
-        }
-
-        // partition cell list
-        std::stable_partition(  this->cells.begin() + this->startOfCellsPerLevel[level], 
-                                this->cells.begin() + this->startOfCellsPerLevel[level] 
-                                                    + this->numberOfCellsPerLevel[level], 
-                                [this](MeshCell lhs){ 
-                                    return ! ( lhs.isGhostCell || lhs.isCoarseGhostCell() );
-                                }
-                             );
-
-        this->refreshCellConnectivity( idxMap );
-    }
-}
-
-void GksMeshAdapter::refreshCellConnectivity(const std::vector<uint>& idxMap)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "refreshCellConnectivity()" << "\n";
-
-    for( auto& cell : this->cells ){
-        for( uint idx = 0; idx < 27; idx++ )
-            if( cell.cellToCell[ idx ] != INVALID_INDEX )
-                cell.cellToCell[ idx ] = idxMap[ cell.cellToCell[ idx ] ];
-
-        if( cell.parent != INVALID_INDEX )
-            cell.parent = idxMap[ cell.parent ];
-
-        for( uint idx = 0; idx < 8; idx++ )
-            if( cell.children[ idx ] != INVALID_INDEX )
-                cell.children[ idx ] = idxMap[ cell.children[ idx ] ];
-    }
-
-    for( auto& grid : this->gridToMesh ){
-        for( auto& cellIdx : grid ){
-            if( cellIdx != INVALID_INDEX )
-                cellIdx = idxMap[ cellIdx ];
-        }
-    }
-}
-
-void GksMeshAdapter::findCornerCells()
-{
-    //SPtr<Grid> grid = this->gridBuilder->getGrids()[0];
-    //
-    //this->cornerCells[0] = this->gridToMesh[ 0 ][ grid->transCoordToIndex( grid->getStartX(), grid->getStartY(), z0 ) ];
-    //this->cornerCells[1] = this->gridToMesh[ 0 ][ grid->transCoordToIndex( grid->getEndX()  , grid->getStartY(), z0 ) ];
-    //this->cornerCells[2] = this->gridToMesh[ 0 ][ grid->transCoordToIndex( grid->getEndX()  , grid->getEndY()  , z0 ) ];
-    //this->cornerCells[3] = this->gridToMesh[ 0 ][ grid->transCoordToIndex( grid->getStartX(), grid->getEndY()  , z0 ) ];
-}
-
-void GksMeshAdapter::generateNodes()
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "generateNodes()" << "\n";
-
-    std::vector< SPtr<Grid> > grids = gridBuilder->getGrids();
-
-    nodes.reserve( 2 * this->cells.size() );
-
-    Distribution dirs = DistributionHelper::getDistribution27();
-
-    for( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-    
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        //if( cell.type == STOPPER_SOLID ) continue;
-
-        real x, y, z;
-        grids[cell.level]->transIndexToCoords(cell.gridIdx, x, y, z);
-
-        real d = 0.5 * grids[cell.level]->getDelta();
-
-        std::array<Vec3,8> dir;
-
-        for( uint idx = 0; idx < 8; idx++ )
-        {
-            if( cell.cellToNode[idx] == INVALID_INDEX )
-            {
-
-                real dx = dirs.directions[idx + 19][0] * d;
-                real dy = dirs.directions[idx + 19][1] * d;
-                real dz = dirs.directions[idx + 19][2] * d;
-
-                nodes.push_back( Vec3( x + dx, y + dy, z + dz ) );
-
-                cell.cellToNode[idx] = nodes.size()-1;
-
-                //// register new node at neighbor cells on same level
-                for (uint idx = 0; idx < 8; idx++)
-                {
-                    real dxNeighbor = -dirs.directions[idx + 19][0] * d;
-                    real dyNeighbor = -dirs.directions[idx + 19][1] * d;
-                    real dzNeighbor = -dirs.directions[idx + 19][2] * d;
-
-                    real xNeighbor = nodes.back().x + dxNeighbor;
-                    real yNeighbor = nodes.back().y + dyNeighbor;
-                    real zNeighbor = nodes.back().z + dzNeighbor;
-
-                    uint neighborGridIdx = grids[cell.level]->transCoordToIndex(xNeighbor, yNeighbor, zNeighbor);
-
-                    if ( neighborGridIdx == INVALID_INDEX ) continue;
-
-                    uint neighborIdx = gridToMesh[cell.level][neighborGridIdx];
-
-                    if ( neighborIdx != INVALID_INDEX )
-                    {
-                        this->cells[ neighborIdx ].cellToNode[idx] = nodes.size() - 1;
-                    }
-                }
-            }
-        }
-    }
-}
-
-void GksMeshAdapter::computeCellGeometry()
-{    
-    for( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-        
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        Vec3 cellCenter;
-
-        for( uint node = 0; node < 8; node++ ){
-            cellCenter = cellCenter + this->nodes[ cell.cellToNode[node] ];
-        }
-
-        cell.cellCenter.x = cellCenter.x / c8o1;
-        cell.cellCenter.y = cellCenter.y / c8o1;
-        cell.cellCenter.z = cellCenter.z / c8o1;
-    }
-}
-
-void GksMeshAdapter::generateFaces()
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "generateFaces()" << "\n";
-
-    std::vector< SPtr<Grid> > grids = this->gridBuilder->getGrids();
-
-    this->faces.reserve( 2 * this->cells.size() );
-
-    for( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-    
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        //if( cell.type == BC_SOLID || cell.type == STOPPER_SOLID ) continue;
-
-        // generate faces in positive direction
-        for( uint neighborIdx = 0; neighborIdx < 6; neighborIdx += 2 ){
-
-            if( cell.faceExists[ neighborIdx ] ) continue;
-
-            if( cell.cellToCell[ neighborIdx ] == INVALID_INDEX ) continue;
-
-            uint neighborCellIdx = cell.cellToCell[ neighborIdx ];
-
-            MeshCell& neighborCell = this->cells[ neighborCellIdx ];
-
-            if( cell.isGhostCell && neighborCell.isGhostCell ) continue;
-
-            if( cell.isCoarseGhostCell() || neighborCell.isCoarseGhostCell() ) continue;
-
-            //////////////////////////////////////////////////////////////////////////
-
-            MeshFace newFace;
-
-            newFace.level = cell.level;
-
-            if( neighborIdx == 0 )
-            {
-                newFace.faceToNode[ 0 ] = cell.cellToNode[ 3 ];
-                newFace.faceToNode[ 1 ] = cell.cellToNode[ 1 ];
-                newFace.faceToNode[ 2 ] = cell.cellToNode[ 0 ];
-                newFace.faceToNode[ 3 ] = cell.cellToNode[ 2 ];
-                newFace.orientation = 'x';
-            }
-            if( neighborIdx == 2 )
-            {
-                newFace.faceToNode[ 0 ] = cell.cellToNode[ 5 ];
-                newFace.faceToNode[ 1 ] = cell.cellToNode[ 4 ];
-                newFace.faceToNode[ 2 ] = cell.cellToNode[ 0 ];
-                newFace.faceToNode[ 3 ] = cell.cellToNode[ 1 ];
-                newFace.orientation = 'y';
-            }
-            if( neighborIdx == 4 )
-            {
-                newFace.faceToNode[ 0 ] = cell.cellToNode[ 6 ];
-                newFace.faceToNode[ 1 ] = cell.cellToNode[ 2 ];
-                newFace.faceToNode[ 2 ] = cell.cellToNode[ 0 ];
-                newFace.faceToNode[ 3 ] = cell.cellToNode[ 4 ];
-                newFace.orientation = 'z';
-            }
-
-            //////////////////////////////////////////////////////////////////////////
-
-            cell.faceExists[ neighborIdx ] = true;
-
-            // register face at neighbor
-            for( uint idx = 0; idx < 6; idx++ ){
-                if( neighborCell.cellToCell[ idx ] == cellIdx ){
-                    neighborCell.faceExists[ idx ] = true;
-                    break;
-                }
-            }
-
-            //////////////////////////////////////////////////////////////////////////
-
-            newFace.negCell = cellIdx;
-            newFace.posCell = neighborCellIdx;
-
-            //////////////////////////////////////////////////////////////////////////
-
-            //if ( cell.type == FLUID_CFF && neighborCell.type == FLUID_FCF ) newFace.negCellCoarse = cell.parent;
-            //if ( cell.type == FLUID_FCF && neighborCell.type == FLUID_CFF ) newFace.posCellCoarse = neighborCell.parent;
-
-            //////////////////////////////////////////////////////////////////////////
-            
-            Vec3 faceCenter;
-
-            for( uint node = 0; node < 4; node++ ){
-                faceCenter = faceCenter + this->nodes[ newFace.faceToNode[node] ];
-            }
-
-            newFace.faceCenter.x = faceCenter.x / c4o1;
-            newFace.faceCenter.y = faceCenter.y / c4o1;
-            newFace.faceCenter.z = faceCenter.z / c4o1;
-
-            this->faces.push_back( newFace );
-        }
-    }
-}
-
-#define OLD_SORTING
-#ifdef  OLD_SORTING
-
-void GksMeshAdapter::sortFaces()
-{
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // sort by level and orientation
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "sortFaces()" << "\n";
-
-    std::stable_sort(this->faces.begin(), this->faces.end(),
-            [&, this](MeshFace lhs, MeshFace rhs)
-            {
-                if( lhs.level != rhs.level ) return lhs.level < rhs.level;
-
-                if (lhs.orientation != rhs.orientation) {
-                    if      (lhs.orientation == 'x' && rhs.orientation == 'y') return true;
-                    else if (lhs.orientation == 'y' && rhs.orientation == 'z') return true;
-                    else if (lhs.orientation == 'x' && rhs.orientation == 'z') return true;
-                    else                                                       return false;
-                }
-
-                return false;
-            }
-    );
-
-    this->countFaces();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // sort into blocks
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    // std::array<char, 3> orientations = {'x', 'y', 'z'};
-
-    for( uint level = 0; level < this->gridBuilder->getNumberOfLevels(); level++ )
-    {
-        for( uint idx = 0; idx < 3; idx++ )
-        {
-            uint start =         this->startOfFacesPerLevelXYZ [ 3 * level + idx];
-            uint end   = start + this->numberOfFacesPerLevelXYZ[ 3 * level + idx];
-
-            // real xMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x;
-            // real yMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y;
-            // real zMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z;
-
-            real xMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x;
-            real yMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y;
-            real zMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z;
-
-            // real xRange = xMax - xMin;
-            // real yRange = yMax - yMin;
-            // real zRange = zMax - zMin;
-
-            uint blockDim = 8;
-
-            real dx = this->gridBuilder->getGrid(level)->getDelta();
-
-            std::sort(this->faces.begin() + start, this->faces.begin() + end,
-                [&, this](MeshFace lhs, MeshFace rhs)
-            {
-                uint xIdxLhs = lround((lhs.faceCenter.x - xMin) / dx);
-                uint yIdxLhs = lround((lhs.faceCenter.y - yMin) / dx);
-                uint zIdxLhs = lround((lhs.faceCenter.z - zMin) / dx);
-
-                uint xIdxRhs = lround((rhs.faceCenter.x - xMin) / dx);
-                uint yIdxRhs = lround((rhs.faceCenter.y - yMin) / dx);
-                uint zIdxRhs = lround((rhs.faceCenter.z - zMin) / dx);
-
-                real xBlockLhs = xIdxLhs / blockDim;
-                real yBlockLhs = yIdxLhs / blockDim;
-                real zBlockLhs = zIdxLhs / blockDim;
-
-                real xBlockRhs = xIdxRhs / blockDim;
-                real yBlockRhs = yIdxRhs / blockDim;
-                real zBlockRhs = zIdxRhs / blockDim;
-
-                if (zBlockLhs < zBlockRhs) return true;
-                if (zBlockLhs > zBlockRhs) return false;
-                if (yBlockLhs < yBlockRhs) return true;
-                if (yBlockLhs > yBlockRhs) return false;
-                if (xBlockLhs < xBlockRhs) return true;
-                if (xBlockLhs > xBlockRhs) return false;
-
-                if (zIdxLhs < zIdxRhs) return true;
-                if (zIdxLhs > zIdxRhs) return false;
-                if (yIdxLhs < yIdxRhs) return true;
-                if (yIdxLhs > yIdxRhs) return false;
-                if (xIdxLhs < xIdxRhs) return true;
-                if (xIdxLhs > xIdxRhs) return false;
-
-                return true;
-            }
-            );
-        }
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // partition by inner and out for communication hiding
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    this->numberOfInnerFacesPerLevel.resize( this->numberOfLevels );
-
-    for( uint level = 0; level < this->gridBuilder->getNumberOfLevels(); level++ )
-    {
-        auto bound =
-        std::stable_partition(  this->faces.begin() + this->startOfFacesPerLevelXYZ [3 * level], 
-                                this->faces.begin() + this->startOfFacesPerLevelXYZ [3 * level] 
-                                                    + this->numberOfFacesPerLevelXYZ[3 * level + 0] 
-                                                    + this->numberOfFacesPerLevelXYZ[3 * level + 1] 
-                                                    + this->numberOfFacesPerLevelXYZ[3 * level + 2], 
-                                    [this](MeshFace& lhs)
-                                    {
-                                        //return true; // comment this in to disable sorting for Comm hiding                            
-
-                                        for( uint neighborIndex = 0; neighborIndex < 6; neighborIndex++ )
-                                        {
-                                            uint neighborCellIndex = this->cells[ lhs.posCell ].cellToCell[ neighborIndex ];
-                                            if( neighborCellIndex != INVALID_INDEX && this->cells[ neighborCellIndex ].isRecvCell )
-                                            {
-                                                return false;
-                                            }
-                                        }
-                                        for( uint neighborIndex = 0; neighborIndex < 6; neighborIndex++ )
-                                        {
-                                            uint neighborCellIndex = this->cells[ lhs.negCell ].cellToCell[ neighborIndex ];
-                                            if( neighborCellIndex != INVALID_INDEX && this->cells[ neighborCellIndex ].isRecvCell )
-                                            {
-                                                return false;
-                                            }
-                                        }
-
-                                        return true;
-                                    }
-                                 );
-
-        this->numberOfInnerFacesPerLevel[ level ] = 0;
-        for( auto it = this->faces.begin() + this->startOfFacesPerLevelXYZ [3 * level]; it != bound; it++ )
-        {
-            this->numberOfInnerFacesPerLevel[ level ]++;
-        }
-
-        *logging::out << logging::Logger::INFO_LOW << "    Level " << level << ": " << this->numberOfFacesPerLevelXYZ[ 3 * level + 0 ]
-                                                                                     + this->numberOfFacesPerLevelXYZ[ 3 * level + 1 ]
-                                                                                     + this->numberOfFacesPerLevelXYZ[ 3 * level + 2 ]
-                                                                                    << " faces" << "\n";
-        *logging::out << logging::Logger::INFO_LOW << "    Level " << level << ": " << this->numberOfInnerFacesPerLevel[ level ] << " inner faces" << "\n";
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-}
-
-#else
-
-void GksMeshAdapter::sortFaces()
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "sortFaces()" << "\n";
-
-    std::stable_sort(this->faces.begin(), this->faces.end(),
-            [&, this](MeshFace lhs, MeshFace rhs)
-            {
-                if( lhs.level != rhs.level ) return lhs.level < rhs.level;
-
-                return false;
-            }
-    );
-
-    countFaces();
-
-    std::array<char, 3> orientations = {'x', 'y', 'z'};
-
-    for( uint level = 0; level < this->gridBuilder->getNumberOfLevels(); level++ )
-    {
-        uint start =         this->startOfFacesPerLevelXYZ [ 3 * level ];
-        uint end   = start + this->numberOfFacesPerLevelXYZ[ 3 * level + 0]
-                           + this->numberOfFacesPerLevelXYZ[ 3 * level + 1]
-                           + this->numberOfFacesPerLevelXYZ[ 3 * level + 2];
-
-        uint blockDim = 16;
-
-        real dx = this->gridBuilder->getGrid(level)->getDelta();
-
-        real xMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x + 0.5 * dx;
-        real yMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y + 0.5 * dx;
-        real zMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z + 0.5 * dx;
-
-        real xMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x + 0.5 * dx;
-        real yMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y + 0.5 * dx;
-        real zMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z + 0.5 * dx; 
-
-        std::stable_sort(this->faces.begin() + start, this->faces.begin() + end,
-            [&, this](MeshFace lhs, MeshFace rhs)
-        {
-            Vec3 lhsCenter = lhs.faceCenter;
-            Vec3 rhsCenter = rhs.faceCenter;
-
-            if( lhs.orientation == 'x' ) lhsCenter.x += 0.5 * dx;
-            if( lhs.orientation == 'y' ) lhsCenter.y += 0.5 * dx;
-            if( lhs.orientation == 'z' ) lhsCenter.z += 0.5 * dx;
-
-            if( rhs.orientation == 'x' ) rhsCenter.x += 0.5 * dx;
-            if( rhs.orientation == 'y' ) rhsCenter.y += 0.5 * dx;
-            if( rhs.orientation == 'z' ) rhsCenter.z += 0.5 * dx;
-
-            uint xIdxLhs = lround((lhsCenter.x - xMin) / dx);
-            uint yIdxLhs = lround((lhsCenter.y - yMin) / dx);
-            uint zIdxLhs = lround((lhsCenter.z - zMin) / dx);
-
-            uint xIdxRhs = lround((rhsCenter.x - xMin) / dx);
-            uint yIdxRhs = lround((rhsCenter.y - yMin) / dx);
-            uint zIdxRhs = lround((rhsCenter.z - zMin) / dx);
-
-            uint xBlockLhs = xIdxLhs / blockDim;
-            uint yBlockLhs = yIdxLhs / blockDim;
-            uint zBlockLhs = zIdxLhs / blockDim;
-
-            uint xBlockRhs = xIdxRhs / blockDim;
-            uint yBlockRhs = yIdxRhs / blockDim;
-            uint zBlockRhs = zIdxRhs / blockDim;
-
-            if (zBlockLhs < zBlockRhs) return true;
-            if (zBlockLhs > zBlockRhs) return false;
-            if (yBlockLhs < yBlockRhs) return true;
-            if (yBlockLhs > yBlockRhs) return false;
-            if (xBlockLhs < xBlockRhs) return true;
-            if (xBlockLhs > xBlockRhs) return false;
-
-            if (lhs.orientation != rhs.orientation) {
-                if      (lhs.orientation == 'x' && rhs.orientation == 'y') return true;
-                else if (lhs.orientation == 'y' && rhs.orientation == 'z') return true;
-                else if (lhs.orientation == 'x' && rhs.orientation == 'z') return true;
-                else                                                       return false;
-            }
-
-            if (zIdxLhs < zIdxRhs) return true;
-            if (zIdxLhs > zIdxRhs) return false;
-            if (yIdxLhs < yIdxRhs) return true;
-            if (yIdxLhs > yIdxRhs) return false;
-            if (xIdxLhs < xIdxRhs) return true;
-            if (xIdxLhs > xIdxRhs) return false;
-
-            return false;
-        });
-    }
-}
-
-#endif
-
-void GksMeshAdapter::countFaces()
-{
-    this->numberOfFacesPerLevelXYZ.resize( 3 * this->numberOfLevels );
-    this->startOfFacesPerLevelXYZ.resize ( 3 * this->numberOfLevels );
-
-    for( auto& i : this->numberOfFacesPerLevelXYZ ) i = 0;
-    for( auto& i : this->startOfFacesPerLevelXYZ  ) i = 0;
-
-    for( auto& face : this->faces ){
-        if      ( face.orientation == 'x' ) this->numberOfFacesPerLevelXYZ[ 3 * face.level     ]++;
-        else if ( face.orientation == 'y' ) this->numberOfFacesPerLevelXYZ[ 3 * face.level + 1 ]++;
-        else if ( face.orientation == 'z' ) this->numberOfFacesPerLevelXYZ[ 3 * face.level + 2 ]++;
-    }
-
-    this->startOfFacesPerLevelXYZ[0] = 0;
-
-    for( uint level = 1; level < 3 * this->numberOfLevels; level++ ){
-        
-        this->startOfFacesPerLevelXYZ[level] = this->startOfFacesPerLevelXYZ [level - 1]
-                                             + this->numberOfFacesPerLevelXYZ[level - 1];
-    }
-}
-
-void GksMeshAdapter::generateInterfaceConnectivity()
-{
-    this->numberOfFineToCoarsePerLevel.resize( this->numberOfLevels );
-    this->startOfFineToCoarsePerLevel.resize ( this->numberOfLevels );
-    this->numberOfCoarseToFinePerLevel.resize( this->numberOfLevels );
-    this->startOfCoarseToFinePerLevel.resize ( this->numberOfLevels );
-
-    for( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        if( cell.type == FLUID_FCC ){
-
-            uint_9 connectivity;
-
-            connectivity[ 0 ] = cellIdx;
-            connectivity[ 1 ] = cell.children[ 0 ];
-            connectivity[ 2 ] = cell.children[ 1 ];
-            connectivity[ 3 ] = cell.children[ 2 ];
-            connectivity[ 4 ] = cell.children[ 3 ];
-            connectivity[ 5 ] = cell.children[ 4 ];
-            connectivity[ 6 ] = cell.children[ 5 ];
-            connectivity[ 7 ] = cell.children[ 6 ];
-            connectivity[ 8 ] = cell.children[ 7 ];
-
-            this->fineToCoarse.push_back( connectivity );
-
-            this->numberOfFineToCoarsePerLevel[ cell.level ]++;
-        }
-
-        if( cell.type == FLUID_CFC ){
-            
-            uint_15 connectivity;
-
-            connectivity[  0 ] = cellIdx;
-
-            //connectivity[  1 ] = cell.cellToCell[ 0 ];
-            //connectivity[  2 ] = cell.cellToCell[ 1 ];
-            //connectivity[  3 ] = cell.cellToCell[ 2 ];
-            //connectivity[  4 ] = cell.cellToCell[ 3 ];
-            //connectivity[  5 ] = cell.cellToCell[ 4 ];
-            //connectivity[  6 ] = cell.cellToCell[ 5 ];
-
-            //connectivity[  7 ] = cell.children[ 0 ];
-            //connectivity[  8 ] = cell.children[ 1 ];
-            //connectivity[  9 ] = cell.children[ 2 ];
-            //connectivity[ 10 ] = cell.children[ 3 ];
-            //connectivity[ 11 ] = cell.children[ 4 ];
-            //connectivity[ 12 ] = cell.children[ 5 ];
-            //connectivity[ 13 ] = cell.children[ 6 ];
-            //connectivity[ 14 ] = cell.children[ 7 ];
-
-            connectivity[ 1 ] = cell.children[ 0 ];
-            connectivity[ 2 ] = cell.children[ 1 ];
-            connectivity[ 3 ] = cell.children[ 2 ];
-            connectivity[ 4 ] = cell.children[ 3 ];
-            connectivity[ 5 ] = cell.children[ 4 ];
-            connectivity[ 6 ] = cell.children[ 5 ];
-            connectivity[ 7 ] = cell.children[ 6 ];
-            connectivity[ 8 ] = cell.children[ 7 ];
-
-            this->coarseToFine.push_back( connectivity );
-
-            numberOfCoarseToFinePerLevel[ cell.level ]++;
-        }
-    }
-    
-    this->startOfFineToCoarsePerLevel[0] = 0;
-    this->startOfCoarseToFinePerLevel[0] = 0;
-
-    for( uint level = 1; level < this->numberOfLevels; level++ )
-    {
-        this->startOfFineToCoarsePerLevel[level] = this->startOfFineToCoarsePerLevel [level - 1]
-                                                 + this->numberOfFineToCoarsePerLevel[level - 1];
-        
-        this->startOfCoarseToFinePerLevel[level] = this->startOfCoarseToFinePerLevel [level - 1]
-                                                 + this->numberOfCoarseToFinePerLevel[level - 1];
-    }
-}
-
-void GksMeshAdapter::findPeriodicBoundaryNeighbors()
-{
-    for( uint level = 0; level < this->numberOfLevels; level++ )
-    {
-        SPtr<Grid> grid = this->gridBuilder->getGrid(level);
-
-        if( !grid->getPeriodicityX() && !grid->getPeriodicityY() && !grid->getPeriodicityZ() )
-            throw std::runtime_error( "GksMeshAdapter::findPeriodicBoundaryNeighbors() failed, because no periodic direction is set!" );
-
-        uint startIdx = startOfCellsPerLevel[ level ] + numberOfBulkCellsPerLevel[ level ];
-
-        uint endIdx   = startOfCellsPerLevel[ level ] + numberOfCellsPerLevel[ level ];
-
-        for( uint cellIdx = startIdx; cellIdx < endIdx; cellIdx++ )
-        {
-            MeshCell cell = this->cells[ cellIdx ];
-
-            if( cell.type != STOPPER_OUT_OF_GRID && cell.type != STOPPER_OUT_OF_GRID_BOUNDARY && cell.type != STOPPER_SOLID ) continue;
-
-            Vec3 gridStart ( grid->getStartX() + c1o2 * grid->getDelta(),
-                             grid->getStartY() + c1o2 * grid->getDelta(),
-                             grid->getStartZ() + c1o2 * grid->getDelta() );
-
-            Vec3 gridEnd   ( grid->getEndX()   - c1o2 * grid->getDelta(),
-                             grid->getEndY()   - c1o2 * grid->getDelta(),
-                             grid->getEndZ()   - c1o2 * grid->getDelta() );
-
-            Vec3 size = gridEnd - gridStart;
-
-            Vec3 delta;
-
-            if( grid->getPeriodicityX() && cell.cellCenter.x < gridStart.x ) delta.x =   size.x;
-            if( grid->getPeriodicityX() && cell.cellCenter.x > gridEnd.x   ) delta.x = - size.x;
-
-            if( grid->getPeriodicityY() && cell.cellCenter.y < gridStart.y ) delta.y =   size.y;
-            if( grid->getPeriodicityY() && cell.cellCenter.y > gridEnd.y   ) delta.y = - size.y;
-
-            if( grid->getPeriodicityZ() && cell.cellCenter.z < gridStart.z ) delta.z =   size.z;
-            if( grid->getPeriodicityZ() && cell.cellCenter.z > gridEnd.z   ) delta.z = - size.z;
-
-            uint neighborGridIdx = grid->transCoordToIndex( cell.cellCenter.x + delta.x,
-                                                            cell.cellCenter.y + delta.y,
-                                                            cell.cellCenter.z + delta.z );
-            
-            if( neighborGridIdx == INVALID_INDEX ) throw std::runtime_error( std::string("No periodic cell found!") );
-
-            uint neighborIdx = this->gridToMesh[ level ][ neighborGridIdx ];
-
-            //if( neighborIdx == cellIdx ) neighborIdx == INVALID_INDEX;
-
-            if( neighborIdx == INVALID_INDEX )
-            {
-                std::stringstream s;
-
-                s << "No periodic cell found: ";
-                s << "( " << cell.cellCenter.x           << ", " << cell.cellCenter.y           << ", " << cell.cellCenter.z           << " )";
-                s << "( " << cell.cellCenter.x + delta.x << ", " << cell.cellCenter.y + delta.y << ", " << cell.cellCenter.z + delta.z << " )";
-
-                s << "( " << gridStart.x << ", " << gridStart.y << ", " << gridStart.z << " )";
-                s << "( " << gridEnd.x   << ", " << gridEnd.y   << ", " << gridEnd.z   << " )";
-
-                throw std::runtime_error( s.str() );
-            }
-            
-            this->periodicBoundaryNeighbors.push_back( {cellIdx, neighborIdx} );
-        }
-    }
-}
-
-void GksMeshAdapter::getCommunicationIndices()
-{
-    this->communicationProcesses[0] = this->gridBuilder->getCommunicationProcess(0);
-    this->communicationProcesses[1] = this->gridBuilder->getCommunicationProcess(1);
-    this->communicationProcesses[2] = this->gridBuilder->getCommunicationProcess(2);
-    this->communicationProcesses[3] = this->gridBuilder->getCommunicationProcess(3);
-    this->communicationProcesses[4] = this->gridBuilder->getCommunicationProcess(4);
-    this->communicationProcesses[5] = this->gridBuilder->getCommunicationProcess(5);
-
-    this->communicationIndices.resize( this->gridBuilder->getNumberOfLevels() );
-
-    //int rank = 0;
-    //MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
-    //int mpiWorldSize = 1;
-    //MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);
-
-    //MPI_Barrier(MPI_COMM_WORLD);
-
-    //for( int i = 0; i < rank; i++ ) MPI_Barrier(MPI_COMM_WORLD);
-
-    for( uint level = 0; level < this->gridBuilder->getNumberOfLevels(); level++ )
-    {
-        //////////////////////////////////////////////////////////////////////////
-
-        SPtr<Grid> grid = this->gridBuilder->getGrid(level);
-
-        for (uint direction = 0; direction < 6; direction++)
-        {
-            for (uint index = 0; index < grid->getNumberOfSendNodes(direction); index++)
-            {
-                uint cellIndex = this->gridToMesh[level][grid->getSendIndex(direction, index)];
-                this->communicationIndices[level].sendIndices[direction].push_back(cellIndex);
-            }
-
-            for (uint index = 0; index < grid->getNumberOfReceiveNodes(direction); index++)
-            {
-                uint cellIndex = this->gridToMesh[level][grid->getReceiveIndex(direction, index)];
-                this->communicationIndices[level].recvIndices[direction].push_back(cellIndex);
-                this->cells[ cellIndex ].isRecvCell = true;
-            }
-
-            std::stringstream msg;
-
-            msg << "Rank " << /*rank <<*/ " | Level " << level << " | dir " << direction << " | ";
-            msg << "Send " << this->communicationIndices[level].sendIndices[direction].size() << " | ";
-            msg << "Recv " << this->communicationIndices[level].recvIndices[direction].size() << std::endl;
-
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << msg.str();
-        }
-
-        //////////////////////////////////////////////////////////////////////////
-    }
-
-    //for( int i = rank; i < mpiWorldSize; i++ ) MPI_Barrier(MPI_COMM_WORLD);
-}
-
-void GksMeshAdapter::writeMeshVTK(std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "writeMeshVTK( " << filename << " )" << "\n";
-
-    std::ofstream file;
-
-    file.open(filename);
-
-    file << "# vtk DataFile Version 3.0\n";
-    file << "by MeshGenerator\n";
-    file << "ASCII\n";
-    file << "DATASET UNSTRUCTURED_GRID\n";
-
-    file << "POINTS " << nodes.size() << " float" << std::endl;
-
-    for (auto node : nodes){
-        file << node.x << " " << node.y << " " << node.z << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "CELLS " << this->cells.size() << " " << this->cells.size() * 9 << std::endl;
-
-    for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-    
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        uint_8 nodes;
-        for( auto& i : nodes ) i = INVALID_INDEX;
-
-        nodes[0] = cell.cellToNode[7];//[ 6 ];
-        nodes[1] = cell.cellToNode[3];//[ 5 ];
-        nodes[2] = cell.cellToNode[1];//[ 2 ];
-        nodes[3] = cell.cellToNode[5];//[ 1 ];
-        nodes[4] = cell.cellToNode[6];//[ 4 ];
-        nodes[5] = cell.cellToNode[2];//[ 7 ];
-        nodes[6] = cell.cellToNode[0];//[ 0 ];
-        nodes[7] = cell.cellToNode[4];//[ 3 ];
-
-        file << 8 << " ";
-
-        for( uint i = 0; i < 8; i++ ) file << nodes[i] << " ";
-
-        file << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "CELL_TYPES " << this->cells.size() << std::endl;
-
-    for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-        file << 12 << std::endl;
-    }
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "\nCELL_DATA " << this->cells.size() << std::endl;
-
-    file << "FIELD Label " << 4 << std::endl;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "CellIdx 1 " << this->cells.size() << " int" << std::endl;
-
-    for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-
-        file << cellIdx << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "level 1 " << this->cells.size() << " int" << std::endl;
-
-    for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-    
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        file << cell.level << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "type 1 " << this->cells.size() << " int" << std::endl;
-
-    for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-    
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        file << (uint) cell.type << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "isGhostCell 1 " << this->cells.size() << " int" << std::endl;
-
-    for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-    
-        MeshCell& cell = this->cells[ cellIdx ];
-
-        file << (uint) cell.isGhostCell << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.close();
-}
-
-void GksMeshAdapter::writeMeshFaceVTK(std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "writeMeshFaceVTK( " << filename << " )" << "\n";
-
-    std::ofstream file;
-
-    file.open(filename);
-
-    file << "# vtk DataFile Version 3.0\n";
-    file << "by MeshGenerator\n";
-    file << "ASCII\n";
-    file << "DATASET UNSTRUCTURED_GRID\n";
-
-    file << "POINTS " << nodes.size() << " float" << std::endl;
-
-    for (auto node : nodes){
-        file << node.x << " " << node.y << " " << node.z << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "CELLS " << this->faces.size() << " " << 5 * this->faces.size() << std::endl;
-
-    for ( uint faceIdx = 0; faceIdx < this->faces.size(); faceIdx++ ){
-
-        file << "4 ";
-
-        file << this->faces[ faceIdx ].faceToNode[0] << " ";
-        file << this->faces[ faceIdx ].faceToNode[1] << " ";
-        file << this->faces[ faceIdx ].faceToNode[2] << " ";
-        file << this->faces[ faceIdx ].faceToNode[3] << " ";
-
-        file << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "CELL_TYPES " << this->faces.size() << std::endl;
-
-    for ( uint faceIdx = 0; faceIdx < this->faces.size(); faceIdx++ ){
-        file << "9" << std::endl;
-    }
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "\nCELL_DATA " << this->faces.size() << std::endl;
-
-    file << "FIELD Label " << 3 << std::endl;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "FaceIdx 1 " << this->faces.size() << " int" << std::endl;
-
-    for ( uint faceIdx = 0; faceIdx < this->faces.size(); faceIdx++ ){
-
-        file << faceIdx << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "level 1 " << this->faces.size() << " int" << std::endl;
-
-    for ( uint faceIdx = 0; faceIdx < this->faces.size(); faceIdx++ ){
-
-        file << this->faces[ faceIdx ].level << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "orientation 1 " << this->faces.size() << " int" << std::endl;
-
-    for ( uint faceIdx = 0; faceIdx < this->faces.size(); faceIdx++ ){
-
-        file << (int)this->faces[ faceIdx ].orientation << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////
-
-    file << "VECTORS posCell double" << std::endl;
-
-	for ( auto face : this->faces )
-    {
-        MeshCell& cell = this->cells[ face.posCell ];
-
-        Vec3 vec = cell.cellCenter - face.faceCenter;
-            
-		file << vec.x << " ";
-		file << vec.y << " ";
-		file << vec.z << std::endl;
-    }
-
-    file << "VECTORS negCell double" << std::endl;
-
-	for ( auto face : this->faces )
-    {
-        MeshCell& cell = this->cells[ face.negCell ];
-
-        Vec3 vec = cell.cellCenter - face.faceCenter;
-            
-		file << vec.x << " ";
-		file << vec.y << " ";
-		file << vec.z << std::endl;
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    file.close();
-}
-
-void GksMeshAdapter::writeMeshCellToCellVTK(std::string filename)
-{
-    //std::ofstream file;
-
-    //file.open(filename);
-
-    //file << "# vtk DataFile Version 3.0\n";
-    //file << "by MeshGenerator\n";
-    //file << "ASCII\n";
-    //file << "DATASET UNSTRUCTURED_GRID\n";
-
-    //file << "POINTS " << this->cells.size() << " float" << std::endl;
-
-    //for (auto cell : cells){
-    //    file << cell.cellCenter.x << " " << cell.cellCenter.y << " 0.0" << std::endl;
-    //}
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "CELLS " << 8 * this->cells.size() << " " << 3 * 8 * this->cells.size() << std::endl;
-
-    //for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-
-    //    for( uint i = 0; i < 8; i++ )
-    //        if(  this->cells[ cellIdx ].cellToCell[ i ] != INVALID_INDEX )
-    //            file << "2 " << cellIdx << " " << this->cells[ cellIdx ].cellToCell[ i ] << " " << std::endl;
-    //        else
-    //            file << "2 " << cellIdx << " " << cellIdx << " " << std::endl;
-    //}
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "CELL_TYPES " << 8 * this->cells.size() << std::endl;
-
-    //for ( uint i = 0; i < 8 * this->cells.size(); i++ ){
-    //    file << "3" << std::endl;
-    //}
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "\nCELL_DATA " << 8 * this->cells.size() << std::endl;
-
-    //file << "FIELD Label " << 2 << std::endl;
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "CellIdx 1 " << 8 * this->cells.size() << " int" << std::endl;
-
-    //for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-
-    //    for( uint i = 0; i < 8; i++ )
-    //        file << cellIdx << std::endl;
-    //}
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "CellToCell 1 " << 8 * this->cells.size() << " int" << std::endl;
-
-    //for ( uint cellIdx = 0; cellIdx < this->cells.size(); cellIdx++ ){
-
-    //    for( uint i = 0; i < 8; i++ )
-    //        file << i << std::endl;
-    //}
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file.close();
-}
-
-void GksMeshAdapter::writeMeshFaceToCellVTK(std::string filename)
-{
-    //std::ofstream file;
-
-    //file.open(filename);
-
-    //file << "# vtk DataFile Version 3.0\n";
-    //file << "by MeshGenerator\n";
-    //file << "ASCII\n";
-    //file << "DATASET UNSTRUCTURED_GRID\n";
-
-    //file << "POINTS " << this->cells.size() + this->faces.size() << " float" << std::endl;
-
-    //for (auto cell : cells){
-    //    file << cell.cellCenter.x << " " << cell.cellCenter.y << " 0.0" << std::endl;
-    //}
-
-    //for (auto face : faces){
-    //    file << face.faceCenter.x << " " << face.faceCenter.y << " 0.0" << std::endl;
-    //}
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "CELLS " << 6 * this->faces.size() << " " << 3 * 6 * this->faces.size() << std::endl;
-
-    //for ( uint faceIdx = 0; faceIdx < this->faces.size(); faceIdx++ ){
-
-    //    for( uint i = 0; i < 6; i++ )
-    //        if(  this->faces[ faceIdx ].faceToCell[ i ] != INVALID_INDEX )
-    //            file << "2 " << this->cells.size() + faceIdx << " " << this->faces[ faceIdx ].faceToCell[ i ] << " " << std::endl;
-    //        else
-    //            file << "2 " << this->cells.size() + faceIdx << " " << this->cells.size() + faceIdx << " " << std::endl;
-    //}
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "CELL_TYPES " << 6 * this->faces.size() << std::endl;
-
-    //for ( uint i = 0; i < 6 * this->faces.size(); i++ ){
-    //    file << "3" << std::endl;
-    //}
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "\nCELL_DATA " << 6 * this->faces.size() << std::endl;
-
-    //file << "FIELD Label " << 2 << std::endl;
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "FaceIdx 1 " << 6 * this->faces.size() << " int" << std::endl;
-
-    //for ( uint faceIdx = 0; faceIdx < this->faces.size(); faceIdx++ ){
-
-    //    for( uint i = 0; i < 6; i++ )
-    //        file << faceIdx << std::endl;
-    //}
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file << "FaceToCell 1 " << 6 * this->faces.size() << " int" << std::endl;
-
-    //for ( uint faceIdx = 0; faceIdx < this->faces.size(); faceIdx++ ){
-
-    //    for( uint i = 0; i < 6; i++ )
-    //        file << i << std::endl;
-    //}
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    //file.close();
-}
-
-double GksMeshAdapter::getDx(uint level)
-{
-    return dxCoarse / pow( 2.0, level );
-}
diff --git a/src/gpu/GksMeshAdapter/GksMeshAdapter.h b/src/gpu/GksMeshAdapter/GksMeshAdapter.h
deleted file mode 100644
index ede88301f7133fdb6685d94906960eec955ad449..0000000000000000000000000000000000000000
--- a/src/gpu/GksMeshAdapter/GksMeshAdapter.h
+++ /dev/null
@@ -1,160 +0,0 @@
-#ifndef GKS_MESH_ADAPTER_H
-#define GKS_MESH_ADAPTER_H
-
-#include <memory>
-#include <array>
-#include <vector>
-
-#include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "MeshCell.h"
-#include "MeshFace.h"
-
-
-
-#include "GksMeshAdapter_export.h"
-
-class MultipleGridBuilder;
-
-class GKSMESHADAPTER_EXPORT GksMeshAdapter{
-
-public:
-
-    SPtr<MultipleGridBuilder> gridBuilder;
-
-    uint numberOfLevels;
-
-    double dxCoarse;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    std::vector<Vec3> nodes;
-
-    //////////////////////////////////////////////////////////////////////////
-    //
-    //    C e l l    s o r t i n g :
-    //
-    //  | Level 0                   | Level 1                   | Level 2                   |
-    //  | FluidCells   | GhostCells | FluidCells   | GhostCells | FluidCells   | GhostCells | 
-    //
-    //  GhostCells: not included in Cell update, i.e. BoundaryCells and FCC-Cells
-    //  FluidCells: all other, including CFF-Cells
-    //
-    std::vector<MeshCell> cells;
-
-    std::vector<uint> numberOfCellsPerLevel;
-    std::vector<uint> numberOfBulkCellsPerLevel;
-    std::vector<uint> startOfCellsPerLevel;
-
-    //////////////////////////////////////////////////////////////////////////
-    //
-    //    F a c e    s o r t i n g :
-    //
-    //  | Level 0                              | Level 1                              | Level 2                              |
-    //  | x-normal   | y-normal   | z-normal   | x-normal   | y-normal   | z-normal   | x-normal   | y-normal   | z-normal   |
-    //
-    std::vector<MeshFace> faces;
-
-    std::vector<uint> numberOfFacesPerLevelXYZ;
-    std::vector<uint> startOfFacesPerLevelXYZ;
-
-    std::vector<uint> numberOfInnerFacesPerLevel;
-
-    //////////////////////////////////////////////////////////////////////////
-    //
-    //    F i n e T o C o a r s e    s o r t i n g :
-    //
-    //  | Coarse Cell Idx | Child Idcs | ...
-    //
-    std::vector<uint_9> fineToCoarse;
-
-    std::vector<uint> numberOfFineToCoarsePerLevel;
-    std::vector<uint> startOfFineToCoarsePerLevel;
-
-    //////////////////////////////////////////////////////////////////////////
-    //
-    //    F i n e T o C o a r s e    s o r t i n g :
-    //
-    //  | Coarse Cell Idx | Coarse Neighbor Idcs | Child Idcs | ...
-    //
-    std::vector<uint_15> coarseToFine;
-
-    std::vector<uint> numberOfCoarseToFinePerLevel;
-    std::vector<uint> startOfCoarseToFinePerLevel;
-
-    //////////////////////////////////////////////////////////////////////////
-    // 
-    // Connectivity from LBM grid to GKS Mesh
-    //
-    //    cellIdx = gridToMesh[ level ][ gridIdx ]
-    //
-    std::vector< std::vector<uint> > gridToMesh;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    std::vector< uint_2 > periodicBoundaryNeighbors;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    struct CommunicationIndices
-    {
-        std::array< std::vector< uint >, 6 > sendIndices;
-        std::array< std::vector< uint >, 6 > recvIndices;
-    };
-
-    std::vector< CommunicationIndices > communicationIndices;
-
-    std::array<uint, 6> communicationProcesses;
-
-public:
-
-    GksMeshAdapter( SPtr<MultipleGridBuilder> gridBuilder );
-
-    void inputGrid();
-
-    void findQuadtreeConnectivity();
-
-    void findCellToCellConnectivity();
-
-    void countCells();
-
-    void partitionCells();
-
-    void refreshCellConnectivity(const std::vector<uint>& idxMap);
-
-    void findCornerCells();
-
-    void generateNodes();
-
-    void computeCellGeometry();
-
-    void generateFaces();
-
-    void sortFaces();
-
-    void countFaces();
-
-    void countInnerFaces();
-
-    void generateInterfaceConnectivity();
-
-    void findPeriodicBoundaryNeighbors();
-
-    void getCommunicationIndices();
-
-    void writeMeshVTK( std::string filename );
-
-    void writeMeshFaceVTK( std::string filename );
-
-    void writeMeshCellToCellVTK( std::string filename );
-
-    void writeMeshFaceToCellVTK( std::string filename );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    double getDx(uint level);
-};
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksMeshAdapter/MeshCell.cpp b/src/gpu/GksMeshAdapter/MeshCell.cpp
deleted file mode 100644
index 349d308ee498e89f9705091c72d3ca7013bf3d4e..0000000000000000000000000000000000000000
--- a/src/gpu/GksMeshAdapter/MeshCell.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file MeshCell.cpp
-//! \ingroup GksMeshAdapter
-//! \author Stephan Lenz
-//=======================================================================================
-#include "MeshCell.h"
-
-#include "GridGenerator/grid/NodeValues.h"
-
-using namespace vf::gpu;
-
-MeshCell::MeshCell(){
-
-    level   = INVALID_INDEX;
-    gridIdx = INVALID_INDEX;
-
-
-    for( uint& index : this->cellToNode     ) index = INVALID_INDEX;
-    for( uint& index : this->cellToEdgeNode ) index = INVALID_INDEX;
-    for( uint& index : this->cellToFaceNode ) index = INVALID_INDEX;
-    for( uint& index : this->cellToCell     ) index = INVALID_INDEX;
-    for( uint& index : this->children       ) index = INVALID_INDEX;
-    
-    parent = INVALID_INDEX;
-
-    for( bool& flag : this->faceExists    ) flag = false;
-
-    isGhostCell = false;
-
-    isWall = false;
-
-    isFluxBC = false;
-
-    isInsulated = false;
-
-    isRecvCell = false;
-}
-
-bool MeshCell::isCoarseGhostCell()
-{
-    return this->type == FLUID_FCC;
-}
-
-bool MeshCell::isFineGhostCell()
-{
-    return this->type == FLUID_CFF;
-}
diff --git a/src/gpu/GksMeshAdapter/MeshCell.h b/src/gpu/GksMeshAdapter/MeshCell.h
deleted file mode 100644
index 1b09a69b847a27c98c27b65b3190ce00ebec18c4..0000000000000000000000000000000000000000
--- a/src/gpu/GksMeshAdapter/MeshCell.h
+++ /dev/null
@@ -1,113 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file MeshCell.h
-//! \ingroup GksMeshAdapter
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef MESH_CELL_H
-#define MESH_CELL_H
-
-#include <array>
-
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/ArrayTypes.h"
-
-#include "GksMeshAdapter_export.h"
-
-struct GKSMESHADAPTER_EXPORT MeshCell{
-
-    uint level;
-    uint gridIdx;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    uint_8 cellToNode;           uint_12 cellToEdgeNode;         uint_6 cellToFaceNode;
-
-    // for sorting see LBM f numbering
-    //                                   8                              
-    //     4 o--------o 0            o--------o                    o--------o 
-    //      /|       /|            7/|      4/|                   /|  4    /|  
-    //     / |      / |            /3| 11   / |                  / |    2 / |  
-    //  6 o--------o 2|           o--------o  |0                o--------o  |  
-    //    |  |     |  |           |  |  10 |  |                 |1 |     | 0|  
-    //    |5 o-----|--o 1        1|  o-----|--o                 |  o-----|--o  
-    //    | /      | /            |5/     2| /6                 | / 3    | /   
-    //    |/       |/             |/   9   |/                   |/    5  |/    
-    //  7 o--------o 3            o--------o                    o--------o      
-    //
-    //  z | / y
-    //    |/
-    //    +---- x
-    //     
-
-    //////////////////////////////////////////////////////////////////////////
-
-    uint_27 cellToCell;
-
-    // for sorting see LBM f numbering
-
-    //////////////////////////////////////////////////////////////////////////
-
-    uint_8 children;
-
-    // for sorting see cellToNode
-
-    uint  parent;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Vec3   cellCenter;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    // order: +x, -x, +y, -y, +z, -z (see cellToCell)
-    bool_6 faceExists;
-    
-    bool isGhostCell;   // this denotes cells that do not have all neighbors, excluding coarse ghost cells
-
-    bool isWall;
-
-    bool isFluxBC;
-
-    bool isInsulated;
-
-    bool isRecvCell;
-
-    char type;
-
-    MeshCell();
-
-    bool isCoarseGhostCell();
-
-    bool isFineGhostCell();
-};
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksMeshAdapter/MeshFace.cpp b/src/gpu/GksMeshAdapter/MeshFace.cpp
deleted file mode 100644
index b07355583e50b4b88c9ce5f43c489b3f82310ce2..0000000000000000000000000000000000000000
--- a/src/gpu/GksMeshAdapter/MeshFace.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file MeshFace.cpp
-//! \ingroup GksMeshAdapter
-//! \author Stephan Lenz
-//=======================================================================================
-#include "MeshFace.h"
-
-MeshFace::MeshFace()
-{
-    for( uint& node : this->faceToNode ) node = INVALID_INDEX;
-
-    posCell       = INVALID_INDEX;
-    negCell       = INVALID_INDEX;
-    posCellCoarse = INVALID_INDEX;
-    negCellCoarse = INVALID_INDEX;
-}
\ No newline at end of file
diff --git a/src/gpu/GksMeshAdapter/MeshFace.h b/src/gpu/GksMeshAdapter/MeshFace.h
deleted file mode 100644
index 54a37dbbcdc89224037edeb3e01e34b67f8fc536..0000000000000000000000000000000000000000
--- a/src/gpu/GksMeshAdapter/MeshFace.h
+++ /dev/null
@@ -1,82 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file MeshFace.h
-//! \ingroup GksMeshAdapter
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef MESH_FACE_H
-#define MESH_FACE_H
-
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/ArrayTypes.h"
-
-#include "GksMeshAdapter_export.h"
-
-struct GKSMESHADAPTER_EXPORT MeshFace
-{
-    //////////////////////////////////////////////////////////////////////////
-
-    //      o 2                 
-    //     /|                   
-    //    / |                   
-    //   o 3|     n            
-    //   | -+--------->        
-    //   |  o 1                
-    //   | /                    
-    //   |/                     
-    //   o 0                    
-    //
-    //
-
-    uint_4 faceToNode;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    uint posCell;
-    uint negCell;
-
-    uint posCellCoarse;
-    uint negCellCoarse;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    Vec3 faceCenter;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    char orientation;
-
-    uint level;
-
-    MeshFace();
-};
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksVtkAdapter/CMakeLists.txt b/src/gpu/GksVtkAdapter/CMakeLists.txt
deleted file mode 100644
index fdc7a1eb56f548afc58e83ef7b0f7ad02ad12ea9..0000000000000000000000000000000000000000
--- a/src/gpu/GksVtkAdapter/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-
-
-vf_add_library(BUILDTYPE static PRIVATE_LINK basics GksGpu)
-
-include (${VF_CMAKE_DIR}/3rd/vtk.cmake)
\ No newline at end of file
diff --git a/src/gpu/GksVtkAdapter/VTKAdapter.cpp b/src/gpu/GksVtkAdapter/VTKAdapter.cpp
deleted file mode 100644
index df81524982a73e82ac3e5f564a1b82b057196854..0000000000000000000000000000000000000000
--- a/src/gpu/GksVtkAdapter/VTKAdapter.cpp
+++ /dev/null
@@ -1,905 +0,0 @@
-#include "VTKAdapter.h"
-#include "VTKInterface.h"
-
-#include <vtkImageData.h>
-
-#include <vtkCellData.h>
-#include <vtkPointData.h>
-
-#include <vtkResampleWithDataSet.h>
-#include <vtkPNGWriter.h>
-
-#include <vtkGeometryFilter.h>
-#include <vtkCleanPolyData.h>
-#include <vtkCellDataToPointData.h>
-#include <vtkPointDataToCellData.h>
-
-#include "Core/DataTypes.h"
-#include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "GksGpu/Analyzer/TurbulenceAnalyzer.h"
-
-#include "GksGpu/BoundaryConditions/ConcreteHeatFlux.h"
-
-#include "GksGpu/Definitions/MemoryAccessPattern.h"
-#include "GksGpu/FlowStateData/FlowStateData.cuh"
-#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-
-vtkGridPtr getVtkUnstructuredOctGrid( SPtr<GksGpu::DataBase> dataBase, bool excludeGhostCells )
-{
-    vtkGridPtr grid = vtkGridPtr::New();
- 
-    vtkPointsPtr points = vtkPointsPtr::New();
- 
-    //////////////////////////////////////////////////////////////////////////
-
-    for( uint nodeIdx = 0; nodeIdx < dataBase->numberOfNodes; nodeIdx++ ){
-  
-        Vec3& node = dataBase->nodeCoordinates[ nodeIdx ];
-
-        points->InsertNextPoint( node.x, node.y, node.z );
-    }
-
-    grid->SetPoints( points );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    for( uint cellIdx = 0; cellIdx < dataBase->numberOfCells; cellIdx++ ){
-
-        if( dataBase->isGhostCell( cellIdx ) && excludeGhostCells ) continue;
-
-        vtkIdListPtr idList = vtkIdListPtr::New();
-
-        idList->SetNumberOfIds( 8 );
-
-        idList->SetId( 0, dataBase->cellToNode[ cellIdx ][ 0 ] );
-        idList->SetId( 1, dataBase->cellToNode[ cellIdx ][ 1 ] );
-        idList->SetId( 2, dataBase->cellToNode[ cellIdx ][ 2 ] );
-        idList->SetId( 3, dataBase->cellToNode[ cellIdx ][ 3 ] );
-        idList->SetId( 4, dataBase->cellToNode[ cellIdx ][ 4 ] );
-        idList->SetId( 5, dataBase->cellToNode[ cellIdx ][ 5 ] );
-        idList->SetId( 6, dataBase->cellToNode[ cellIdx ][ 6 ] );
-        idList->SetId( 7, dataBase->cellToNode[ cellIdx ][ 7 ] );
-
-        grid->InsertNextCell( 12, idList );
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-
-    return grid;
-}
-
-void addScalarIntCellData( vtkGridPtr grid, 
-                        uint numberOfCells, 
-                        std::string name, 
-                        std::function<int(uint)> getData )
-{
-    vtkIntArrayPtr data = vtkIntArrayPtr::New();
-
-    data->SetNumberOfComponents( 1 );
-
-    data->SetName( name.c_str() );
-
-    for( uint cellIdx = 0; cellIdx < numberOfCells; cellIdx++ ){
-        data->InsertNextValue( getData(cellIdx) );
-    }
-
-    grid->GetCellData()->AddArray( data );
-}
-
-void addScalarRealCellData( vtkGridPtr grid, 
-                        uint numberOfCells, 
-                        std::string name, 
-                        std::function<real(uint)> getData )
-{
-    vtkDoubleArrayPtr data = vtkDoubleArrayPtr::New();
-
-    data->SetNumberOfComponents( 1 );
-
-    data->SetName( name.c_str() );
-
-    for( uint cellIdx = 0; cellIdx < numberOfCells; cellIdx++ ){
-        data->InsertNextValue( getData(cellIdx) );
-    }
-
-    grid->GetCellData()->AddArray( data );
-}
-
-void addVectorCellData( vtkGridPtr grid, 
-                        uint numberOfCells, 
-                        std::string name, 
-                        std::function<Vec3(uint)> getData )
-{
-    vtkDoubleArrayPtr data = vtkDoubleArrayPtr::New();
-
-    data->SetNumberOfComponents( 3 );
-
-    data->SetName( name.c_str() );
-
-    for( uint cellIdx = 0; cellIdx < numberOfCells; cellIdx++ ){
-        Vec3 vec = getData(cellIdx);
-        double tupel[3] = {vec.x, vec.y, vec.z};
-        data->InsertNextTuple(tupel);
-    }
-
-    grid->GetCellData()->AddArray( data );
-}
-
-void addBaseData(vtkGridPtr grid, SPtr<GksGpu::DataBase> dataBase, GksGpu::Parameters parameters)
-{
-    addScalarIntCellData( grid, dataBase->numberOfCells, "CellIdx", [&] (uint cellIdx) {
-        return cellIdx;
-    } );
-
-    addScalarRealCellData( grid, dataBase->numberOfCells, "rho", [&] (uint cellIdx) {
-        return dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells) ];
-    } );
-
-    addScalarRealCellData( grid, dataBase->numberOfCells, "T", [&] (uint cellIdx) {
-                    
-        GksGpu::ConservedVariables cons;
-
-        cons.rho  = dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoU = dataBase->dataHost[ RHO_U(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoV = dataBase->dataHost[ RHO_V(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoW = dataBase->dataHost[ RHO_W(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoE = dataBase->dataHost[ RHO_E(cellIdx, dataBase->numberOfCells) ];
-#ifdef USE_PASSIVE_SCALAR
-        cons.rhoS_1 = dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoS_2 = dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ];
-#endif // USE_PASSIVE_SCALAR
-
-        GksGpu::PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-        
-#ifdef USE_PASSIVE_SCALAR
-        return getT(prim);
-#else // USE_PASSIVE_SCALAR
-        return 1.0 / prim.lambda;
-#endif // USE_PASSIVE_SCALAR
-    } );
-
-    addScalarRealCellData( grid, dataBase->numberOfCells, "lambda", [&] (uint cellIdx) {
-                    
-        GksGpu::ConservedVariables cons;
-
-        cons.rho  = dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoU = dataBase->dataHost[ RHO_U(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoV = dataBase->dataHost[ RHO_V(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoW = dataBase->dataHost[ RHO_W(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoE = dataBase->dataHost[ RHO_E(cellIdx, dataBase->numberOfCells) ];
-#ifdef USE_PASSIVE_SCALAR
-        cons.rhoS_1 = dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoS_2 = dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ];
-#endif // USE_PASSIVE_SCALAR
-
-        GksGpu::PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-
-        return prim.lambda;
-    } );
-
-    addScalarRealCellData( grid, dataBase->numberOfCells, "p", [&] (uint cellIdx) {
-                    
-        GksGpu::ConservedVariables cons;
-
-        cons.rho  = dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoU = dataBase->dataHost[ RHO_U(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoV = dataBase->dataHost[ RHO_V(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoW = dataBase->dataHost[ RHO_W(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoE = dataBase->dataHost[ RHO_E(cellIdx, dataBase->numberOfCells) ];
-#ifdef USE_PASSIVE_SCALAR
-        cons.rhoS_1 = dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoS_2 = dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ];
-#endif // USE_PASSIVE_SCALAR
-
-        GksGpu::PrimitiveVariables prim = toPrimitiveVariables(cons, parameters.K);
-
-        return 0.5 * prim.rho / prim.lambda;
-    } );
-
-    addScalarIntCellData( grid, dataBase->numberOfCells, "GhostCell", [&] (uint cellIdx) -> int {
-        return dataBase->isGhostCell( cellIdx );
-    } );
-
-    addScalarIntCellData( grid, dataBase->numberOfCells, "Level", [&] (uint cellIdx) {
-        return dataBase->getCellLevel(cellIdx);
-    } );
-            
-    addVectorCellData( grid, dataBase->numberOfCells, "Velocity", [&] (uint cellIdx) {
-                    
-        GksGpu::ConservedVariables cons;
-
-        cons.rho  = dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoU = dataBase->dataHost[ RHO_U(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoV = dataBase->dataHost[ RHO_V(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoW = dataBase->dataHost[ RHO_W(cellIdx, dataBase->numberOfCells) ];
-        cons.rhoE = dataBase->dataHost[ RHO_E(cellIdx, dataBase->numberOfCells) ];
-
-        GksGpu::PrimitiveVariables prim = toPrimitiveVariables( cons, parameters.K );
-
-        return Vec3( prim.U, prim.V, prim.W );
-    } );
-
-#ifdef USE_PASSIVE_SCALAR
-	addScalarRealCellData( grid, dataBase->numberOfCells, "Y_F", [&] (uint cellIdx) {
-	    return dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ]
-             / dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells)   ];
-	} );
-
-	addScalarRealCellData( grid, dataBase->numberOfCells, "Y_P", [&] (uint cellIdx) {
-	    return dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ]
-             / dataBase->dataHost[ RHO__(cellIdx, dataBase->numberOfCells)   ];
-	} );
-
-	addScalarRealCellData( grid, dataBase->numberOfCells, "Y_A", [&] (uint cellIdx) {
-	    return c1o1 - dataBase->dataHost[ RHO_S_1(cellIdx, dataBase->numberOfCells) ]
-                   / dataBase->dataHost[ RHO__  (cellIdx, dataBase->numberOfCells) ]
-                   - dataBase->dataHost[ RHO_S_2(cellIdx, dataBase->numberOfCells) ]
-                   / dataBase->dataHost[ RHO__  (cellIdx, dataBase->numberOfCells) ]
-               ;
-	} );
-
-	addScalarRealCellData( grid, dataBase->numberOfCells, "rhoE", [&] (uint cellIdx) {
-	    return dataBase->dataHost[ RHO_E(cellIdx, dataBase->numberOfCells) ];
-	} );
-#endif // USE_PASSIVE_SCALAR
-
-	addScalarRealCellData( grid, dataBase->numberOfCells, "D_LES", [&] (uint cellIdx) {
-	    return dataBase->diffusivityHost[ cellIdx ];
-	} );
-
-}
-
-void writeVtkUnstructuredGrid( vtkGridPtr grid, int mode, std::string filename )
-{
-    vtkWriterPtr writer = vtkWriterPtr::New();
-
-    writer->SetDataMode(mode);
-
-    filename += ".";
-    filename += writer->GetDefaultFileExtension();
-
-    writer->SetFileName( filename.c_str() );
-
-    writer->SetInputData( grid );
-
-    writer->Write();
-}
-
-void GKSVTKADAPTER_EXPORT writeVtkParallelUnstructuredGridSummaryFile(vtkGridPtr grid, std::string filename, uint mpiWorldSize)
-{
-    uint numberOfArrays = grid->GetCellData()->GetNumberOfArrays();
-
-    const auto filenameWithoutPath=filename.substr( filename.find_last_of('/') + 1 );
-
-    std::ofstream file;
-
-    file.open( filename + ".pvtu" );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    file << "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\" header_type=\"UInt64\">" << std::endl;
-    file << "  <PUnstructuredGrid GhostLevel=\"1\">" << std::endl;
-
-    file << "    <PCellData>" << std::endl;
-
-    for( uint i = 0; i < numberOfArrays; i++ )
-    {
-        int typeID( grid->GetCellData()->GetArray(i)->GetDataType() );
-        std::string name( grid->GetCellData()->GetArray(i)->GetName() );
-
-        uint numberOfComponents = grid->GetCellData()->GetArray(i)->GetNumberOfComponents();
-
-        std::string type;
-        if( typeID == VTK_INT    ) type = "Int32";
-        if( typeID == VTK_FLOAT  ) type = "Float32";
-        if( typeID == VTK_DOUBLE ) type = "Float64";
-
-        file << "      <PDataArray type=\"" << type << "\" Name=\"" << name << "\" NumberOfComponents=\"" << numberOfComponents << "\"/>" << std::endl;
-    }
-
-    file << "    </PCellData>" << std::endl;
-
-    file << "    <PPoints>" << std::endl;
-    file << "      <PDataArray type=\"Float32\" Name=\"Points\" NumberOfComponents=\"3\"/>" << std::endl;
-    file << "    </PPoints>" << std::endl;
-
-    for( uint rank = 0; rank < mpiWorldSize; rank++ )
-    {
-        file << "    <Piece Source=\"" << filenameWithoutPath << "_rank_" << rank << ".vtu\"/>" << std::endl;
-    }
-
-    file << "  </PUnstructuredGrid>" << std::endl;
-    file << "</VTKFile>" << std::endl;
-
-    //////////////////////////////////////////////////////////////////////////
-}
-
-rgbColor colorMapCoolToWarmExtended( double value, double min, double max )
-{    
-    // Color map exported from Paraview
-    const double colorMap[36][3] = 
-    /*  0 */  { { 0,                      0,                      0.34902              },
-    /*  1 */    { 0.039216000000000001,   0.062744999999999995,   0.38039200000000001  },
-    /*  2 */    { 0.062744999999999995,   0.117647,               0.41176499999999999  },
-    /*  3 */    { 0.090195999999999998,   0.18431400000000001,    0.45097999999999999  },
-    /*  4 */    { 0.12548999999999999,    0.26274500000000001,    0.50196099999999999  },
-    /*  5 */    { 0.16078400000000001,    0.33725500000000003,    0.54117599999999999  },
-    /*  6 */    { 0.20000000000000001,    0.39607799999999999,    0.56862699999999999  },
-    /*  7 */    { 0.23921600000000001,    0.45490199999999997,    0.59999999999999998  },
-    /*  8 */    { 0.286275,               0.52156899999999995,    0.65098              },
-    /*  9 */    { 0.33725500000000003,    0.59215700000000004,    0.70196099999999995  },
-    /* 10 */    { 0.388235,               0.65490199999999998,    0.74902000000000002  },
-    /* 11 */    { 0.466667,               0.73725499999999999,    0.819608             },
-    /* 12 */    { 0.57254899999999997,    0.819608,               0.87843099999999996  },
-    /* 13 */    { 0.65490199999999998,    0.86666699999999997,    0.90980399999999995  },
-    /* 14 */    { 0.75294099999999997,    0.91764699999999999,    0.94117600000000001  },
-    /* 15 */    { 0.82352899999999996,    0.95686300000000002,    0.96862700000000002  },
-    ///* 15 */    { 1.0,                    1.0,                    1.0                  },
-    /* 16 */    { 0.98823499999999997,    0.96078399999999997,    0.90196100000000001  },
-    ///* 16 */    { 1.0,                    1.0,                    1.0                  },
-
-    ///* 17 */    { 1.0,                    1.0,                    1.0                  },
-    /* 17 */    { 0.94117600000000001,    0.98431400000000002,    0.98823499999999997  },
-    ///* 18 */    { 1.0,                    1.0,                    1.0                  },
-    /* 18 */    { 0.98823499999999997,    0.94509799999999999,    0.85097999999999996  },
-    ///* 19 */    { 1.0,                    1.0,                    1.0                  },
-    /* 19 */    { 0.98039200000000004,    0.89803900000000003,    0.78431399999999996  },
-    /* 20 */    { 0.96862700000000002,    0.83529399999999998,    0.69803899999999997  },
-    /* 21 */    { 0.94901999999999997,    0.73333300000000001,    0.58823499999999995  },
-    /* 22 */    { 0.92941200000000002,    0.65098,                0.50980400000000003  },
-    /* 23 */    { 0.90980399999999995,    0.56470600000000004,    0.43529400000000001  },
-    /* 24 */    { 0.87843099999999996,    0.45882400000000001,    0.352941             },
-    /* 25 */    { 0.83921599999999996,    0.388235,               0.286275             },
-    /* 26 */    { 0.76078400000000002,    0.29411799999999999,    0.21176500000000001  },
-    /* 27 */    { 0.70196099999999995,    0.21176500000000001,    0.168627             },
-    /* 28 */    { 0.65098,                0.156863,               0.129412             },
-    /* 29 */    { 0.59999999999999998,    0.094117999999999993,   0.094117999999999993 },
-    /* 30 */    { 0.54901999999999995,    0.066667000000000004,   0.098039000000000001 },
-    /* 31 */    { 0.50196099999999999,    0.050979999999999998,   0.12548999999999999  },
-    /* 32 */    { 0.45097999999999999,    0.054901999999999999,   0.17254900000000001  },
-    /* 33 */    { 0.40000000000000002,    0.054901999999999999,   0.19215699999999999  },
-    /* 34 */    { 0.34902,                0.070587999999999998,   0.21176500000000001  },
-                { 0.34902,                0.070587999999999998,   0.21176500000000001  } };
-    
-    if( value < min )
-        value = 0.0;
-    else if ( value > max )
-        value = 1.0;
-    else
-        value = ( value - min ) / ( max - min );
-
-    unsigned int idx           = value * 34;
-    double       interpolation = value * 34.0 - double( idx );
-
-    rgbColor color;
-
-    color.r = ( ( 1.0 - interpolation ) * colorMap[idx  ][0]
-              +         interpolation   * colorMap[idx+1][0] ) * 200.0 * 1.2;
-
-    color.g = ( ( 1.0 - interpolation ) * colorMap[idx  ][1]
-              +         interpolation   * colorMap[idx+1][1] ) * 197.0 * 1.2;
-
-    color.b = ( ( 1.0 - interpolation ) * colorMap[idx  ][2]
-              +         interpolation   * colorMap[idx+1][2] ) * 189.0 * 1.2;
-
-    return color;
-}
-
-void writePNG( vtkDataObject * inputData, int nx, int ny, double L, double H, std::string filename )
-{
-    vtkSmartPointer<vtkImageData> image = vtkSmartPointer<vtkImageData>::New();
-    image->SetDimensions( nx, ny, 1 );
-    image->SetSpacing( L / ( nx - 1 ), H / ( ny - 1 ), 0 );
-
-    vtkSmartPointer<vtkResampleWithDataSet> resample = vtkSmartPointer<vtkResampleWithDataSet>::New();
-    resample->SetSourceData( inputData );
-    resample->SetInputData( image );
-    resample->Update();
-
-    vtkSmartPointer<vtkImageData> image2 = (vtkImageData*) resample->GetOutput();
-
-    image2->GetPointData()->SetScalars( image2->GetPointData()->GetArray( 0 ) );
-
-    vtkSmartPointer<vtkPNGWriter> writerPNG = vtkSmartPointer<vtkPNGWriter>::New();
-    writerPNG->SetFileName( ( filename + ".png" ).c_str() );
-    writerPNG->SetInputData( image2 );
-    writerPNG->Write();
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void writeVtkXML(std::shared_ptr<GksGpu::DataBase> dataBase, 
-                 GksGpu::Parameters parameters, 
-                 int mode, 
-                 std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << filename << ".vtu" << " ... \n";
-
-    vtkGridPtr grid = getVtkUnstructuredOctGrid(dataBase);
-
-    addBaseData( grid, dataBase, parameters );
-
-    writeVtkUnstructuredGrid( grid, vtkXMLWriter::Binary, filename );
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void GKSVTKADAPTER_EXPORT writeVtkXMLParallelSummaryFile(std::shared_ptr<GksGpu::DataBase> dataBase, GksGpu::Parameters parameters, std::string filename, uint mpiWorldSize)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << filename << ".pvtu" << " ... \n";
-
-    vtkGridPtr grid = getVtkUnstructuredOctGrid(dataBase);
-
-    addBaseData( grid, dataBase, parameters );
-
-    writeVtkParallelUnstructuredGridSummaryFile( grid, filename, mpiWorldSize );
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void writeTurbulenceVtkXML(std::shared_ptr<GksGpu::DataBase> dataBase, 
-                           std::shared_ptr<GksGpu::TurbulenceAnalyzer> turbulenceAnalyzer,
-                           int mode, 
-                           std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << filename << ".vtu" << " ... \n";
-
-    vtkGridPtr grid = getVtkUnstructuredOctGrid(dataBase);
-
-    addScalarIntCellData( grid, dataBase->numberOfCells, "CellIdx", [&] (uint cellIdx) {
-        return cellIdx;
-    } );
-
-    addScalarIntCellData( grid, dataBase->numberOfCells, "GhostCell", [&] (uint cellIdx) -> int {
-        return dataBase->isGhostCell( cellIdx );
-    } );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    if( turbulenceAnalyzer->collect_U )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "U", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_U[ cellIdx ];
-        });
-    
-    if( turbulenceAnalyzer->collect_V )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "V", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_V[ cellIdx ];
-        });
-    
-    if( turbulenceAnalyzer->collect_W )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "W", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_W[ cellIdx ];
-        });
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    if( turbulenceAnalyzer->collect_UU )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "UU", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_UU[ cellIdx ];
-        });
-    
-    if( turbulenceAnalyzer->collect_VV )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "VV", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_VV[ cellIdx ];
-        });
-    
-    if( turbulenceAnalyzer->collect_WW )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "WW", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_WW[ cellIdx ];
-        });
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    if( turbulenceAnalyzer->collect_UV )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "UV", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_UV[ cellIdx ];
-        });
-    
-    if( turbulenceAnalyzer->collect_UW )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "UW", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_UW[ cellIdx ];
-        });
-    
-    if( turbulenceAnalyzer->collect_VW )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "VW", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_VW[ cellIdx ];
-        });
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    if( turbulenceAnalyzer->collect_T )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "T", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_T[ cellIdx ];
-        });
-    
-    if( turbulenceAnalyzer->collect_TT )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "TT", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_TT[ cellIdx ];
-        });
-    
-    if( turbulenceAnalyzer->collect_p )
-        addScalarRealCellData(grid, dataBase->numberOfCells, "p", [&](uint cellIdx) {
-            return turbulenceAnalyzer->h_p[ cellIdx ];
-        });
-
-    //////////////////////////////////////////////////////////////////////////
-
-    writeVtkUnstructuredGrid( grid, vtkXMLWriter::Binary, filename );
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void GKSVTKADAPTER_EXPORT writeTurbulenceVtkXMLParallelSummaryFile(std::shared_ptr<GksGpu::DataBase> dataBase, std::shared_ptr<GksGpu::TurbulenceAnalyzer> turbulenceAnalyzer,GksGpu::Parameters parameters, std::string filename, uint mpiWorldSize)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << filename << ".pvtu" << " ... \n";
-
-    vtkGridPtr grid = getVtkUnstructuredOctGrid(dataBase);
-    
-    //////////////////////////////////////////////////////////////////////////
-
-    const auto filenameWithoutPath=filename.substr( filename.find_last_of('/') + 1 );
-
-    std::ofstream file;
-
-    file.open( filename + ".pvtu" );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    file << "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\" header_type=\"UInt64\">" << std::endl;
-    file << "  <PUnstructuredGrid GhostLevel=\"1\">" << std::endl;
-
-    file << "    <PCellData>" << std::endl;
-
-        file << "      <PDataArray type=\"" << "Int32"   << "\" Name=\"" << "CellIdx"   << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        file << "      <PDataArray type=\"" << "Int32"   << "\" Name=\"" << "GhostCell" << "\" NumberOfComponents=\"1\"/>" << std::endl;
-
-        if( turbulenceAnalyzer->collect_U  ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "U"         << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        if( turbulenceAnalyzer->collect_V  ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "V"         << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        if( turbulenceAnalyzer->collect_W  ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "W"         << "\" NumberOfComponents=\"1\"/>" << std::endl;
-
-        if( turbulenceAnalyzer->collect_UU ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "UU"        << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        if( turbulenceAnalyzer->collect_VV ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "VV"        << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        if( turbulenceAnalyzer->collect_WW ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "WW"        << "\" NumberOfComponents=\"1\"/>" << std::endl;
-
-        if( turbulenceAnalyzer->collect_UV ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "UV"        << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        if( turbulenceAnalyzer->collect_UW ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "UW"        << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        if( turbulenceAnalyzer->collect_VW ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "VW"        << "\" NumberOfComponents=\"1\"/>" << std::endl;
-
-        if( turbulenceAnalyzer->collect_T  ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "T"         << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        if( turbulenceAnalyzer->collect_TT ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "TT"        << "\" NumberOfComponents=\"1\"/>" << std::endl;
-        if( turbulenceAnalyzer->collect_p  ) file << "      <PDataArray type=\"" << "Float64" << "\" Name=\"" << "p"         << "\" NumberOfComponents=\"1\"/>" << std::endl;
-
-    file << "    </PCellData>" << std::endl;
-
-    file << "    <PPoints>" << std::endl;
-    file << "      <PDataArray type=\"Float32\" Name=\"Points\" NumberOfComponents=\"3\"/>" << std::endl;
-    file << "    </PPoints>" << std::endl;
-
-    for( uint rank = 0; rank < mpiWorldSize; rank++ )
-    {
-        file << "    <Piece Source=\"" << filenameWithoutPath << "_rank_" << rank << ".vtu\"/>" << std::endl;
-    }
-
-    file << "  </PUnstructuredGrid>" << std::endl;
-    file << "</VTKFile>" << std::endl;
-
-    //////////////////////////////////////////////////////////////////////////
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void mapFlowField(std::shared_ptr<GksGpu::DataBase> base, std::shared_ptr<GksGpu::DataBase> target)
-{
-    vtkGridPtr gridBase   = getVtkUnstructuredOctGrid(base,   true);
-    vtkGridPtr gridTarget = getVtkUnstructuredOctGrid(target, true);
-
-    //////////////////////////////////////////////////////////////////////////
-
-    vtkSmartPointer<vtkDoubleArray> rho  = vtkSmartPointer<vtkDoubleArray>::New();
-    vtkSmartPointer<vtkDoubleArray> rhoU = vtkSmartPointer<vtkDoubleArray>::New();
-    vtkSmartPointer<vtkDoubleArray> rhoE = vtkSmartPointer<vtkDoubleArray>::New();
-            
-    rho->SetNumberOfComponents ( 1 );
-    rhoU->SetNumberOfComponents( 3 );
-    rhoE->SetNumberOfComponents( 1 );
-
-    rho->SetName ( "rho"  );
-    rhoU->SetName( "rhoU" );
-    rhoE->SetName( "rhoW" );
-
-    for( uint cellIdx = 0; cellIdx < base->numberOfCells; cellIdx++ ){
-
-        if( base->isGhostCell( cellIdx ) ) continue;
-                    
-        GksGpu::ConservedVariables cons;
-
-        cons.rho  = base->dataHost[ RHO__(cellIdx, base->numberOfCells) ];
-        cons.rhoU = base->dataHost[ RHO_U(cellIdx, base->numberOfCells) ];
-        cons.rhoV = base->dataHost[ RHO_V(cellIdx, base->numberOfCells) ];
-        cons.rhoW = base->dataHost[ RHO_W(cellIdx, base->numberOfCells) ];
-        cons.rhoE = base->dataHost[ RHO_E(cellIdx, base->numberOfCells) ];
-
-        rho->InsertNextTuple1 ( cons.rho );
-        rhoU->InsertNextTuple3( cons.rhoU, cons.rhoV, cons.rhoW );
-        rhoE->InsertNextTuple1( cons.rhoE );
-    }
-
-    gridBase->GetCellData()->AddArray( rho  );
-    gridBase->GetCellData()->AddArray( rhoU );
-    gridBase->GetCellData()->AddArray( rhoE );
-        
-#ifdef USE_PASSIVE_SCALAR
-
-        vtkSmartPointer<vtkDoubleArray> dataS_1 = vtkSmartPointer<vtkDoubleArray>::New();
-        vtkSmartPointer<vtkDoubleArray> dataS_2 = vtkSmartPointer<vtkDoubleArray>::New();
-
-        dataS_1->SetNumberOfComponents(1);
-        dataS_2->SetNumberOfComponents(1);
-
-        dataS_1->SetName("rhoS_1");
-        dataS_2->SetName("rhoS_2");
-
-        for (uint cellIdx = 0; cellIdx < base->numberOfCells; cellIdx++) {
-
-            if (base->isGhostCell(cellIdx)) continue;
-
-            dataS_1->InsertNextTuple1(base->dataHost[RHO_S_1(cellIdx, base->numberOfCells)]);
-            dataS_2->InsertNextTuple1(base->dataHost[RHO_S_2(cellIdx, base->numberOfCells)]);
-        }
-
-        gridBase->GetCellData()->AddArray(dataS_1);
-        gridBase->GetCellData()->AddArray(dataS_2);
-
-#endif // USE_PASSIVE_SCALAR
-
-    //////////////////////////////////////////////////////////////////////////
-
-    vtkSmartPointer<vtkCellDataToPointData> cellDataToPointDataBase = vtkSmartPointer<vtkCellDataToPointData>::New();
-    cellDataToPointDataBase->SetInputData( gridBase );
-    cellDataToPointDataBase->Update();
-
-    vtkSmartPointer<vtkGeometryFilter> gridToPolyDataBase = vtkSmartPointer<vtkGeometryFilter>::New();
-    gridToPolyDataBase->SetInputConnection( cellDataToPointDataBase->GetOutputPort() );
-    gridToPolyDataBase->Update();
-
-    vtkSmartPointer<vtkCleanPolyData> cleanPolyData = vtkSmartPointer<vtkCleanPolyData>::New();
-    cleanPolyData->SetInputConnection( gridToPolyDataBase->GetOutputPort() );
-    cleanPolyData->Update();
-
-    vtkSmartPointer<vtkResampleWithDataSet> resampleWithDataSet = vtkSmartPointer<vtkResampleWithDataSet>::New();
-    resampleWithDataSet->SetSourceConnection( cleanPolyData->GetOutputPort() );
-    resampleWithDataSet->SetInputData(  gridTarget );
-    resampleWithDataSet->Update();
-
-    vtkSmartPointer<vtkPointDataToCellData> pointDataToCellDataTarget = vtkSmartPointer<vtkPointDataToCellData>::New();
-    pointDataToCellDataTarget->SetInputConnection( resampleWithDataSet->GetOutputPort() );
-    pointDataToCellDataTarget->Update();
-
-    gridTarget = (vtkUnstructuredGrid*) pointDataToCellDataTarget->GetOutput();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    for( uint cellIdx = 0, gridCellIdx = 0; cellIdx < target->numberOfCells; cellIdx++ ){
-
-        if( target->isGhostCell( cellIdx ) ) continue;
-
-        double  rho  = gridTarget->GetCellData()->GetArray(1)->GetTuple1(gridCellIdx);
-        double* rhoU = gridTarget->GetCellData()->GetArray(2)->GetTuple3(gridCellIdx);
-        double  rhoE = gridTarget->GetCellData()->GetArray(3)->GetTuple1(gridCellIdx);
-
-        target->dataHost[ RHO__(cellIdx, target->numberOfCells) ] = rho;
-        target->dataHost[ RHO_U(cellIdx, target->numberOfCells) ] = rhoU[0];
-        target->dataHost[ RHO_V(cellIdx, target->numberOfCells) ] = rhoU[1];
-        target->dataHost[ RHO_W(cellIdx, target->numberOfCells) ] = rhoU[2];
-        target->dataHost[ RHO_E(cellIdx, target->numberOfCells) ] = rhoE;
-
-#ifdef USE_PASSIVE_SCALAR
-        {
-            double  rhoS_1 = gridTarget->GetCellData()->GetArray(4)->GetTuple1(gridCellIdx);
-            double  rhoS_2 = gridTarget->GetCellData()->GetArray(5)->GetTuple1(gridCellIdx);
-
-            target->dataHost[RHO_S_1(cellIdx, target->numberOfCells)] = rhoS_1;
-            target->dataHost[RHO_S_2(cellIdx, target->numberOfCells)] = rhoS_2;
-        }
-#endif // USE_PASSIVE_SCALAR
-
-        gridCellIdx++;
-    }
-}
-
-void GKSVTKADAPTER_EXPORT writeConcreteHeatFluxVtkXML(std::shared_ptr<GksGpu::DataBase> dataBase, std::shared_ptr<GksGpu::ConcreteHeatFlux> bc, GksGpu::Parameters parameters, int mode, std::string filename)
-{
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << filename << ".vtu" << " ... \n";
-
-    bc->download();
-
-    vtkGridPtr grid = vtkGridPtr::New();
- 
-    vtkPointsPtr points = vtkPointsPtr::New();
-
-    //////////////////////////////////////////////////////////////////////////
-
-    for( uint index = 0; index < bc->numberOfCells; index ++ )
-    {
-        if( bc->domainCellsHost[index] > dataBase->perLevelCount[ dataBase->getCellLevel( bc->domainCellsHost[index] ) ].startOfCells
-                                       + dataBase->perLevelCount[ dataBase->getCellLevel( bc->domainCellsHost[index] ) ].numberOfBulkCells ) continue;
-
-        if( GksGpu::isCellProperties( dataBase->cellPropertiesHost[ bc->domainCellsHost[index] ], CELL_PROPERTIES_FINE_GHOST ) ) continue;
-
-        real dx = bc->L / real(bc->numberOfPoints + 1);
-
-        Vec3 displacement = dataBase->nodeCoordinates[ dataBase->cellToNode[ bc->ghostCellsHost [index] ][0] ]
-                          - dataBase->nodeCoordinates[ dataBase->cellToNode[ bc->domainCellsHost[index] ][0] ];
-        real dn = displacement.length();
-        displacement = ( c1o1 / displacement.length() ) * displacement;
-
-        char direction = 'z';
-        if ( std::abs(displacement.x) > std::abs(displacement.y) && std::abs(displacement.x) > std::abs(displacement.z) ) direction = 'x';
-        if ( std::abs(displacement.y) > std::abs(displacement.x) && std::abs(displacement.y) > std::abs(displacement.z) ) direction = 'y';
-
-        Vec3 dn1, dn2, dn3, dn4;
-        if( direction == 'x' )
-        {
-            dn1.y =  c1o2*dn; dn1.z =  c1o2*dn; 
-            dn2.y = -c1o2*dn; dn2.z =  c1o2*dn; 
-            dn3.y = -c1o2*dn; dn3.z = -c1o2*dn; 
-            dn4.y =  c1o2*dn; dn4.z = -c1o2*dn;
-        }
-        if( direction == 'y' )
-        {
-            dn1.x =  c1o2*dn; dn1.z =  c1o2*dn; 
-            dn2.x = -c1o2*dn; dn2.z =  c1o2*dn; 
-            dn3.x = -c1o2*dn; dn3.z = -c1o2*dn; 
-            dn4.x =  c1o2*dn; dn4.z = -c1o2*dn;
-        }
-        if( direction == 'z' )
-        {
-            dn1.x =  c1o2*dn; dn1.y =  c1o2*dn; 
-            dn2.x = -c1o2*dn; dn2.y =  c1o2*dn; 
-            dn3.x = -c1o2*dn; dn3.y = -c1o2*dn; 
-            dn4.x =  c1o2*dn; dn4.y = -c1o2*dn;
-        }
-
-        Vec3 faceCenter;
-        for( uint i = 0; i < 8; i++ )
-        {
-            faceCenter = faceCenter + dataBase->nodeCoordinates[ dataBase->cellToNode[ bc->ghostCellsHost [index] ][i] ];
-            faceCenter = faceCenter + dataBase->nodeCoordinates[ dataBase->cellToNode[ bc->domainCellsHost[index] ][i] ];
-        }
-        faceCenter = c1o16 * faceCenter;
-
-        uint nodeStartNumber = points->GetNumberOfPoints();
-
-        //////////////////////////////////////////////////////////////////////////
-
-        Vec3 tmp;
-
-        tmp = faceCenter + dn1; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-        tmp = faceCenter + dn2; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-        tmp = faceCenter + dn3; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-        tmp = faceCenter + dn4; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-
-        for( uint i = 1; i <= bc->numberOfPoints; i++ )
-        {
-            Vec3 localDisplacement = real(i) * dx * displacement;
-            tmp = faceCenter + localDisplacement + dn1; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-            tmp = faceCenter + localDisplacement + dn2; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-            tmp = faceCenter + localDisplacement + dn3; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-            tmp = faceCenter + localDisplacement + dn4; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-        }
-
-        Vec3 localDisplacement = bc->L * displacement;
-        tmp = faceCenter + localDisplacement + dn1; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-        tmp = faceCenter + localDisplacement + dn2; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-        tmp = faceCenter + localDisplacement + dn3; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-        tmp = faceCenter + localDisplacement + dn4; points->InsertNextPoint( tmp.x, tmp.y, tmp.z );
-
-        //////////////////////////////////////////////////////////////////////////
-        
-        for( uint i = 0; i <= bc->numberOfPoints; i++ )
-        {
-            vtkIdListPtr idList = vtkIdListPtr::New();
-
-            idList->SetNumberOfIds( 8 );
-
-            idList->SetId( 0, nodeStartNumber + (i    ) * 4     );
-            idList->SetId( 1, nodeStartNumber + (i    ) * 4 + 1 );
-            idList->SetId( 2, nodeStartNumber + (i    ) * 4 + 2 );
-            idList->SetId( 3, nodeStartNumber + (i    ) * 4 + 3 );
-            idList->SetId( 4, nodeStartNumber + (i + 1) * 4     );
-            idList->SetId( 5, nodeStartNumber + (i + 1) * 4 + 1 );
-            idList->SetId( 6, nodeStartNumber + (i + 1) * 4 + 2 );
-            idList->SetId( 7, nodeStartNumber + (i + 1) * 4 + 3 );
-
-            grid->InsertNextCell( 12, idList );
-        }
-    }
-
-    grid->SetPoints( points );
-
-    //////////////////////////////////////////////////////////////////////////
-    
-    vtkDoubleArrayPtr data = vtkDoubleArrayPtr::New();
-
-    data->SetNumberOfComponents( 1 );
-
-    data->SetName( "T" );
-
-    for( uint cellIdx = 0; cellIdx < bc->numberOfCells; cellIdx++ )
-    {
-        if( bc->domainCellsHost[cellIdx] > dataBase->perLevelCount[ dataBase->getCellLevel( bc->domainCellsHost[cellIdx] ) ].startOfCells
-                                         + dataBase->perLevelCount[ dataBase->getCellLevel( bc->domainCellsHost[cellIdx] ) ].numberOfBulkCells ) continue;
-
-        if( GksGpu::isCellProperties( dataBase->cellPropertiesHost[ bc->domainCellsHost[cellIdx] ], CELL_PROPERTIES_FINE_GHOST ) ) continue;
-
-        real T = c0o1;
-
-        {
-            GksGpu::ConservedVariables cons;
-
-            cons.rho  = dataBase->dataHost[RHO__(bc->domainCellsHost[cellIdx], dataBase->numberOfCells)];
-            cons.rhoU = dataBase->dataHost[RHO_U(bc->domainCellsHost[cellIdx], dataBase->numberOfCells)];
-            cons.rhoV = dataBase->dataHost[RHO_V(bc->domainCellsHost[cellIdx], dataBase->numberOfCells)];
-            cons.rhoW = dataBase->dataHost[RHO_W(bc->domainCellsHost[cellIdx], dataBase->numberOfCells)];
-            cons.rhoE = dataBase->dataHost[RHO_E(bc->domainCellsHost[cellIdx], dataBase->numberOfCells)];
-
-            GksGpu::PrimitiveVariables prim = GksGpu::toPrimitiveVariables(cons, parameters.K);
-
-#ifdef USE_PASSIVE_SCALAR
-            T += c3o2 * getT(prim);
-            //T += getT(prim);
-#else // USE_PASSIVE_SCALAR
-            T += c3o2 * 1.0 / prim.lambda;
-#endif // USE_PASSIVE_SCALAR
-        }
-
-        {
-            GksGpu::ConservedVariables cons;
-
-            cons.rho  = dataBase->dataHost[RHO__(bc->secondCellsHost[cellIdx], dataBase->numberOfCells)];
-            cons.rhoU = dataBase->dataHost[RHO_U(bc->secondCellsHost[cellIdx], dataBase->numberOfCells)];
-            cons.rhoV = dataBase->dataHost[RHO_V(bc->secondCellsHost[cellIdx], dataBase->numberOfCells)];
-            cons.rhoW = dataBase->dataHost[RHO_W(bc->secondCellsHost[cellIdx], dataBase->numberOfCells)];
-            cons.rhoE = dataBase->dataHost[RHO_E(bc->secondCellsHost[cellIdx], dataBase->numberOfCells)];
-
-            GksGpu::PrimitiveVariables prim = GksGpu::toPrimitiveVariables(cons, parameters.K);
-
-#ifdef USE_PASSIVE_SCALAR
-            T -= c1o2 * getT(prim);
-#else // USE_PASSIVE_SCALAR
-            T -= c1o2 * 1.0 / prim.lambda;
-#endif // USE_PASSIVE_SCALAR
-        }
-
-        data->InsertNextValue(T);
-        data->InsertNextValue(T);
-        data->InsertNextValue(T);
-        data->InsertNextValue(T);
-
-        for( uint i = 0; i < bc->numberOfPoints; i++ )
-        {
-            data->InsertNextValue(bc->temperaturesHost[ bc->numberOfPoints * cellIdx + i ]);
-            data->InsertNextValue(bc->temperaturesHost[ bc->numberOfPoints * cellIdx + i ]);
-            data->InsertNextValue(bc->temperaturesHost[ bc->numberOfPoints * cellIdx + i ]);
-            data->InsertNextValue(bc->temperaturesHost[ bc->numberOfPoints * cellIdx + i ]);
-        }
-
-        data->InsertNextValue(bc->ambientTemperature);
-        data->InsertNextValue(bc->ambientTemperature);
-        data->InsertNextValue(bc->ambientTemperature);
-        data->InsertNextValue(bc->ambientTemperature);
-    }
-
-    grid->GetPointData()->AddArray( data );
-
-    //////////////////////////////////////////////////////////////////////////
-
-    writeVtkUnstructuredGrid( grid, vtkXMLWriter::Binary, filename );
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
\ No newline at end of file
diff --git a/src/gpu/GksVtkAdapter/VTKAdapter.h b/src/gpu/GksVtkAdapter/VTKAdapter.h
deleted file mode 100644
index 5f3209a04e0d7942f890f2186779e75c6f14683f..0000000000000000000000000000000000000000
--- a/src/gpu/GksVtkAdapter/VTKAdapter.h
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef VTKAdapter_H
-#define VTKAdapter_H
-
-#include <vtkSmartPointer.h>
-#include <vtkVersion.h>
- 
-#include <vtkPoints.h>
-#include <vtkUnstructuredGrid.h>
-#include <vtkDataObject.h>
-
-
-#include <vtkIdList.h>
-
-#include <vtkIntArray.h>
-#include <vtkDoubleArray.h>
-
-#include <vtkXMLUnstructuredGridWriter.h>
-
-#include <memory>
-#include <functional>
-#include <string>
-
-#include "PointerDefinitions.h"
-
-
-
-#include "DataBase/DataBase.h"
-#include "Parameters/Parameters.h"
-
-#include "GksVtkAdapter_export.h"
-
-typedef vtkSmartPointer<vtkUnstructuredGrid>          vtkGridPtr;
-typedef vtkSmartPointer<vtkPoints>                    vtkPointsPtr;
-typedef vtkSmartPointer<vtkIdList>                    vtkIdListPtr;
-typedef vtkSmartPointer<vtkIntArray>                  vtkIntArrayPtr;
-typedef vtkSmartPointer<vtkDoubleArray>               vtkDoubleArrayPtr;
-typedef vtkSmartPointer<vtkXMLUnstructuredGridWriter> vtkWriterPtr;
-
-struct rgbColor
-{
-    unsigned char r;
-    unsigned char g;
-    unsigned char b;
-};
-
-vtkGridPtr GKSVTKADAPTER_EXPORT getVtkUnstructuredOctGrid( SPtr<GksGpu::DataBase> dataBase, bool excludeGhostCells = false );
-
-void GKSVTKADAPTER_EXPORT addScalarIntCellData( vtkGridPtr grid,
-                                     uint numberOfCells, 
-                                     std::string name, 
-                                     std::function<int(uint)> getData );
-
-void GKSVTKADAPTER_EXPORT addScalarRealCellData( vtkGridPtr grid,
-                                      uint numberOfCells, 
-                                      std::string name, 
-                                      std::function<real(uint)> getData );
-
-void GKSVTKADAPTER_EXPORT addVectorCellData( vtkGridPtr grid,
-                                  uint numberOfCells, 
-                                  std::string name, 
-                                  std::function<Vec3(uint)> getData );
-
-void GKSVTKADAPTER_EXPORT addBaseData( vtkGridPtr grid, SPtr<GksGpu::DataBase> dataBase, GksGpu::Parameters parameters );
-
-void GKSVTKADAPTER_EXPORT writeVtkUnstructuredGrid( vtkGridPtr grid, int mode, std::string filename );
-
-void GKSVTKADAPTER_EXPORT writeVtkParallelUnstructuredGridSummaryFile( vtkGridPtr grid, std::string filename, uint mpiWorldSize );
-
-rgbColor GKSVTKADAPTER_EXPORT colorMapCoolToWarmExtended( double value, double min, double max );
-
-void GKSVTKADAPTER_EXPORT writePNG( vtkDataObject* inputData, int nx, int ny, double L, double H, std::string filename );
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GksVtkAdapter/VTKInterface.h b/src/gpu/GksVtkAdapter/VTKInterface.h
deleted file mode 100644
index 0120a7ad19d059a830530857b3ae118b4a50d224..0000000000000000000000000000000000000000
--- a/src/gpu/GksVtkAdapter/VTKInterface.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef VTKInterface_H
-#define VTKInterface_H
-
-#include "GksGpu/Parameters/Parameters.h"
-
-
-#include "GksVtkAdapter_export.h"
-
-namespace GksGpu{ 
-struct DataBase;
-class TurbulenceAnalyzer;
-struct ConcreteHeatFlux;
-}
-
-void GKSVTKADAPTER_EXPORT writeVtkXML(std::shared_ptr<GksGpu::DataBase> dataBase,
-                           GksGpu::Parameters parameters, 
-                           int mode, 
-                           std::string filename);
-
-void GKSVTKADAPTER_EXPORT writeVtkXMLParallelSummaryFile(std::shared_ptr<GksGpu::DataBase> dataBase,
-                                              GksGpu::Parameters parameters, 
-                                              std::string filename,
-                                              uint mpiWorldSize);
-
-void GKSVTKADAPTER_EXPORT writeTurbulenceVtkXML(std::shared_ptr<GksGpu::DataBase> dataBase,
-                                     std::shared_ptr<GksGpu::TurbulenceAnalyzer> turbulenceAnalyzer,
-                                     int mode, 
-                                     std::string filename);
-
-void GKSVTKADAPTER_EXPORT writeTurbulenceVtkXMLParallelSummaryFile(std::shared_ptr<GksGpu::DataBase> dataBase,
-                                                        std::shared_ptr<GksGpu::TurbulenceAnalyzer> turbulenceAnalyzer,
-                                                        GksGpu::Parameters parameters, 
-                                                        std::string filename,
-                                                        uint mpiWorldSize);
-
-void GKSVTKADAPTER_EXPORT mapFlowField( std::shared_ptr<GksGpu::DataBase> base, std::shared_ptr<GksGpu::DataBase> target );
-
-void GKSVTKADAPTER_EXPORT writeConcreteHeatFluxVtkXML(std::shared_ptr<GksGpu::DataBase> dataBase,
-                                           std::shared_ptr<GksGpu::ConcreteHeatFlux> bc, 
-                                           GksGpu::Parameters parameters, 
-                                           int mode, 
-                                           std::string filename);
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/CMakeLists.txt b/src/gpu/GridGenerator/CMakeLists.txt
index 8102ad3a10b53dded2ba6fe489753f20d1d2ed4f..7cc0f0e20e2b43601f9f70e639d0ecd61cadb2ae 100644
--- a/src/gpu/GridGenerator/CMakeLists.txt
+++ b/src/gpu/GridGenerator/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(GridGenerator LANGUAGES CXX)
 
-vf_add_library(PRIVATE_LINK basics OpenMP::OpenMP_CXX)
+vf_add_library(PUBLIC_LINK basics logger OpenMP::OpenMP_CXX)
 vf_add_tests()
 
 if(NOT MSVC) 
diff --git a/src/gpu/GridGenerator/StreetPointFinder/JunctionReader.cpp b/src/gpu/GridGenerator/StreetPointFinder/JunctionReader.cpp
deleted file mode 100644
index bac17264d1c00389bbefacc4063d7801e8f5baa7..0000000000000000000000000000000000000000
--- a/src/gpu/GridGenerator/StreetPointFinder/JunctionReader.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file JunctionReader.cpp
-//! \ingroup StreetPointFinder
-//! \author Stephan Lenz
-//=======================================================================================
-#include "JunctionReader.h"
-
-#include <fstream>
-#include <iostream>
-#include <string>
-
-
-JunctionReaderData::JunctionReaderData(std::vector<uint> inCells, std::vector<uint> outCells, std::vector<int> carCanNotEnterThisOutCell, uint trafficLightSwitchTime = 0) :
-	inCells{ inCells }, outCells{ outCells }, carCanNotEnterThisOutCell{ carCanNotEnterThisOutCell }, trafficLightSwitchTime{ trafficLightSwitchTime }
-{}
-
-void JunctionReader::readJunctions(std::string filename, StreetPointFinder* streetPointFinder)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::readJunctions( " << filename << " )" << "\n";
-
-	std::ifstream file;
-	file.open(filename.c_str());
-	if (!file.is_open()) std::cerr << "File not found" << std::endl;
-	this->streetPointFinder = streetPointFinder;
-
-	uint numberOfJunctions;
-	file >> numberOfJunctions;
-
-	std::string inOutDummy;
-	int streetIndex = 0;
-	uint trafficLightTime = 0;
-	bool onlyNeighbors = false;
-
-	file >> inOutDummy;
-
-	for (uint i = 0; i < numberOfJunctions; i++) {
-		std::vector<uint> inCells, outCells;
-		std::vector<int> carCanNotEnterThisOutCell;
-
-		//inCells
-		file >> inOutDummy;
-		while (inOutDummy.compare("out") != 0) {
-			streetIndex = std::stoi(inOutDummy);
-
-			if (streetIndex >= 0)
-				inCells.push_back(getCellIndex(streetIndex, 'e'));
-
-			file >> inOutDummy;
-		}
-
-		//outCells
-		file >> inOutDummy;
-		while (inOutDummy.compare("in") != 0 && inOutDummy.compare("end") != 0 && inOutDummy.compare("t") != 0 && inOutDummy.compare("c") != 0) {
-			streetIndex = std::stoi(inOutDummy);
-
-			if (streetIndex >= 0) {
-				outCells.push_back(getCellIndex(streetIndex, 's'));
-				if (carCanNotEnterThisOutCell.size() < inCells.size())
-					carCanNotEnterThisOutCell.push_back(getCellIndex(streetIndex, 's'));
-			}
-			else if (streetIndex == -2) //no prohibited outCell
-				carCanNotEnterThisOutCell.push_back(-2);
-
-			file >> inOutDummy;
-		}
-
-		//trafficLightTime
-		if (inOutDummy.compare("t") == 0) {
-			file >> inOutDummy;
-			trafficLightTime = std::stoi(inOutDummy);
-			file >> inOutDummy;
-		}
-		else
-			trafficLightTime = 0;
-
-		// only neighbors (used for curves)
-		if (inOutDummy.compare("c") == 0) {
-			onlyNeighbors = true;
-			file >> inOutDummy;
-		}
-
-
-		//make Junction or neighbors
-		if (onlyNeighbors) {
-			if (inCells.size() == 2 && outCells.size() == 2) {
-				specialNeighbors.cells.insert(specialNeighbors.cells.end(), inCells.begin(), inCells.end());
-				specialNeighbors.neighbors.push_back(outCells[1]);     
-				specialNeighbors.neighbors.push_back(outCells[0]);
-
-				onlyNeighbors = false;
-			}
-			else 
-            { 
-                // TODO: this could be a bug, as before this change "continue" was not guarded by the "else"
-                // https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/11
-                std::cerr << "can't add curve" << std::endl; 
-                continue;
-            }
-		}
-		else
-			junctions.push_back(JunctionReaderData(inCells, outCells, carCanNotEnterThisOutCell, trafficLightTime));
-
-	}
-}
-
-
-unsigned int JunctionReader::getCellIndex(unsigned int streetIndex, char startOrEnd)
-{
-	uint i = 0;
-	unsigned int cellIndex = 0;
-	while (i < streetIndex) {
-		cellIndex += streetPointFinder->streets[i].numberOfCells;
-		++i;
-	}
-	if (startOrEnd == 's') 	return cellIndex;
-	return cellIndex + streetPointFinder->streets[streetIndex].numberOfCells - 1;
-}
-
diff --git a/src/gpu/GridGenerator/StreetPointFinder/JunctionReader.h b/src/gpu/GridGenerator/StreetPointFinder/JunctionReader.h
deleted file mode 100644
index 5b68b0357ea2432dfde6d167b27908fe1aa4348a..0000000000000000000000000000000000000000
--- a/src/gpu/GridGenerator/StreetPointFinder/JunctionReader.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file JunctionReader.h
-//! \ingroup StreetPointFinder
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef JUNCTIONREADER_H
-#define JUNCTIONREADER_H
-
-#include <vector>
-
-#include "GridGenerator_export.h"
-
-#include "Core/DataTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "StreetPointFinder.h"
-
-
-
-struct GRIDGENERATOR_EXPORT JunctionReaderData
-{
-	std::vector<uint> inCells;
-	std::vector<uint> outCells;
-	std::vector<int> carCanNotEnterThisOutCell;
-	uint trafficLightSwitchTime;
-
-	JunctionReaderData(std::vector<uint> inCells, std::vector<uint> outCells, std::vector<int> carCanNotEnterThisOutCell, uint trafficLightSwitchTime);
-};
-
-
-struct GRIDGENERATOR_EXPORT Neighbors
-{
-	std::vector<int> cells;
-	std::vector<int> neighbors;
-};
-
-
-
-struct GRIDGENERATOR_EXPORT JunctionReader
-{
-	std::vector<JunctionReaderData> junctions;
-	Neighbors specialNeighbors;
-	StreetPointFinder* streetPointFinder;
-
-	void readJunctions(std::string filename, StreetPointFinder* streetPointFinder);
-
-
-private:
-	unsigned int getCellIndex(unsigned int streetIndex, char startOrEnd);
-};
-#endif
diff --git a/src/gpu/GridGenerator/StreetPointFinder/SinkReader.cpp b/src/gpu/GridGenerator/StreetPointFinder/SinkReader.cpp
deleted file mode 100644
index 1224f1bf7cad8e535e842426406aacc619dad314..0000000000000000000000000000000000000000
--- a/src/gpu/GridGenerator/StreetPointFinder/SinkReader.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file SinkReader.cpp
-//! \ingroup StreetPointFinder
-//! \author Stephan Lenz
-//=======================================================================================
-#include "SinkReader.h"
-
-#include <fstream>
-#include <iostream>
-
-SinkReaderData::SinkReaderData(uint sinkIndex, float sinkBlockedPossibility) :
-	sinkIndex{ sinkIndex }, sinkBlockedPossibility{ sinkBlockedPossibility }
-{}
-
-void SinkReader::readSinks(std::string filename, StreetPointFinder* streetPointFinder)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::readSinks( " << filename << " )" << "\n";
-
-	this->streetPointFinder = streetPointFinder;
-
-	std::ifstream file;
-	file.open(filename.c_str());
-	if (!file.is_open()) std::cerr << "File not found" << std::endl;
-
-	uint numberOfSinks;
-	file >> numberOfSinks;
-
-	uint streetIndex;
-	float sinkBlockedPossibility;
-
-
-	for (uint i = 0; i < numberOfSinks; i++) {
-		file >> streetIndex >> sinkBlockedPossibility;
-		sinks.push_back(SinkReaderData(getCellIndexEnd(streetIndex), sinkBlockedPossibility));
-	}
-}
-
-unsigned int SinkReader::getCellIndexEnd(unsigned int streetIndex)
-{
-	uint i = 0;
-	unsigned int cellIndex = 0;
-	while (i < streetIndex) {
-		cellIndex += streetPointFinder->streets[i].numberOfCells;
-		++i;
-	}
-	
-	return cellIndex + streetPointFinder->streets[streetIndex].numberOfCells - 1;
-}
diff --git a/src/gpu/GridGenerator/StreetPointFinder/SinkReader.h b/src/gpu/GridGenerator/StreetPointFinder/SinkReader.h
deleted file mode 100644
index ba28596b0eb63954eb5f7162c4849f863e15f657..0000000000000000000000000000000000000000
--- a/src/gpu/GridGenerator/StreetPointFinder/SinkReader.h
+++ /dev/null
@@ -1,65 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file SinkReader.h
-//! \ingroup StreetPointFinder
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef SINKREADER_H
-#define  SINKREADER_H
-
-#include <vector>
-
-#include "GridGenerator_export.h"
-
-#include "Core/DataTypes.h"
-#include "Core/Logger/Logger.h"
-
-#include "StreetPointFinder.h"
-
-
-
-struct GRIDGENERATOR_EXPORT SinkReaderData{
-	uint sinkIndex;
-	float sinkBlockedPossibility;
-	SinkReaderData(uint sinkIndex, float sinkBlockedPossibility);
-};
-
-struct GRIDGENERATOR_EXPORT SinkReader
-{
-	std::vector<SinkReaderData> sinks;
-	StreetPointFinder* streetPointFinder;
-
-	void readSinks(std::string filename, StreetPointFinder* streetPointFinder);
-
-private:
-	unsigned int getCellIndexEnd(unsigned int streetIndex);
-};
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/StreetPointFinder/SourceReader.cpp b/src/gpu/GridGenerator/StreetPointFinder/SourceReader.cpp
deleted file mode 100644
index a3a62f942f96fa1faf9e49448ed3ae627d985273..0000000000000000000000000000000000000000
--- a/src/gpu/GridGenerator/StreetPointFinder/SourceReader.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file SourceReader.cpp
-//! \ingroup StreetPointFinder
-//! \author Stephan Lenz
-//=======================================================================================
-#include "SourceReader.h"
-
-#include <fstream>
-#include <iostream>
-
-SourceReaderData::SourceReaderData(unsigned int sourceIndex, float sourcePossibility):
-	sourceIndex{sourceIndex}, sourcePossibility{sourcePossibility}
-{}
-
-
-void SourceReader::readSources(std::string filename, StreetPointFinder* streetPointFinder)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::readSources( " << filename << " )" << "\n";
-
-	this->streetPointFinder = streetPointFinder;
-
-	std::ifstream file;
-	file.open(filename.c_str());
-	if (!file.is_open()) std::cerr << "File not found" << std::endl;
-
-	uint numberOfSources;
-	file >> numberOfSources;
-
-	uint streetIndex;
-	float sourcePossibility;
-
-
-	for (uint i = 0; i < numberOfSources; i++) {
-		file >> streetIndex  >> sourcePossibility;
-		sources.push_back(SourceReaderData(getCellIndexStart(streetIndex), sourcePossibility));
-	}
-}
-
-
-unsigned int SourceReader::getCellIndexStart(unsigned int streetIndex)
-{
-	uint i = 0;
-	unsigned int cellIndex = 0;
-	while (i < streetIndex) {
-		cellIndex += streetPointFinder->streets[i].numberOfCells;
-		++i;
-	}
-	return cellIndex;
-}
-	
-	
diff --git a/src/gpu/GridGenerator/StreetPointFinder/StreetPointFinder.cpp b/src/gpu/GridGenerator/StreetPointFinder/StreetPointFinder.cpp
deleted file mode 100644
index 9fbd3933a5457e96d2d1aa01f1fadcf675be1980..0000000000000000000000000000000000000000
--- a/src/gpu/GridGenerator/StreetPointFinder/StreetPointFinder.cpp
+++ /dev/null
@@ -1,790 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file StreetPointFinder.cpp
-//! \ingroup StreetPointFinder
-//! \author Stephan Lenz
-//=======================================================================================
-#include "StreetPointFinder.h"
-
-#include "Core/Logger/Logger.h"
-
-#include <string>
-#include <sstream>
-#include <fstream>
-#include <iostream>
-#include <cmath>
-#include <algorithm>
-#include <numeric>
-
-#include "grid/Grid.h"
-#include "grid/NodeValues.h"
-
-using namespace vf::gpu;
-
-Street::Street(real xStartCell, real yStartCell, real xEndCell, real yEndCell, real dx)
-{
-	real length = std::sqrt((xEndCell - xStartCell)*(xEndCell - xStartCell)
-		+ (yEndCell - yStartCell)*(yEndCell - yStartCell));
-
-	this->numberOfCells = std::floor(length / dx);
-
-	real realLength = dx * (this->numberOfCells);
-
-	real vectorX = (xEndCell - xStartCell) / length;
-	real vectorY = (yEndCell - yStartCell) / length;
-
-	this->xStart = xStartCell - 0.5 * (realLength - length) * vectorX + 0.5 * dx  * vectorX;
-	this->yStart = yStartCell - 0.5 * (realLength - length) * vectorY + 0.5 * dx  * vectorY;
-
-	this->xEnd = xEndCell + 0.5 * (realLength - length) * vectorX - 0.5 * dx  * vectorX;
-	this->yEnd = yEndCell + 0.5 * (realLength - length) * vectorY - 0.5 * dx  * vectorY;
-
-	//this->xStart = xStart + dx * (xEnd - xStart) / length;
-	//this->yStart = yStart + dx * (yEnd - yStart) / length;
-	//
-	//this->xEnd   = xEnd   - dx * (xEnd - xStart) / length;
-	//this->yEnd   = yEnd   - dx * (yEnd - yStart) / length;
-
-	//this->numberOfCells = std::lround( length / dx ) + 1;
-}
-
-real Street::getCoordinateX(int cellIndex)
-{
-	return xStart + real(cellIndex) / real(numberOfCells - 1) * (xEnd - xStart);
-}
-
-real Street::getCoordinateY(int cellIndex)
-{
-	return yStart + real(cellIndex) / real(numberOfCells - 1) * (yEnd - yStart);
-}
-
-real Street::getVectorX()
-{
-	real vecX = this->xEnd - this->xStart;
-	real vecY = this->yEnd - this->yStart;
-
-	real length = sqrt(vecX*vecX + vecY*vecY);
-
-	return vecX / length;
-}
-
-real Street::getVectorY()
-{
-	real vecX = this->xEnd - this->xStart;
-	real vecY = this->yEnd - this->yStart;
-
-	real length = sqrt(vecX*vecX + vecY*vecY);
-
-	return vecY / length;
-}
-
-void Street::findIndicesLB(SPtr<Grid> grid, real initialSearchHeight)
-{
-	for (uint i = 0; i < numberOfCells; i++)
-	{
-		real x = getCoordinateX(i);
-		real y = getCoordinateY(i);
-
-		uint matrixIndex = grid->transCoordToIndex(x, y, initialSearchHeight);
-
-		real xLB, yLB, zLB;
-		grid->transIndexToCoords(matrixIndex, xLB, yLB, zLB);
-
-		while (grid->getFieldEntry(matrixIndex) != BC_SOLID ||
-			   grid->getFieldEntry(grid->transCoordToIndex(xLB, yLB, zLB-grid->getDelta())) != STOPPER_SOLID)
-		{
-			zLB -= grid->getDelta();
-			matrixIndex = grid->transCoordToIndex(xLB, yLB, zLB);
-		}
-
-		std::stringstream msg;
-
-
-		msg << "( " << x << ", " << y << " )" << "  ==>  ";
-		msg << "( " << xLB << ", " << yLB << ", " << zLB << " ), type = [" << (int)grid->getFieldEntry(matrixIndex) << "], z = " << zLB << " \n";
-
-		*logging::out << logging::Logger::INFO_LOW << msg.str();
-
-		this->matrixIndicesLB.push_back(matrixIndex);
-		this->sparseIndicesLB.push_back(grid->getSparseIndex(matrixIndex));
-	}
-}
-
-void StreetPointFinder::prepareSimulationFileData()
-{
-	//////////////////////////////////////////////////////////////////////////
-	// Concatenate sparseIndicesLB
-
-	for (auto& street : this->streets) this->sparseIndicesLB.insert(this->sparseIndicesLB.end(), street.sparseIndicesLB.begin(), street.sparseIndicesLB.end());
-
-	//////////////////////////////////////////////////////////////////////////
-	// prepare vectors
-
-	uint numberOfCells = (uint)this->sparseIndicesLB.size();
-
-	mapNashToConc.resize(numberOfCells);
-
-	std::vector<uint> indexMap(numberOfCells);
-	std::iota(indexMap.begin(), indexMap.end(), 0);
-
-	//////////////////////////////////////////////////////////////////////////
-	// sort vectors
-
-	std::stable_sort(indexMap.begin(),
-		indexMap.end(),
-		[&](uint lhs, uint rhs) {
-		return this->sparseIndicesLB[lhs] <= this->sparseIndicesLB[rhs];
-	});
-
-	std::stable_sort(this->sparseIndicesLB.begin(),
-		this->sparseIndicesLB.end(),
-		[](uint lhs, uint rhs) {
-		return lhs <= rhs;
-	});
-	//////////////////////////////////////////////////////////////////////////
-	// invert idxMap
-
-	{
-		std::vector<uint> buffer = indexMap;
-		for (uint idx = 0; idx < indexMap.size(); idx++)
-			indexMap[buffer[idx]] = idx;
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-	// identify duplicates and find correct mapping indices
-
-	std::vector<uint> reducedIndexMap(numberOfCells);
-
-	uint currentSparseIndex = this->sparseIndicesLB[0];
-	uint reducedIndex = 0;
-	for (uint index = 1; index < numberOfCells; index++)
-	{
-		if (this->sparseIndicesLB[index] == currentSparseIndex)
-		{
-			reducedIndexMap[index] = reducedIndex;
-		}
-		else
-		{
-			currentSparseIndex = this->sparseIndicesLB[index];
-			reducedIndexMap[index] = ++reducedIndex;
-		}
-	}
-
-	for (uint index = 0; index < numberOfCells; index++)
-	{
-		mapNashToConc[index] = reducedIndexMap[indexMap[index]];
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-	// erase duplicated
-
-	auto newEnd = std::unique(this->sparseIndicesLB.begin(), this->sparseIndicesLB.end());
-
-	this->sparseIndicesLB.resize(std::distance(this->sparseIndicesLB.begin(), newEnd));
-
-	//////////////////////////////////////////////////////////////////////////
-}
-
-void StreetPointFinder::readStreets(std::string filename)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::readStreets( " << filename << " )" << "\n";
-
-	uint numberOfStreets;
-
-	std::ifstream file;
-
-	file.open(filename.c_str());
-
-	file >> numberOfStreets;
-
-	for (uint i = 0; i < numberOfStreets; i++)
-	{
-		real xStart, yStart, xEnd, yEnd;
-
-		real dx;
-
-		file >> xStart >> yStart >> xEnd >> yEnd >> dx;
-
-		streets.push_back(Street(xStart, yStart, xEnd, yEnd, dx));
-	}
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void StreetPointFinder::findIndicesLB(SPtr<Grid> grid, real initialSearchHeight)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::findIndicesLB()\n";
-
-	for (auto& street : streets) street.findIndicesLB(grid, initialSearchHeight);
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void StreetPointFinder::writeVTK(std::string filename, const std::vector<int>& cars)
-{
-	uint numberOfCells = 0;
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::writeVTK( " << filename << " )" << "\n";
-
-	std::ofstream file;
-
-	file.open(filename);
-
-	prepareWriteVTK(file, numberOfCells);
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "FIELD Label " << 3 << std::endl;
-
-	//////////////////////////////////////////////////////////////////////////
-
-	writeStreetsVTK(file, numberOfCells);
-
-	writeLengthsVTK(file, numberOfCells);
-
-	writeCarsVTK(file, numberOfCells, cars);
-
-	////////////////////////////////////////////////////////////////////////////
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-
-void StreetPointFinder::writeReducedVTK(std::string filename, const std::vector<int>& cars)
-{
-	uint numberOfCells = 0;
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::writeVTK( " << filename << " )" << "\n";
-
-	std::ofstream file;
-
-	file.open(filename);
-
-	prepareWriteVTK(file, numberOfCells);
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "FIELD Label " << 1 << std::endl;
-
-	//////////////////////////////////////////////////////////////////////////
-
-	writeCarsVTK(file, numberOfCells, cars);
-
-	////////////////////////////////////////////////////////////////////////////
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void StreetPointFinder::prepareWriteVTK(std::ofstream & file, uint & numberOfCells)
-{
-
-	uint numberOfNodes = 0;
-
-	for (auto& street : streets)
-	{
-		numberOfCells += street.numberOfCells;
-		numberOfNodes += street.numberOfCells + 1;
-	}	
-
-	file << "# vtk DataFile Version 3.0\n";
-	file << "by MeshGenerator\n";
-	file << "ASCII\n";
-	file << "DATASET UNSTRUCTURED_GRID\n";
-
-	file << "POINTS " << numberOfNodes << " float" << std::endl;
-
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i <= street.numberOfCells; i++)
-		{
-			file << 0.5 * (street.getCoordinateX(i - 1) + street.getCoordinateX(i)) << " "
-				<< 0.5 * (street.getCoordinateY(i - 1) + street.getCoordinateY(i)) << " " << 0.0 << std::endl;
-		}
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "CELLS " << numberOfCells << " " << 3 * numberOfCells << std::endl;
-
-
-	uint nodeIndex = 0;
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			file << "2 " << nodeIndex << " " << nodeIndex + 1 << std::endl;
-			nodeIndex++;
-		}
-		nodeIndex++;
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "CELL_TYPES " << numberOfCells << std::endl;
-
-	for (uint i = 0; i < numberOfCells; i++) {
-		file << "3" << std::endl;
-	}
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "\nCELL_DATA " << numberOfCells << std::endl;
-}
-
-
-void StreetPointFinder::writeStreetsVTK(std::ofstream & file, uint numberOfCells)
-{
-	file << "StreetIndex 1 " << numberOfCells << " int" << std::endl;
-
-	uint streetIndex = 0;
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			file << streetIndex << std::endl;
-		}
-		streetIndex++;
-	}
-}
-
-
-
-void StreetPointFinder::writeCarsVTK(std::ofstream& file, uint numberOfCells, const std::vector<int>& cars)
-{
-	file << "Cars 1 " << numberOfCells << " float" << std::endl;
-
-	uint index = 0;
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			if (index < cars.size())
-				file << cars[index] << std::endl;
-			else
-				file << -1 << std::endl;
-			index++;
-		}
-	}
-}
-
-
-void StreetPointFinder::writeLengthsVTK(std::ofstream & file, uint numberOfCells)
-{
-	file << "StreetLength 1 " << numberOfCells << " float" << std::endl;
-
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			real length = std::sqrt((street.getCoordinateX(i) - street.getCoordinateX(0)) * (street.getCoordinateX(i) - street.getCoordinateX(0))
-				+ (street.getCoordinateY(i) - street.getCoordinateY(0)) * (street.getCoordinateY(i) - street.getCoordinateY(0)));
-
-			file << length << std::endl;
-		}
-	}
-}
-
-
-void StreetPointFinder::writeConnectionVTK(std::string filename, SPtr<Grid> grid)
-{
-	uint numberOfCells = 0;
-
-	for (auto& street : streets)
-	{
-		numberOfCells += street.numberOfCells;
-	}
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::writeConnectionVTK( " << filename << " )" << "\n";
-
-	std::ofstream file;
-
-	file.open(filename);
-
-	file << "# vtk DataFile Version 3.0\n";
-	file << "by MeshGenerator\n";
-	file << "ASCII\n";
-	file << "DATASET UNSTRUCTURED_GRID\n";
-
-	file << "POINTS " << 2 * numberOfCells << " float" << std::endl;
-
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			real xLB, yLB, zLB;
-			grid->transIndexToCoords(street.matrixIndicesLB[i], xLB, yLB, zLB);
-
-			file << street.getCoordinateX(i) << " " << street.getCoordinateY(i) << " " << 5.0 << std::endl;
-			file << xLB << " " << yLB << " " << zLB << std::endl;
-		}
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "CELLS " << numberOfCells << " " << 3 * numberOfCells << std::endl;
-
-
-	uint nodeIndex = 0;
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			file << "2 " << nodeIndex << " " << nodeIndex + 1 << std::endl;
-			nodeIndex += 2;
-		}
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "CELL_TYPES " << numberOfCells << std::endl;
-
-	for (uint i = 0; i < numberOfCells; i++) {
-		file << "3" << std::endl;
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void StreetPointFinder::writeSimulationFile(std::string gridPath, real concentration, uint numberOfLevels, uint level)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::writeSimulationFile( " << gridPath << "conc.dat )" << "\n";
-
-	std::ofstream file;
-
-	file.open(gridPath + "conc.dat");
-
-	file << "concentration\n";
-
-	file << numberOfLevels - 1 << "\n";
-
-	for (uint currentLevel = 0; currentLevel < numberOfLevels; currentLevel++)
-	{
-		if (currentLevel == level)
-		{
-			uint numberOfCells = 0;
-			for (auto& street : streets)
-			{
-				numberOfCells += street.numberOfCells;
-			}
-
-			file << numberOfCells << "\n";
-
-			for (auto& street : streets)
-			{
-				for (auto& sparseIndexLB : street.sparseIndicesLB)
-				{
-					// + 1 for numbering shift between GridGenerator and VF_GPU
-					file << sparseIndexLB + 1 << "\n";
-				}
-			}
-		}
-		else
-		{
-			file << "0\n";
-		}
-	}
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void StreetPointFinder::writeStreetVectorFile(std::string gridPath, real concentration, uint numberOfLevels, uint level)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::writeStreetVectorFile( " << gridPath << "streetVector.dat )" << "\n";
-
-	std::ofstream file;
-
-	file.open(gridPath + "streetVector.dat");
-
-	file << "streetVector\n";
-
-	file << numberOfLevels - 1 << "\n";
-
-	for (uint currentLevel = 0; currentLevel < numberOfLevels; currentLevel++)
-	{
-		if (currentLevel == level)
-		{
-			uint numberOfCells = 0;
-			for (auto& street : streets)
-			{
-				numberOfCells += street.numberOfCells;
-			}
-
-			file << numberOfCells << "\n";
-
-			for (auto& street : streets)
-			{
-				for (auto& sparseIndexLB : street.sparseIndicesLB)
-				{
-                    (void) sparseIndexLB;
- 					// + 1 for numbering shift between GridGenerator and VF_GPU
-					file << street.getVectorX() << " " << street.getVectorY() << "\n";
-				}
-			}
-		}
-		else
-		{
-			file << "0\n";
-		}
-	}
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void StreetPointFinder::writeSimulationFileSorted(std::string gridPath, real concentration, uint numberOfLevels, uint level)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::writeSimulationFile( " << gridPath << "concSorted.dat )" << "\n";
-
-	std::ofstream file;
-
-	file.open(gridPath + "concSorted.dat");
-
-	file << "concentration\n";
-
-	file << numberOfLevels - 1 << "\n";
-
-	for (uint currentLevel = 0; currentLevel < numberOfLevels; currentLevel++)
-	{
-		if (currentLevel == level)
-		{
-			file << this->sparseIndicesLB.size() << "\n";
-
-			for (auto& sparseIndexLB : this->sparseIndicesLB)
-			{
-				// + 1 for numbering shift between GridGenerator and VF_GPU
-				file << sparseIndexLB + 1 << "\n";
-			}
-		}
-		else
-		{
-			file << "0\n";
-		}
-	}
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void StreetPointFinder::writeMappingFile(std::string gridPath)
-{
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::writeMappingFile( " << gridPath << "mappingNashToConc.dat )" << "\n";
-
-	std::ofstream file;
-
-	file.open(gridPath + "mappingNashToConc.dat");
-
-	file << this->mapNashToConc.size() << "\n";
-
-	for (auto& index : this->mapNashToConc)
-	{
-		file << index << "\n";
-	}
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Speed hackend by Stephan Lenz, not tested
-
-void StreetPointFinder::write3DVTK(std::string filename, const std::vector<int>& cars)
-{
-	uint numberOfCells = 0;
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "StreetPointFinder::writeVTK( " << filename << " )" << "\n";
-
-	std::ofstream file;
-
-	file.open(filename);
-
-	prepareWrite3DVTK(file, numberOfCells, cars);
-
-	////////////////////////////////////////////////////////////////////////////
-
-	file.close();
-
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
-}
-
-void StreetPointFinder::prepareWrite3DVTK(std::ofstream & file, uint & numberOfCells, const std::vector<int>& cars)
-{
-
-	uint numberOfNodes = 0;
-
-	for (auto& street : streets)
-	{
-		numberOfCells += street.numberOfCells;
-		numberOfNodes += street.numberOfCells + 1;
-	}
-
-	file << "# vtk DataFile Version 3.0\n";
-	file << "by MeshGenerator\n";
-	file << "ASCII\n";
-	file << "DATASET UNSTRUCTURED_GRID\n";
-
-	uint index = 0;
-	uint numberOfCars = 0;
-
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			if (index < cars.size() && cars[index] != -1)
-			{
-				numberOfCars++;
-			}
-
-			index++;
-		}
-	}
-
-	file << "POINTS " << 8 * numberOfCars << " float" << std::endl;
-
-	index = 0;
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			if(index < cars.size() && cars[index] != -1 )
-			{
-				real xStart = 0.5 * (street.getCoordinateX(i - 1) + street.getCoordinateX(i));
-				real yStart = 0.5 * (street.getCoordinateY(i - 1) + street.getCoordinateY(i));
-
-				real xEnd = 0.5 * (street.getCoordinateX(i) + street.getCoordinateX(i + 1));
-				real yEnd = 0.5 * (street.getCoordinateY(i) + street.getCoordinateY(i + 1));
-
-				real vecX = xEnd - xStart;
-				real vecY = yEnd - yStart;
-
-				file << xStart + vecY << " " << yStart - vecX << " " << 0.0 << std::endl;
-				file << xStart - vecY << " " << yStart + vecX << " " << 0.0 << std::endl;
-
-				file << xEnd + vecY << " " << yEnd - vecX << " " << 0.0 << std::endl;
-				file << xEnd - vecY << " " << yEnd + vecX << " " << 0.0 << std::endl;
-
-				file << xStart + vecY << " " << yStart - vecX << " " << 1.5 << std::endl;
-				file << xStart - vecY << " " << yStart + vecX << " " << 1.5 << std::endl;
-
-				file << xEnd + vecY << " " << yEnd - vecX << " " << 1.5 << std::endl;
-				file << xEnd - vecY << " " << yEnd + vecX << " " << 1.5 << std::endl;
-			}
-
-			index++;
-		}
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "CELLS " << numberOfCars << " " << 9 * numberOfCars << std::endl;
-
-	index = 0;
-	uint carIndex = 0;
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			if (index < cars.size() && cars[index] != -1)
-			{
-				file << "8 " 
-					 << 8 * carIndex + 0 << " "
-					 << 8 * carIndex + 1 << " "
-					 << 8 * carIndex + 3 << " "
-					 << 8 * carIndex + 2 << " "
-					 << 8 * carIndex + 4 << " "
-					 << 8 * carIndex + 5 << " "
-					 << 8 * carIndex + 7 << " "
-					 << 8 * carIndex + 6 << " "
-					 << std::endl;
-
-				carIndex++;
-			}
-			index++;
-		}
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "CELL_TYPES " << numberOfCars << std::endl;
-
-	for (uint i = 0; i < numberOfCars; i++) {
-		file << "12" << std::endl;
-	}
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "\nCELL_DATA " << numberOfCars << std::endl;
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "FIELD Label " << 1 << std::endl;
-
-	//////////////////////////////////////////////////////////////////////////
-
-	file << "Cars 1 " << numberOfCars << " float" << std::endl;
-
-	index = 0;
-	for (auto& street : streets)
-	{
-		for (uint i = 0; i < street.numberOfCells; i++)
-		{
-			if (index < cars.size() && cars[index] != -1)
-				file << cars[index] << std::endl;
-			
-			index++;
-		}
-	}
-}
-
-
-
-
diff --git a/src/gpu/GridGenerator/StreetPointFinder/StreetPointFinder.h b/src/gpu/GridGenerator/StreetPointFinder/StreetPointFinder.h
deleted file mode 100644
index feb3618f64b0a6f757930772594878a6ca7c0144..0000000000000000000000000000000000000000
--- a/src/gpu/GridGenerator/StreetPointFinder/StreetPointFinder.h
+++ /dev/null
@@ -1,120 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file StreetPointFinder.h
-//! \ingroup StreetPointFinder
-//! \author Stephan Lenz
-//=======================================================================================
-#ifndef StreetPointFinder_H
-#define StreetPointFinder_H
-
-#include <vector>
-#include <string>
-
-#include "GridGenerator_export.h"
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-
-
-
-class Grid;
-
-struct GRIDGENERATOR_EXPORT Street
-{
-    // The start and end coordinates are stored for cell centers!
-    //
-    //     |---x---|---x---|---x---|---x---|---x---|---x---|---x---|---x---|---x---|---x---|
-    //         |--->                       |<----->|                               <---|
-    //         xStart                          dx                                   xEnd
-    //
-    // dx = (xStart - xEnd) / (numberOfCells - 1)
-
-    uint numberOfCells;
-    real xStart, yStart, xEnd, yEnd;
-
-    std::vector<uint> matrixIndicesLB;
-    std::vector<uint> sparseIndicesLB;
-
-    // The constructor expect start and end for cells
-    Street( real xStartCell, real yStartCell, real xEndCell, real yEndCell, real dx );
-
-    real getCoordinateX( int cellIndex );
-    real getCoordinateY( int cellIndex );
-
-	real getVectorX();
-	real getVectorY();
-
-    void findIndicesLB( SPtr<Grid> grid, real initialSearchHeight);
-};
-
-struct GRIDGENERATOR_EXPORT StreetPointFinder
-{
-    std::vector<Street> streets;
-
-    std::vector<uint> sparseIndicesLB;
-    std::vector<uint> mapNashToConc;
-
-    void prepareSimulationFileData();
-
-    void readStreets(std::string filename);
-
-    void findIndicesLB( SPtr<Grid> grid, real initialSearchHeight );
-
-	void writeVTK(std::string filename, const std::vector<int>& cars = std::vector<int>());
-
-	void writeReducedVTK(std::string filename, const std::vector<int>& cars = std::vector<int>());
-
-	void prepareWriteVTK(std::ofstream& file, uint & numberOfCells);
-
-	void writeCarsVTK(std::ofstream& file, uint numberOfCells, const std::vector<int>& cars);
-
-	void writeLengthsVTK(std::ofstream& file, uint numberOfCells);
-
-	void writeStreetsVTK(std::ofstream& file, uint numberOfCells);
-
-    void writeConnectionVTK(std::string filename, SPtr<Grid> grid);
-
-	void writeSimulationFile(std::string gridPath, real concentration, uint numberOfLevels, uint level);
-
-	void writeStreetVectorFile(std::string gridPath, real concentration, uint numberOfLevels, uint level);
-
-    void writeSimulationFileSorted( std::string gridPath, real concentration, uint numberOfLevels, uint level );
-
-    void writeMappingFile( std::string gridPath );
-
-	//////////////////////////////////////////////////////////////////////////
-	// 3D cars writer hacked by Stephan L.
-
-	void write3DVTK(std::string filename, const std::vector<int>& cars = std::vector<int>());
-
-	void prepareWrite3DVTK(std::ofstream& file, uint & numberOfCells, const std::vector<int>& cars);
-};
-
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
index 5f3c4ad492b16c09b26acd00a624a54ad65dffda..571796d503a1a73b3eccf631a347884c7522b533 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
@@ -417,7 +417,7 @@ void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
             {
                 numberOfFiles++;
 
-                printf("switching to precursor file no. %zu\n", numberOfFiles);
+                VF_LOG_INFO("PrecursorBC on level {}: switching to file no. {}\n", level, numberOfFiles);
                 if(numberOfFiles == this->fileCollection->files[level][id].size())
                     throw std::runtime_error("Not enough Precursor Files to read");
 
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
index 1663a3ff37ba1bb062647847462d4e364baed93b..bdf29745a0a60473d0454c33dcb10a193ca10780 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
@@ -1,8 +1,8 @@
 #ifndef TRANSIENTBCSETTER_H_
 #define TRANSIENTBCSETTER_H_
 
-#include "Core/DataTypes.h"
-#include <Core/StringUtilities/StringUtil.h>
+#include "DataTypes.h"
+#include <StringUtilities/StringUtil.h>
 #include "PointerDefinitions.h"
 
 #include <string>
diff --git a/src/gpu/GridGenerator/geometries/Conglomerate/Conglomerate.cpp b/src/gpu/GridGenerator/geometries/Conglomerate/Conglomerate.cpp
index 331b928c6f5542584cffdcc1b17df7207981b8f8..b41813e12da1727ba03b7f179c93b4c6b042b76f 100644
--- a/src/gpu/GridGenerator/geometries/Conglomerate/Conglomerate.cpp
+++ b/src/gpu/GridGenerator/geometries/Conglomerate/Conglomerate.cpp
@@ -32,30 +32,14 @@
 //=======================================================================================
 #include "Conglomerate.h"
 
-Conglomerate::Conglomerate()
-{
-    addObjects = new Object*[MAX_NUMBER_OF_OBJECTS];
-    subtractObjects = new Object*[MAX_NUMBER_OF_OBJECTS];
-}
-
-Conglomerate::~Conglomerate()
-{
-    for (uint i = 0; i < numberOfAddObjects; i++)
-        delete addObjects[i];
-
-    for (uint i = 0; i < numberOfSubtractObjects; i++)
-        delete subtractObjects[i];
-
-    delete[] addObjects;
-    delete[] subtractObjects;
-}
+#include <memory>
 
 SPtr<Conglomerate> Conglomerate::makeShared()
 {
-    return SPtr<Conglomerate>(new Conglomerate());
+    return std::make_shared<Conglomerate>();
 }
 
-void Conglomerate::add(Object* object)
+void Conglomerate::add(SPtr<Object> object)
 {
     if (numberOfAddObjects < MAX_NUMBER_OF_OBJECTS)
     {
@@ -65,7 +49,7 @@ void Conglomerate::add(Object* object)
         printf("[WARNING] max numbers of %d reached! Object was not added.\n", MAX_NUMBER_OF_OBJECTS);
 }
 
-void Conglomerate::subtract(Object* object)
+void Conglomerate::subtract(SPtr<Object> object)
 {
     if (numberOfSubtractObjects < MAX_NUMBER_OF_OBJECTS)
     {
@@ -76,9 +60,9 @@ void Conglomerate::subtract(Object* object)
         printf("[WARNING] max numbers of %d reached! Object was not added.\n", MAX_NUMBER_OF_OBJECTS);
 }
 
-Object* Conglomerate::clone() const
+SPtr<Object> Conglomerate::clone() const
 {
-    auto conglomerate = new Conglomerate();
+    auto conglomerate = std::make_shared<Conglomerate>();
     for (uint i = 0; i < numberOfAddObjects; i++)
         conglomerate->add(addObjects[i]->clone());
 
@@ -193,5 +177,5 @@ void Conglomerate::findInnerNodes(SPtr<GridImp> grid)
         addObjects[i]->findInnerNodes(grid);
 
     if( numberOfSubtractObjects > 0 )
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Warning: Conglomerate::substract() is currently nut fully implemented!\n";
+        VF_LOG_WARNING("Warning: Conglomerate::substract() is currently nut fully implemented!");
 }
diff --git a/src/gpu/GridGenerator/geometries/Conglomerate/Conglomerate.h b/src/gpu/GridGenerator/geometries/Conglomerate/Conglomerate.h
index 8cb26137d6ab4e4c52bed34aa1d044121ac4bf3d..0b3f7ad7b67554b72ad78188d68d9570bf21e090 100644
--- a/src/gpu/GridGenerator/geometries/Conglomerate/Conglomerate.h
+++ b/src/gpu/GridGenerator/geometries/Conglomerate/Conglomerate.h
@@ -33,25 +33,25 @@
 #ifndef CONGLOMERATE_H
 #define CONGLOMERATE_H
 
+#include <array>
+
 #include "global.h"
 
 #include "geometries/Object.h"
+#include "basics/PointerDefinitions.h"
 
 #define MAX_NUMBER_OF_OBJECTS 20
 
 class GRIDGENERATOR_EXPORT Conglomerate : public Object
 {
-public:              
-    Conglomerate();
-    virtual ~Conglomerate();
-
+public:
     static SPtr<Conglomerate> makeShared();
 
-    void add(Object* object);
-    void subtract(Object* objectStub);
+    void add(SPtr<Object> object);
+    void subtract(SPtr<Object> objectStub);
 
 
-    Object* clone() const override;
+    SPtr<Object> clone() const override;
 
     double getX1Centroid() override;
     double getX1Minimum() override;
@@ -74,8 +74,8 @@ protected:
     static double getMaximum(double val1, double val2);
 
 
-    Object** addObjects;
-    Object** subtractObjects;
+    std::array<SPtr<Object>, MAX_NUMBER_OF_OBJECTS> addObjects;
+    std::array<SPtr<Object>, MAX_NUMBER_OF_OBJECTS> subtractObjects;
     uint numberOfAddObjects = 0;
     uint numberOfSubtractObjects = 0;
 };
diff --git a/src/gpu/GridGenerator/geometries/Cuboid/Cuboid.cpp b/src/gpu/GridGenerator/geometries/Cuboid/Cuboid.cpp
index 7b0dbcdbdcbd679f1eb47ed5db7d828da8c31767..2cf56a867954abecf14f7dc2a74f735ebb0660a7 100644
--- a/src/gpu/GridGenerator/geometries/Cuboid/Cuboid.cpp
+++ b/src/gpu/GridGenerator/geometries/Cuboid/Cuboid.cpp
@@ -32,6 +32,7 @@
 //=======================================================================================
 #include "Cuboid.h"
 
+#include "PointerDefinitions.h"
 #include "utilities/math/Math.h"
 
 Cuboid::Cuboid(const double& x1a,const double& x2a, const double& x3a, const double& x1b,const double& x2b, const double& x3b)
@@ -40,14 +41,9 @@ Cuboid::Cuboid(const double& x1a,const double& x2a, const double& x3a, const dou
 
 }
 
-Cuboid::~Cuboid()
+SPtr<Object> Cuboid::clone() const
 {
-
-}
-
-Object* Cuboid::clone() const
-{
-    return new Cuboid(minX1, minX2, minX3, maxX1, maxX2, maxX3);
+    return std::make_shared<Cuboid>(minX1, minX2, minX3, maxX1, maxX2, maxX3);
 }
 
 double Cuboid::getX1Centroid()
diff --git a/src/gpu/GridGenerator/geometries/Cuboid/Cuboid.h b/src/gpu/GridGenerator/geometries/Cuboid/Cuboid.h
index 0351bd3ed847f9702e3c64bce4dcef514804e23a..7ff5c14c2f17b4ed80f4f1e0c03d26ea1a2eb196 100644
--- a/src/gpu/GridGenerator/geometries/Cuboid/Cuboid.h
+++ b/src/gpu/GridGenerator/geometries/Cuboid/Cuboid.h
@@ -41,9 +41,8 @@ class GRIDGENERATOR_EXPORT Cuboid : public Object
 {
 public:              
     Cuboid(const double& minX1, const double& minX2, const double& minX3, const double& maxX1,const double& maxX2, const double& maxX3);
-    virtual ~Cuboid();
 
-    Object* clone() const override;
+    SPtr<Object> clone() const override;
 
     double getX1Centroid() override;
     double getX1Minimum() override;
diff --git a/src/gpu/GridGenerator/geometries/Object.h b/src/gpu/GridGenerator/geometries/Object.h
index b92cca7992dcb06c1f230da8d8c9ce46bb7a3416..8bb0a35d5e38df1f7db390fdf5ee750295788395 100644
--- a/src/gpu/GridGenerator/geometries/Object.h
+++ b/src/gpu/GridGenerator/geometries/Object.h
@@ -43,8 +43,8 @@ struct Vertex;
 class GRIDGENERATOR_EXPORT Object
 {
 public:
-    virtual ~Object() {}
-    virtual Object* clone() const = 0;
+    virtual ~Object() = default;
+    virtual SPtr<Object> clone() const = 0;
 
     virtual double getX1Centroid() = 0;
     virtual double getX1Minimum()  = 0;
diff --git a/src/gpu/GridGenerator/geometries/Sphere/Sphere.cpp b/src/gpu/GridGenerator/geometries/Sphere/Sphere.cpp
index fa460bc021cdca1159f272e3bcb4d4dad50fc352..4669bbcfb9848a910bc86f9331a394daf6c55dd5 100644
--- a/src/gpu/GridGenerator/geometries/Sphere/Sphere.cpp
+++ b/src/gpu/GridGenerator/geometries/Sphere/Sphere.cpp
@@ -44,18 +44,14 @@ Sphere::Sphere(const double& centerX, const double& centerY, const double& cente
 
 }
 
-Sphere::~Sphere()
-{
-}
-
 SPtr<Sphere> Sphere::makeShared(double centerX, double centerY, double centerZ, double radius)
 {
-    return SPtr<Sphere>(new Sphere(centerX, centerY, centerZ, radius));
+    return std::make_shared<Sphere>(centerX, centerY, centerZ, radius);
 }
 
-Object* Sphere::clone() const
+SPtr<Object> Sphere::clone() const
 {
-    return new Sphere(centerX, centerY, centerZ, radius);
+    return std::make_shared<Sphere>(centerX, centerY, centerZ, radius);
 }
 
 double Sphere::getX1Centroid()
diff --git a/src/gpu/GridGenerator/geometries/Sphere/Sphere.h b/src/gpu/GridGenerator/geometries/Sphere/Sphere.h
index 03b9ef9fd101f19dbaff7d4e4109000cce6e3c49..9ac8821a9c95fce8de16c22688ef3c585197804a 100644
--- a/src/gpu/GridGenerator/geometries/Sphere/Sphere.h
+++ b/src/gpu/GridGenerator/geometries/Sphere/Sphere.h
@@ -33,6 +33,7 @@
 #ifndef SPHERE_H
 #define SPHERE_H
 
+#include "PointerDefinitions.h"
 #include "global.h"
 #include "geometries/Object.h"
 
@@ -40,11 +41,10 @@ class GRIDGENERATOR_EXPORT Sphere : public Object
 {
 public:
     Sphere(const double& centerX, const double& centerY, const double& centerZ, const double& radius);
-    virtual ~Sphere();
 
     static SPtr<Sphere> makeShared(double centerX, double centerY, double centerZ, double radius);
 
-    Object* clone() const override;
+    SPtr<Object> clone() const override;
 
     double getX1Centroid() override;
     double getX1Minimum() override;
diff --git a/src/gpu/GridGenerator/geometries/Triangle/Triangle.cpp b/src/gpu/GridGenerator/geometries/Triangle/Triangle.cpp
index bf272b9e7f46c413ae6edce62c05d1be20d327de..8d459e75b5b27c5d5a1c9e645b08b5fc663e8be6 100644
--- a/src/gpu/GridGenerator/geometries/Triangle/Triangle.cpp
+++ b/src/gpu/GridGenerator/geometries/Triangle/Triangle.cpp
@@ -40,7 +40,6 @@ using namespace vf::gpu;
 
 Triangle::Triangle(Vertex &v1, Vertex &v2, Vertex &v3, Vertex &normal) : v1(v1), v2(v2), v3(v3), normal(normal), patchIndex(INVALID_INDEX) {}
 Triangle::Triangle(Vertex &v1, Vertex &v2, Vertex &v3) : v1(v1), v2(v2), v3(v3), patchIndex(INVALID_INDEX) { calcNormal(); }
-Triangle::Triangle(){}
 
 void Triangle::set(const Vertex &v1, const Vertex &v2, const Vertex &v3)
 {
diff --git a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.cpp b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.cpp
index 883ca0deaf34f45e4608c4e59908b4562932db77..91bb8f030e76fa6e083e7765feb18c7ccc517ee1 100644
--- a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.cpp
+++ b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.cpp
@@ -32,7 +32,7 @@
 //=======================================================================================
 #include "TriangularMesh.h"
 
-#include "Core/Timer/Timer.h"
+#include "Timer/Timer.h"
 
 #include "basics/geometry3d/GbTriFaceMesh3D.h"
 
@@ -79,9 +79,9 @@ TriangularMesh::TriangularMesh()
     this->minmax = BoundingBox::makeInvalidMinMaxBox();  // blame Lenz
 }
 
-Object* TriangularMesh::clone() const
+SPtr<Object> TriangularMesh::clone() const
 {
-    auto mesh = new TriangularMesh();
+    auto mesh = std::make_shared<TriangularMesh>();
     mesh->setTriangles(this->triangleVec);
     return mesh;
 }
@@ -95,7 +95,7 @@ uint TriangularMesh::getNumberOfTriangles() const
 
 void TriangularMesh::findNeighbors()
 {
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "start finding neighbors ...\n";
+    VF_LOG_INFO("start finding neighbors ...");
 
     auto t = Timer::makeStart();
 
@@ -103,8 +103,7 @@ void TriangularMesh::findNeighbors()
     finder.fillWithNeighborAngles(this);
 
     t->end();
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "time finding neighbors: " << t->getTimeInSeconds() << "s\n";
+    VF_LOG_INFO("time finding neighbors = {}", t->getTimeInSeconds());
 }
 
 void TriangularMesh::setTriangles(std::vector<Triangle> triangles)
@@ -152,7 +151,7 @@ GRIDGENERATOR_EXPORT void TriangularMesh::generateGbTriFaceMesh3D()
 {
     if( this->VF_GbTriFaceMesh3D ) return;
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start generating GbTriFaceMesh3D:\n";
+    VF_LOG_INFO("Start generating GbTriFaceMesh3D");
 
     std::vector<GbTriFaceMesh3D::Vertex>  *gbVertices = new std::vector<GbTriFaceMesh3D::Vertex>(this->triangleVec.size() * 3);
     std::vector<GbTriFaceMesh3D::TriFace> *gbTriangles = new std::vector<GbTriFaceMesh3D::TriFace>(this->triangleVec.size());
@@ -168,7 +167,7 @@ GRIDGENERATOR_EXPORT void TriangularMesh::generateGbTriFaceMesh3D()
 
     this->VF_GbTriFaceMesh3D = std::make_shared<GbTriFaceMesh3D>( "stl", gbVertices, gbTriangles, GbTriFaceMesh3D::KDTREE_SAHPLIT, false );
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Done generating GbTriFaceMesh3D\n";
+    VF_LOG_INFO("Done generating GbTriFaceMesh3D");
 }
 
 
@@ -238,9 +237,9 @@ void TriangularMesh::scale(double offset)
         const int vertexTriangleID = (int)vertexID % 3;
 
         Vertex intersection;
-        Vertex p = this->triangleVec[triangleID].v1 + this->triangleVec[triangleID].normal * offset;
+        // Vertex p = this->triangleVec[triangleID].v1 + this->triangleVec[triangleID].normal * offset; // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/85
         Vertex lineOrigin = this->triangleVec[triangleID].get(vertexTriangleID);
-        //bool b = intersectPlane(this->triangleVec[triangleID].normal, p, lineOrigin, averrageNormal, intersection);
+        // bool b = intersectPlane(this->triangleVec[triangleID].normal, p, lineOrigin, averrageNormal, intersection);
         triangles[triangleID].set(vertexTriangleID, intersection);
         triangles[triangleID].calcNormal();
 
diff --git a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
index 2e876e1d3c50b377ef6df9a8489fe8a189849594..7c050dd70b3908f3e4b021872887c74833594a26 100644
--- a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
+++ b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
@@ -87,7 +87,7 @@ private:
     static void eliminateTriangleswithIdenticialNormal(std::vector<Triangle> &triangles);
 
 public:
-    Object* clone() const override;
+    SPtr<Object> clone() const override;
     double getX1Centroid() override { throw "Not implemented in TriangularMesh"; }
     double getX1Minimum() override { return minmax.minX; }
     double getX1Maximum() override { return minmax.maxX; }
diff --git a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMeshStrategy.cpp b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMeshStrategy.cpp
index d9c1486e2ca9469d55174eca673f22f180a78294..f66b216525253d26d0402c08ec89131130245ca0 100644
--- a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMeshStrategy.cpp
+++ b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMeshStrategy.cpp
@@ -32,7 +32,7 @@
 //=======================================================================================
 #include "TriangularMeshStrategy.h"
 
-#include "Core/Timer/Timer.h"
+#include "Timer/Timer.h"
 
 #include "basics/geometry3d/GbTriFaceMesh3D.h"
 
@@ -56,7 +56,7 @@ void PointInObjectDiscretizationStrategy::doDiscretize(TriangularMesh* triangula
 {
     triangularMesh->generateGbTriFaceMesh3D();
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start Point-In-Object Test:\n";
+    VF_LOG_INFO("Start Point-In-Object Test");
 
     // trigger the GbTriFaceMesh3D to generate a kd-tree
     triangularMesh->getGbTriFaceMesh3D()->isPointInGbObject3D(0.0, 0.0, 0.0);
@@ -79,12 +79,11 @@ void PointInObjectDiscretizationStrategy::doDiscretize(TriangularMesh* triangula
         //    grid->setNodeTo(i, OuterType);
 
         if( timer->getCurrentRuntimeInSeconds() > outputTime ){
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << "    " << index << "/" << grid->getSize() <<" nodes tested!\n";
+            VF_LOG_INFO("    {} / {} nodes tested", index, grid->getSize());
             timer->start();
         }
     }
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Done Point-In-Object Test\n";
+    VF_LOG_INFO("Done Point-In-Object Test");
 }
 
 
diff --git a/src/gpu/GridGenerator/geometries/Vertex/Vertex.h b/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
index ec5fc0f1ced64f7757de26deaf3053504e29d7c6..48e4712d7d701e3710c6ff7c0876e3f762a464d9 100644
--- a/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
+++ b/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
@@ -46,7 +46,6 @@ public:
 
 	Vertex(real x, real y, real z);
 	Vertex();
-	~Vertex() {}
 
 	real getEuclideanDistanceTo(const Vertex &w) const;
 	Vertex operator-(const Vertex &v) const;
diff --git a/src/gpu/GridGenerator/geometries/VerticalCylinder/VerticalCylinder.cpp b/src/gpu/GridGenerator/geometries/VerticalCylinder/VerticalCylinder.cpp
index 89bcd50349fe1a7591c5a873b5924a0c8ce8c2f3..5019f2a7547c509df6b7afd034cae179356c74ec 100644
--- a/src/gpu/GridGenerator/geometries/VerticalCylinder/VerticalCylinder.cpp
+++ b/src/gpu/GridGenerator/geometries/VerticalCylinder/VerticalCylinder.cpp
@@ -38,18 +38,14 @@ VerticalCylinder::VerticalCylinder(const double& centerX, const double& centerY,
 
 }
 
-VerticalCylinder::~VerticalCylinder()
-{
-}
-
 SPtr<VerticalCylinder> VerticalCylinder::makeShared(double centerX, double centerY, double centerZ, double radius, double height)
 {
-    return SPtr<VerticalCylinder>(new VerticalCylinder(centerX, centerY, centerZ, radius, height));
+    return std::make_shared<VerticalCylinder>(centerX, centerY, centerZ, radius, height);
 }
 
-Object* VerticalCylinder::clone() const
+SPtr<Object> VerticalCylinder::clone() const
 {
-    return new VerticalCylinder(centerX, centerY, centerZ, radius, height);
+    return std::make_shared<VerticalCylinder>(centerX, centerY, centerZ, radius, height);
 }
 
 double VerticalCylinder::getX1Centroid()
diff --git a/src/gpu/GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h b/src/gpu/GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h
index e995d2ba16eba17930401c99c260f81b9c6077b4..64cdecef5824388a31a4f545c85b1194d3f4c77b 100644
--- a/src/gpu/GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h
+++ b/src/gpu/GridGenerator/geometries/VerticalCylinder/VerticalCylinder.h
@@ -40,11 +40,10 @@ class GRIDGENERATOR_EXPORT VerticalCylinder : public Object
 {
 public:
     VerticalCylinder(const double& centerX, const double& centerY, const double& centerZ, const double& radius, const double& height);
-    virtual ~VerticalCylinder();
 
     static SPtr<VerticalCylinder> makeShared(double centerX, double centerY, double centerZ, double radius, double height);
 
-    Object* clone() const override;
+    SPtr<Object> clone() const override;
 
     double getX1Centroid() override;
     double getX1Minimum() override;
diff --git a/src/gpu/GridGenerator/global.h b/src/gpu/GridGenerator/global.h
index 79f62bb354bed18075f6a8327a18f24d1d0fd1e2..296c6727f248933e259065e68cb81d8bf4e6c119 100644
--- a/src/gpu/GridGenerator/global.h
+++ b/src/gpu/GridGenerator/global.h
@@ -43,8 +43,8 @@
 #include "GridGenerator_export.h"
 
 #include "basics/PointerDefinitions.h"
-#include "basics/Core/DataTypes.h"
+#include "basics/DataTypes.h"
 
-#include "basics/Core/Logger/Logger.h"
+#include <logger/Logger.h>
 
 #endif
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
index 5b191ee4e3fcdc0ec71633111085f70c5dc43479..718a8d5da1de148c72ba67dd2d15c5e3b443e16a 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
@@ -37,12 +37,15 @@
 #include "grid/NodeValues.h"
 
 #include "utilities/math/Math.h"
+#include <array>
+#include <cstddef>
+#include <vector>
 
 using namespace gg;
 
-std::vector<real> Side::getNormal()
+std::array<real, 3> Side::getNormal() const
 {
-    std::vector<real> normal;
+    std::array<real, 3> normal;
     if(this->getCoordinate()==X_INDEX)
         normal = {(real)this->getDirection(), 0.0, 0.0};
     if(this->getCoordinate()==Y_INDEX)
@@ -61,31 +64,32 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition
         {
             const uint index = getIndex(grid, coord, constant, v1, v2);
 
-            if ((index != INVALID_INDEX) && (   grid->getFieldEntry(index) == vf::gpu::FLUID
+            if(index == INVALID_INDEX)
+                continue;
+
+            if (   grid->getFieldEntry(index) == vf::gpu::FLUID
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFC
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFF
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCC
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
-
-                                            //! Enforce overlap of BCs on edge nodes
-                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_PRESSURE
-                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_VELOCITY
-                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_NOSLIP
-                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_SLIP
-                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_STRESS ))
-            {
+                                            // Overlap of BCs on edge nodes
+                                            || grid->nodeHasBC(index) )
+            {   
                 grid->setFieldEntry(index, boundaryCondition->getType());
                 boundaryCondition->indices.push_back(index);
                 setPressureNeighborIndices(boundaryCondition, grid, index);
                 setStressSamplingIndices(boundaryCondition, grid, index);
-
+                // if(grid->getFieldEntry(index)==26) printf("index = %u, v1 = %f, v2 = %f, field entry=%u \n", index, v1, v2, grid->getFieldEntry(index) );
                 setQs(grid, boundaryCondition, index);
-
                 boundaryCondition->patches.push_back(0);
             }
         }
     }
+
+    const auto currentBCSide = this->whoAmI();
+    if(currentBCSide != SideType::GEOMETRY)
+        grid->addBCalreadySet(currentBCSide);
 }
 
 void Side::setPressureNeighborIndices(SPtr<BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index)
@@ -138,55 +142,111 @@ void Side::setStressSamplingIndices(SPtr<BoundaryCondition> boundaryCondition, S
 
 void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uint index)
 {
-
     std::vector<real> qNode(grid->getEndDirection() + 1);
 
-    for (int dir = 0; dir <= grid->getEndDirection(); dir++)
-    {
-        real x,y,z;
-        grid->transIndexToCoords( index, x, y, z );
+    for (int dir = 0; dir <= grid->getEndDirection(); dir++) {
+        real x, y, z;
+        grid->transIndexToCoords(index, x, y, z);
 
-        real coords[3] = {x,y,z};
+        std::array<real, 3> coords = { x, y, z };
+        std::array<real, 3> neighborCoords = getNeighborCoordinates(grid.get(), coords, (size_t)dir);
 
-        real neighborX = x + grid->getDirection()[dir * DIMENSION + 0] * grid->getDelta();
-        real neighborY = y + grid->getDirection()[dir * DIMENSION + 1] * grid->getDelta();
-        real neighborZ = z + grid->getDirection()[dir * DIMENSION + 2] * grid->getDelta();
+        correctNeighborForPeriodicBoundaries(grid.get(), coords, neighborCoords);
 
-        // correct neighbor coordinates in case of periodic boundaries
-        if( grid->getPeriodicityX() && grid->getFieldEntry( grid->transCoordToIndex( neighborX, y, z ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY )
-        {
-            if( neighborX > x ) neighborX = grid->getFirstFluidNode( coords, 0, grid->getStartX() );
-            else                neighborX = grid->getLastFluidNode ( coords, 0, grid->getEndX() );
-        }
+        const uint neighborIndex = grid->transCoordToIndex(neighborCoords[0], neighborCoords[1], neighborCoords[2]);
 
-        if( grid->getPeriodicityY() && grid->getFieldEntry( grid->transCoordToIndex( x, neighborY, z ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY )
-        {
-            if( neighborY > y ) neighborY = grid->getFirstFluidNode( coords, 1, grid->getStartY() );
-            else                neighborY = grid->getLastFluidNode ( coords, 1, grid->getEndY() );
+        //! Only setting q's that partially point in the Side-normal direction
+        const bool alignedWithNormal = this->isAlignedWithMyNormal(grid.get(), dir);
+        if (grid->isStopperForBC(neighborIndex) && alignedWithNormal) {
+            qNode[dir] = 0.5;
+        } else {
+            qNode[dir] = -1.0;
         }
 
-        if( grid->getPeriodicityZ() && grid->getFieldEntry( grid->transCoordToIndex( x, y, neighborZ ) ) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY )
-        {
-            if( neighborZ > z ) neighborZ = grid->getFirstFluidNode( coords, 2, grid->getStartZ() );
-            else                neighborZ = grid->getLastFluidNode ( coords, 2, grid->getEndZ() );
+        // reset diagonals in case they were set by another bc
+        resetDiagonalsInCaseOfOtherBC(grid.get(), qNode, dir, coords);
+    }
+
+    boundaryCondition->qs.push_back(qNode);
+}
+
+std::array<real, 3> Side::getNeighborCoordinates(Grid *grid, const std::array<real, 3> &coordinates, size_t direction) const
+{
+    return { coordinates[0] + grid->getDirection()[direction * DIMENSION + 0] * grid->getDelta(),
+             coordinates[1] + grid->getDirection()[direction * DIMENSION + 1] * grid->getDelta(),
+             coordinates[2] + grid->getDirection()[direction * DIMENSION + 2] * grid->getDelta() };
+}
+
+bool Side::neighborNormalToSideIsAStopper(Grid *grid, const std::array<real, 3> &coordinates, SideType side) const
+{
+    const auto neighborCoords = getNeighborCoordinates(grid, coordinates, sideToD3Q27.at(side));
+    const auto neighborIndex = grid->transCoordToIndex(neighborCoords[0], neighborCoords[1], neighborCoords[2]);
+    return grid->isStopperForBC(neighborIndex);
+}
+
+void Side::resetDiagonalsInCaseOfOtherBC(Grid *grid, std::vector<real> &qNode, int dir,
+                                         const std::array<real, 3> &coordinates) const
+{
+    // When to reset a diagonal q to -1:
+    // - it is normal to another boundary condition which was already set
+    // - and it actually is influenced by the other bc:
+    //   We check if its neighbor in the regular direction to the other bc is a stopper. If it is a stopper, it is influenced by the other bc.
+
+    if (qNode[dir] == 0.5 && grid->getBCAlreadySet().size() > 0) {
+        for (int i = 0; i < (int)grid->getBCAlreadySet().size(); i++) {
+            SideType otherDir = grid->getBCAlreadySet()[i];
+
+            // only reset normals for nodes on edges and corners, not on faces
+            if (!neighborNormalToSideIsAStopper(grid, coordinates, otherDir))
+                continue;
+
+            const auto otherNormal = normals.at(otherDir);
+            if (isAlignedWithNormal(grid, dir, otherNormal)) {
+                qNode[dir] = -1.0;
+            }
         }
+    }
+}
 
-        //! Only seting q's that partially point in the Side-normal direction
-        bool alignedWithNormal = (this->getNormal()[0]*grid->getDirection()[dir * DIMENSION + 0]+
-                                  this->getNormal()[1]*grid->getDirection()[dir * DIMENSION + 1]+
-                                  this->getNormal()[2]*grid->getDirection()[dir * DIMENSION + 2] ) > 0;
+bool Side::isAlignedWithMyNormal(const Grid *grid, int dir) const
+{
+    std::array<real, 3> normal = this->getNormal();
+    return isAlignedWithNormal(grid, dir, normal);
+}
 
-        uint neighborIndex = grid->transCoordToIndex( neighborX, neighborY, neighborZ );
-        if((grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ||
-            grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID          ||
-            grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID)               &&
-            alignedWithNormal )
-            qNode[dir] = 0.5;
+bool Side::isAlignedWithNormal(const Grid *grid, int dir, const std::array<real, 3> &normal) const
+{
+    return (normal[0] * grid->getDirection()[dir * DIMENSION + 0] +
+            normal[1] * grid->getDirection()[dir * DIMENSION + 1] +
+            normal[2] * grid->getDirection()[dir * DIMENSION + 2]) > 0;
+}
+
+void Side::correctNeighborForPeriodicBoundaries(const Grid *grid, std::array<real, 3>& coords, std::array<real, 3>& neighborCoords) const
+{
+    // correct neighbor coordinates in case of periodic boundaries
+    if (grid->getPeriodicityX() &&
+        grid->getFieldEntry(grid->transCoordToIndex(neighborCoords[0], coords[1], coords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) {
+        if (neighborCoords[0] > coords[0])
+            neighborCoords[0] = grid->getFirstFluidNode(coords.data(), 0, grid->getStartX());
         else
-            qNode[dir] = -1.0;
+            neighborCoords[0] = grid->getLastFluidNode(coords.data(), 0, grid->getEndX());
     }
 
-    boundaryCondition->qs.push_back(qNode);
+    if (grid->getPeriodicityY() &&
+        grid->getFieldEntry(grid->transCoordToIndex(coords[0], neighborCoords[1], coords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) {
+        if (neighborCoords[1] > coords[1])
+            neighborCoords[1] = grid->getFirstFluidNode(coords.data(), 1, grid->getStartY());
+        else
+            neighborCoords[1] = grid->getLastFluidNode(coords.data(), 1, grid->getEndY());
+    }
+
+    if (grid->getPeriodicityZ() &&
+        grid->getFieldEntry(grid->transCoordToIndex(coords[0], coords[1], neighborCoords[2])) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY) {
+        if (neighborCoords[2] > coords[2])
+            neighborCoords[2] = grid->getFirstFluidNode(coords.data(), 2, grid->getStartZ());
+        else
+            neighborCoords[2] = grid->getLastFluidNode(coords.data(), 2, grid->getEndZ());
+    }
 }
 
 uint Side::getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2)
@@ -201,7 +261,7 @@ uint Side::getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1,
 }
 
 
-void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void Geometry::addIndices(const std::vector<SPtr<Grid>> &grids, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     auto geometryBoundaryCondition = std::dynamic_pointer_cast<GeometryBoundaryCondition>(boundaryCondition);
 
@@ -242,7 +302,7 @@ void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<Bound
 
 
 
-void MX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void MX::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartY();
     real endInner = grid[level]->getEndY();
@@ -258,7 +318,7 @@ void MX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
 
 }
 
-void PX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void PX::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartY();
     real endInner = grid[level]->getEndY();
@@ -273,7 +333,7 @@ void PX::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
     Side::addIndices(grid[level], boundaryCondition, "x", coordinateNormal, startInner, endInner, startOuter, endOuter);
 }
 
-void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void MY::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartX();
     real endInner = grid[level]->getEndX();
@@ -289,7 +349,7 @@ void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
 }
 
 
-void PY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void PY::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartX();
     real endInner = grid[level]->getEndX();
@@ -305,7 +365,7 @@ void PY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
 }
 
 
-void MZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void MZ::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartX();
     real endInner = grid[level]->getEndX();
@@ -320,7 +380,7 @@ void MZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
     Side::addIndices(grid[level], boundaryCondition, "z", coordinateNormal, startInner, endInner, startOuter, endOuter);
 }
 
-void PZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
+void PZ::addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<BoundaryCondition> boundaryCondition)
 {
     real startInner = grid[level]->getStartX();
     real endInner = grid[level]->getEndX();
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
index 53a763bc562ee978042b28d24856fbcca256c5f9..624b3722a1c909ba26063b49565779b924d34adc 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
@@ -33,10 +33,14 @@
 #ifndef SIDE_H
 #define SIDE_H
 
+#include <cstddef>
 #include <string>
 #include <vector>
+#include <map>
+#include <array>
 
 #include "gpu/GridGenerator/global.h"
+#include "lbm/constants/D3Q27.h"
 
 #define X_INDEX 0
 #define Y_INDEX 1
@@ -59,20 +63,19 @@ enum class SideType
     MX, PX, MY, PY, MZ, PZ, GEOMETRY
 };
 
-
-
 class Side
 {
 public:
     virtual ~Side() = default;
-    virtual void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) = 0;
+    virtual void addIndices(const std::vector<SPtr<Grid>> &grid, uint level,
+                            SPtr<gg::BoundaryCondition> boundaryCondition) = 0;
 
     virtual int getCoordinate() const = 0;
     virtual int getDirection() const = 0;
 
     virtual SideType whoAmI() const = 0;
 
-    std::vector<real> getNormal();
+    std::array<real, 3> getNormal() const;
 
 protected:
     void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant,
@@ -84,14 +87,35 @@ protected:
 
     void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index);
 
+    virtual void correctNeighborForPeriodicBoundaries(const Grid *grid, std::array<real, 3>& coords, std::array<real, 3>& neighbors) const;
+
+    virtual bool isAlignedWithMyNormal(const Grid *grid, int dir) const;
+    bool isAlignedWithNormal(const Grid *grid, int dir, const std::array<real, 3>& normal) const;
+
 private:
     static uint getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2);
+    void resetDiagonalsInCaseOfOtherBC(Grid *grid, std::vector<real>& qNode, int dir, const std::array<real, 3> &coordinates) const;
+    std::array<real, 3> getNeighborCoordinates(Grid *grid, const std::array<real, 3> &coordinates,
+                                               size_t direction) const;
+    bool neighborNormalToSideIsAStopper(Grid *grid, const std::array<real, 3> &coordinates, SideType side) const;
+
+protected:
+    const std::map<SideType, const std::array<real, 3>> normals = {
+        { SideType::MX, { NEGATIVE_DIR, 0.0, 0.0 } }, { SideType::PX, { POSITIVE_DIR, 0.0, 0.0 } },
+        { SideType::MY, { 0.0, NEGATIVE_DIR, 0.0 } }, { SideType::PY, { 0.0, POSITIVE_DIR, 0.0 } },
+        { SideType::MZ, { 0.0, 0.0, NEGATIVE_DIR } }, { SideType::PZ, { 0.0, 0.0, POSITIVE_DIR } }
+    };
+    const std::map<SideType, size_t> sideToD3Q27 = {
+        { SideType::MX, vf::lbm::dir::DIR_M00 }, { SideType::PX, vf::lbm::dir::DIR_P00 },
+        { SideType::MY, vf::lbm::dir::DIR_0M0 }, { SideType::PY, vf::lbm::dir::DIR_0P0 },
+        { SideType::MZ, vf::lbm::dir::DIR_00M }, { SideType::PZ, vf::lbm::dir::DIR_00P }
+    };
 };
 
 class Geometry : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -112,7 +136,7 @@ public:
 class MX : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -133,7 +157,7 @@ public:
 class PX : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -155,7 +179,7 @@ public:
 class MY : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -176,7 +200,7 @@ public:
 class PY : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -198,7 +222,7 @@ public:
 class MZ : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
@@ -219,7 +243,7 @@ public:
 class PZ : public Side
 {
 public:
-    void addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override;
 
     int getCoordinate() const override
     {
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..36a286a8766db4af7e109eb3f8d47add401779f9
--- /dev/null
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/SideTest.cpp
@@ -0,0 +1,873 @@
+#include "Side.h"
+#include "PointerDefinitions.h"
+#include "gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+#include "grid/GridImp.h"
+#include "grid/NodeValues.h"
+#include "lbm/constants/D3Q27.h"
+#include "gmock/gmock.h"
+#include <algorithm>
+#include <gtest/gtest.h>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+using namespace vf::gpu;
+using namespace vf::lbm::dir;
+
+class SideTestSpecificSubclass : public Side // test-only Side: direction, coordinate and side type are taken from the public fields below
+{
+
+public:
+    void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index) // public forwarder to the protected Side::setQs so tests can call it directly
+    {
+        Side::setQs(grid, boundaryCondition, index);
+    };
+    int sideDirection = POSITIVE_DIR; // value returned by getDirection()
+    int coordinateDirection = X_INDEX; // value returned by getCoordinate()
+    SideType mySide = SideType::PX; // value returned by whoAmI()
+
+private:
+    void correctNeighborForPeriodicBoundaries(const Grid *grid, std::array<real, 3>& coords, std::array<real, 3>& neighbors) const override // no-op override: neighbor coordinates are left unchanged, so Side's periodic-boundary correction does not run
+    {
+    }
+
+    int getDirection() const override
+    {
+        return sideDirection;
+    }
+
+    void addIndices(const std::vector<SPtr<Grid>> &grid, uint level, SPtr<gg::BoundaryCondition> boundaryCondition) override // empty body: only present because Side declares it pure virtual
+    {
+    }
+
+    int getCoordinate() const override
+    {
+        return coordinateDirection;
+    }
+
+    SideType whoAmI() const override
+    {
+        return mySide;
+    }
+};
+
+class GridDouble : public GridImp // test double for the grid: every query below returns a fixed value
+{
+
+public:
+    int endDirection = -1; // value returned by getEndDirection(); tests assign it before calling setQs
+
+    GridDouble()
+    {
+        this->distribution = DistributionHelper::getDistribution27(); // presumably the 27-direction set read back through getDirection() - confirm
+    }
+
+    void transIndexToCoords(uint index, real &x, real &y, real &z) const override // ignores index: every node is reported at the origin
+    {
+        x = 0;
+        y = 0;
+        z = 0;
+    }
+
+    real getDelta() const override // fixed grid spacing of 1.0
+    {
+        return 1.0;
+    }
+
+    uint transCoordToIndex(const real &x, const real &y, const real &z) const override // ignores the coordinates: every position maps to index 0
+    {
+        return 0;
+    }
+
+    char getFieldEntry(uint /*matrixIndex*/) const override // every node reports STOPPER_OUT_OF_GRID_BOUNDARY
+    {
+        return STOPPER_OUT_OF_GRID_BOUNDARY;
+    }
+
+    int getEndDirection() const override
+    {
+        return endDirection;
+    }
+};
+
+class BoundaryConditionSpy : public gg::BoundaryCondition // boundary condition used by the tests to read back and clear the q vectors stored in qs
+{
+public:
+    char getType() const override // returns the fixed character 't'; NOTE(review): looks like an arbitrary placeholder - confirm
+    {
+        return 't';
+    };
+    const std::vector<std::vector<real>> &getQs() // read access to qs; Side::setQs appends one vector per call
+    {
+        return this->qs;
+    }
+    void resetQVector() // removes all q vectors recorded so far
+    {
+        this->qs.clear();
+    }
+};
+
+class SideTestBC : public testing::Test // fixture shared by the setQs tests: one Side, one grid double and one boundary-condition spy
+{
+protected:
+    SideTestSpecificSubclass side;
+    SPtr<GridDouble> grid = std::make_shared<GridDouble>();
+    SPtr<BoundaryConditionSpy> bc = std::make_shared<BoundaryConditionSpy>();
+    uint index = 0; // node index handed to setQs
+
+    std::vector<real> noBC; // NOTE(review): not referenced by the tests visible in this part of the file - confirm it is needed
+
+    void SetUp() override
+    {
+        grid->endDirection = 26; // setQs sizes its q vector as getEndDirection() + 1, i.e. 27 entries; the 2D tests overwrite this with 10
+    }
+};
+
+TEST_F(SideTestBC, setQs2D_whenSettingPX_setAllQsNormalToBC)
+{
+    grid->endDirection = 10;
+    side.coordinateDirection = X_INDEX;
+    side.sideDirection = POSITIVE_DIR;
+
+    side.setQs(grid, bc, index);
+    auto actualQs = bc->getQs()[0];
+
+    std::vector<real> expectedQs(11, -1);
+    expectedQs[DIR_P00] = 0.5;
+    expectedQs[DIR_PP0] = 0.5;
+    expectedQs[DIR_PM0] = 0.5;
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+}
+
+TEST_F(SideTestBC, setQs2D_givenPYhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    grid->endDirection = 10;
+    side.coordinateDirection = X_INDEX;
+    side.sideDirection = POSITIVE_DIR;
+    grid->addBCalreadySet(SideType::PY);
+
+    side.setQs(grid, bc, index);
+    auto actualQs = bc->getQs()[0];
+
+    std::vector<real> expectedQs(11, -1);
+    expectedQs[DIR_P00] = 0.5;
+    expectedQs[DIR_PM0] = 0.5;
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetPX_setAllQsNormalToPX)
+{
+    side.coordinateDirection = X_INDEX;
+    side.sideDirection = POSITIVE_DIR;
+    // Checks that an already set MX side (opposite to PX) does not remove any of the q values set for PX.
+    // no previous BC on this node
+
+    side.setQs(grid, bc, index);
+    auto actualQs = bc->getQs()[0];
+
+    std::vector<real> expectedQs(27, -1);
+    expectedQs[DIR_P00] = 0.5;
+    expectedQs[DIR_PP0] = 0.5;
+    expectedQs[DIR_PM0] = 0.5;
+    expectedQs[DIR_P0P] = 0.5;
+    expectedQs[DIR_P0M] = 0.5;
+    expectedQs[DIR_PPP] = 0.5;
+    expectedQs[DIR_PMP] = 0.5;
+    expectedQs[DIR_PPM] = 0.5;
+    expectedQs[DIR_PMM] = 0.5;
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+
+    // node already has BC in MX direction, but this does not change anything
+    // setQs appends one q vector per call, so the result of the second call is the last entry, not entry 0
+    grid->addBCalreadySet(SideType::MX);
+
+    side.setQs(grid, bc, index);
+    actualQs = bc->getQs().back();
+
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenGeometryBCInVector_thenSetPX_throws)
+{
+    // do not add Geometry BC to this vector, as it has an invalid normal
+    grid->addBCalreadySet(SideType::GEOMETRY);
+
+    EXPECT_THROW(side.setQs(grid, bc, index), std::out_of_range);
+}
+
+TEST_F(SideTestBC, setQs3D_whenSettingPX_setAllQsNormalToBC)
+{
+    side.coordinateDirection = X_INDEX;
+    side.sideDirection = POSITIVE_DIR;
+
+    side.setQs(grid, bc, index);
+    auto actualQs = bc->getQs()[0];
+
+    std::vector<real> expectedQs(27, -1);
+    expectedQs[DIR_P00] = 0.5;
+    expectedQs[DIR_PP0] = 0.5;
+    expectedQs[DIR_PM0] = 0.5;
+    expectedQs[DIR_P0P] = 0.5;
+    expectedQs[DIR_P0M] = 0.5;
+    expectedQs[DIR_PPP] = 0.5;
+    expectedQs[DIR_PMP] = 0.5;
+    expectedQs[DIR_PPM] = 0.5;
+    expectedQs[DIR_PMM] = 0.5;
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    side.coordinateDirection = X_INDEX;
+    side.sideDirection = POSITIVE_DIR;
+    grid->addBCalreadySet(SideType::PY);
+
+    side.setQs(grid, bc, index);
+    auto actualQs = bc->getQs()[0];
+
+    std::vector<real> expectedQs(27, -1);
+    expectedQs[DIR_P00] = 0.5;
+    expectedQs[DIR_PM0] = 0.5;
+    expectedQs[DIR_P0P] = 0.5;
+    expectedQs[DIR_P0M] = 0.5;
+    expectedQs[DIR_PMP] = 0.5;
+    expectedQs[DIR_PMM] = 0.5;
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    side.coordinateDirection = X_INDEX;
+    side.sideDirection = POSITIVE_DIR;
+    grid->addBCalreadySet(SideType::MY);
+
+    side.setQs(grid, bc, index);
+    auto actualQs = bc->getQs()[0];
+
+    std::vector<real> expectedQs(27, -1);
+    expectedQs[DIR_P00] = 0.5;
+    expectedQs[DIR_PP0] = 0.5;
+    expectedQs[DIR_P0P] = 0.5;
+    expectedQs[DIR_P0M] = 0.5;
+    expectedQs[DIR_PPP] = 0.5;
+    expectedQs[DIR_PPM] = 0.5;
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPZhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    side.coordinateDirection = X_INDEX;
+    side.sideDirection = POSITIVE_DIR;
+    grid->addBCalreadySet(SideType::PZ);
+
+    side.setQs(grid, bc, index);
+    auto actualQs = bc->getQs()[0];
+
+    std::vector<real> expectedQs(27, -1);
+    expectedQs[DIR_P00] = 0.5;
+    expectedQs[DIR_PP0] = 0.5;
+    expectedQs[DIR_PM0] = 0.5;
+    expectedQs[DIR_P0M] = 0.5;
+    expectedQs[DIR_PPM] = 0.5;
+    expectedQs[DIR_PMM] = 0.5;
+    EXPECT_THAT(actualQs, testing::Eq(expectedQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMZhasBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::MZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_P00] = 0.5;
+    referenceQs[DIR_PP0] = 0.5;
+    referenceQs[DIR_PM0] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_PPP] = 0.5;
+    referenceQs[DIR_PMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYandMZhaveBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::MZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_P00] = 0.5;
+    referenceQs[DIR_PM0] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_PMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYandPZhaveBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::PZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_P00] = 0.5;
+    referenceQs[DIR_PM0] = 0.5;
+    referenceQs[DIR_P0M] = 0.5;
+    referenceQs[DIR_PMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYandPZhaveBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::PZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_P00] = 0.5;
+    referenceQs[DIR_PP0] = 0.5;
+    referenceQs[DIR_P0M] = 0.5;
+    referenceQs[DIR_PPM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYandMZhaveBeenSet_thenSetPX_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::MZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_P00] = 0.5;
+    referenceQs[DIR_PP0] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_PPP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_whenSettingMX_setAllQsNormalToBC)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MP0] = 0.5;
+    referenceQs[DIR_MM0] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_MPP] = 0.5;
+    referenceQs[DIR_MMP] = 0.5;
+    referenceQs[DIR_MPM] = 0.5;
+    referenceQs[DIR_MMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MM0] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_MMP] = 0.5;
+    referenceQs[DIR_MMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MP0] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_MPP] = 0.5;
+    referenceQs[DIR_MPM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPZhasBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::PZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MP0] = 0.5;
+    referenceQs[DIR_MM0] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_MPM] = 0.5;
+    referenceQs[DIR_MMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMZhasBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::MZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MP0] = 0.5;
+    referenceQs[DIR_MM0] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_MPP] = 0.5;
+    referenceQs[DIR_MMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYandMZhaveBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::MZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MM0] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_MMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYandPZhaveBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::PZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MM0] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_MMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYandPZhaveBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::PZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MP0] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_MPM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYandMZhaveBeenSet_thenSetMX_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = X_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::MZ);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_M00] = 0.5;
+    referenceQs[DIR_MP0] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_MPP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_whenSettingMZ_setAllQsNormalToBC)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_P0M] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_0PM] = 0.5;
+    referenceQs[DIR_0MM] = 0.5;
+    referenceQs[DIR_PPM] = 0.5;
+    referenceQs[DIR_MPM] = 0.5;
+    referenceQs[DIR_PMM] = 0.5;
+    referenceQs[DIR_MMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_P0M] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_0PM] = 0.5;
+    referenceQs[DIR_PPM] = 0.5;
+    referenceQs[DIR_MPM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_P0M] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_0MM] = 0.5;
+    referenceQs[DIR_PMM] = 0.5;
+    referenceQs[DIR_MMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPXhasBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::PX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_0PM] = 0.5;
+    referenceQs[DIR_0MM] = 0.5;
+    referenceQs[DIR_MPM] = 0.5;
+    referenceQs[DIR_MMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::MX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_P0M] = 0.5;
+    referenceQs[DIR_0PM] = 0.5;
+    referenceQs[DIR_0MM] = 0.5;
+    referenceQs[DIR_PPM] = 0.5;
+    referenceQs[DIR_PMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYandPXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::PX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_0PM] = 0.5;
+    referenceQs[DIR_MPM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYandMXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::MX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_P0M] = 0.5;
+    referenceQs[DIR_0PM] = 0.5;
+    referenceQs[DIR_PPM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYandPXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::PX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_M0M] = 0.5;
+    referenceQs[DIR_0MM] = 0.5;
+    referenceQs[DIR_MMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYandMXhaveBeenSet_thenSetMZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = NEGATIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::MX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00M] = 0.5;
+    referenceQs[DIR_P0M] = 0.5;
+    referenceQs[DIR_0MM] = 0.5;
+    referenceQs[DIR_PMM] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_whenSettingPZ_setAllQsNormalToBC)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_0PP] = 0.5;
+    referenceQs[DIR_0MP] = 0.5;
+    referenceQs[DIR_PPP] = 0.5;
+    referenceQs[DIR_MPP] = 0.5;
+    referenceQs[DIR_PMP] = 0.5;
+    referenceQs[DIR_MMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYhasBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_0PP] = 0.5;
+    referenceQs[DIR_PPP] = 0.5;
+    referenceQs[DIR_MPP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYhasBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_0MP] = 0.5;
+    referenceQs[DIR_PMP] = 0.5;
+    referenceQs[DIR_MMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPXhasBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::PX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_0PP] = 0.5;
+    referenceQs[DIR_0MP] = 0.5;
+    referenceQs[DIR_MPP] = 0.5;
+    referenceQs[DIR_MMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMXhasBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::MX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_0PP] = 0.5;
+    referenceQs[DIR_0MP] = 0.5;
+    referenceQs[DIR_PPP] = 0.5;
+    referenceQs[DIR_PMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYandPXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::PX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_0PP] = 0.5;
+    referenceQs[DIR_MPP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenMYandMXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::MY);
+    grid->addBCalreadySet(SideType::MX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_0PP] = 0.5;
+    referenceQs[DIR_PPP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYandPXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::PX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_M0P] = 0.5;
+    referenceQs[DIR_0MP] = 0.5;
+    referenceQs[DIR_MMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
+
+TEST_F(SideTestBC, setQs3D_givenPYandMXhaveBeenSet_thenSetPZ_doNotSetSameQsAgain)
+{
+    side.sideDirection = POSITIVE_DIR;
+    side.coordinateDirection = Z_INDEX;
+    grid->addBCalreadySet(SideType::PY);
+    grid->addBCalreadySet(SideType::MX);
+
+    side.setQs(grid, bc, index);
+    const auto computedQs = bc->getQs()[0];
+
+    std::vector<real> referenceQs(27, -1); // -1 is the expected value for every direction not assigned below
+    referenceQs[DIR_00P] = 0.5;
+    referenceQs[DIR_P0P] = 0.5;
+    referenceQs[DIR_0MP] = 0.5;
+    referenceQs[DIR_PMP] = 0.5;
+    EXPECT_THAT(computedQs, testing::Eq(referenceQs));
+}
diff --git a/src/gpu/GridGenerator/grid/Grid.h b/src/gpu/GridGenerator/grid/Grid.h
index 85b19bedd470c9856954a1ca20fb446c3d875da2..d0969cd8a5318a91c5a486ba1f8811940aa69bd0 100644
--- a/src/gpu/GridGenerator/grid/Grid.h
+++ b/src/gpu/GridGenerator/grid/Grid.h
@@ -33,8 +33,6 @@
 #ifndef GRID_H
 #define GRID_H
 
-#include "Core/LbmOrGks.h"
-
 #include "gpu/GridGenerator/global.h"
 
 #include "gpu/GridGenerator/geometries/Vertex/Vertex.h"
@@ -47,13 +45,14 @@ struct Triangle;
 class GridInterface;
 class Object;
 class BoundingBox;
+enum class SideType;
 
 class GRIDGENERATOR_EXPORT Grid
 {
 public:
     virtual ~Grid() = default;
 
-    virtual const Object* getObject() const = 0;
+    virtual SPtr<const Object> getObject() const = 0;
 
     virtual real getDelta() const = 0;
     virtual uint getSparseSize() const = 0;
@@ -84,6 +83,8 @@ public:
     virtual void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf) const = 0;
     virtual bool isSparseIndexInFluidNodeIndicesBorder(uint &sparseIndex) const = 0;
 
+    virtual bool isStopperForBC(uint index) const = 0;
+
     virtual int *getNeighborsX() const = 0;
     virtual int *getNeighborsY() const = 0;
     virtual int *getNeighborsZ() const = 0;
@@ -98,7 +99,7 @@ public:
     virtual uint *getFC_offset() const = 0;
 
     virtual real *getDistribution() const = 0;
-    virtual int* getDirection() const = 0;
+    virtual const std::vector<int> &getDirection() const = 0;
     virtual int getStartDirection() const = 0;
     virtual int getEndDirection() const = 0;
 
@@ -111,11 +112,11 @@ public:
     
     virtual void setOddStart(bool xOddStart, bool yOddStart, bool zOddStart) = 0;
 
-    virtual void findGridInterface(SPtr<Grid> grid, LbmOrGks lbmOrGks) = 0;
+    virtual void findGridInterface(SPtr<Grid> grid) = 0;
 
     virtual void repairGridInterfaceOnMultiGPU(SPtr<Grid> fineGrid) = 0;
 
-    virtual void limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks) = 0;
+    virtual void limitToSubDomain(SPtr<BoundingBox> subDomainBox) = 0;
 
     virtual void enableFindSolidBoundaryNodes() = 0;
     virtual void enableComputeQs()              = 0;
@@ -133,9 +134,9 @@ public:
     virtual void setPeriodicityY(bool periodicity) = 0;
     virtual void setPeriodicityZ(bool periodicity) = 0;
 
-    virtual bool getPeriodicityX() = 0;
-    virtual bool getPeriodicityY() = 0;
-    virtual bool getPeriodicityZ() = 0;
+    virtual bool getPeriodicityX() const = 0;
+    virtual bool getPeriodicityY() const = 0;
+    virtual bool getPeriodicityZ() const = 0;
 
     virtual void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall) = 0;
 
@@ -158,7 +159,7 @@ public:
 
     virtual void setNumberOfLayers(uint numberOfLayers) = 0;
 
-    virtual void findCommunicationIndices(int direction, SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks) = 0;
+    virtual void findCommunicationIndices(int direction, SPtr<BoundingBox> subDomainBox) = 0;
 
     virtual uint getNumberOfSendNodes(int direction)    = 0;
     virtual uint getNumberOfReceiveNodes(int direction) = 0;
@@ -170,6 +171,11 @@ public:
 
     virtual void repairCommunicationIndices(int direction) = 0;
 
+    virtual bool nodeHasBC(uint index) const = 0;
+
+    virtual std::vector<SideType> getBCAlreadySet() = 0;
+    virtual void addBCalreadySet(SideType side) = 0;
+
     // needed for CUDA Streams 
     virtual void findFluidNodeIndices(bool onlyBulk) = 0;
     virtual uint getNumberOfFluidNodes() const = 0;
@@ -192,7 +198,6 @@ public:
     virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars) const = 0;
     virtual void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce) const = 0;
     virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures) const = 0;
-
 };
 
 #endif
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
index 003e6dcd223d2bf019c83f71349a9a7bec84efdc..87ec7bab021064de527d251b5fe2908af8b5055d 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
@@ -94,7 +94,7 @@ void LevelGridBuilder::setSlipBoundaryCondition(SideType sideType, real normalX,
             slipBoundaryCondition->fillSlipNormalLists();
             boundaryConditions[level]->slipBoundaryConditions.push_back(slipBoundaryCondition);
 
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Slip BC on level " << level << " with " << (int)slipBoundaryCondition->indices.size() << "\n";
+            VF_LOG_INFO("Set Slip BC on level {} with {}", level, slipBoundaryCondition->indices.size());
         }
     }
 }
@@ -114,7 +114,7 @@ void LevelGridBuilder::setSlipGeometryBoundaryCondition(real normalX, real norma
 
             boundaryConditions[level]->geometryBoundaryCondition->fillSlipNormalLists();
 
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry Slip BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
+            VF_LOG_INFO("Set Geometry Slip BC on level {} with {}", level, boundaryConditions[level]->geometryBoundaryCondition->indices.size());
         }
     }
 }
@@ -144,7 +144,7 @@ void LevelGridBuilder::setStressBoundaryCondition(  SideType sideType,
 
         boundaryConditions[level]->stressBoundaryConditions.push_back(stressBoundaryCondition);
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Stress BC on level " << level << " with " << (int)stressBoundaryCondition->indices.size() << "\n";
+        VF_LOG_INFO("Set Stress BC on level {} with {}", level, stressBoundaryCondition->indices.size());
     }
 }
 
@@ -167,7 +167,7 @@ void LevelGridBuilder::setVelocityBoundaryCondition(SideType sideType, real vx,
 
             boundaryConditions[level]->velocityBoundaryConditions.push_back(velocityBoundaryCondition);
 
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Velocity BC on level " << level << " with " << (int)velocityBoundaryCondition->indices.size() <<"\n";
+            VF_LOG_INFO("Set Velocity BC on level {} with {}", level, velocityBoundaryCondition->indices.size());
         }
     }
 }
@@ -187,7 +187,7 @@ void LevelGridBuilder::setVelocityGeometryBoundaryCondition(real vx, real vy, re
 
             boundaryConditions[level]->geometryBoundaryCondition->fillVelocityLists();
 
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry Velocity BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
+            VF_LOG_INFO("Set Geometry Velocity BC on level {} with {}", level, boundaryConditions[level]->geometryBoundaryCondition->indices.size());
         }
     }
 }
@@ -204,7 +204,7 @@ void LevelGridBuilder::setPressureBoundaryCondition(SideType sideType, real rho)
 
         boundaryConditions[level]->pressureBoundaryConditions.push_back(pressureBoundaryCondition);
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Pressure BC on level " << level << " with " << (int)pressureBoundaryCondition->indices.size() <<"\n";
+        VF_LOG_INFO("Set Pressure BC on level {} with {}", level, pressureBoundaryCondition->indices.size());
     }
 }
 
@@ -245,7 +245,7 @@ void LevelGridBuilder::setNoSlipGeometryBoundaryCondition()
         {
             boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
 
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry No-Slip BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
+            VF_LOG_INFO("Set Geometry No-Slip BC on level {} with {}", level, boundaryConditions[level]->geometryBoundaryCondition->indices.size());
         }
     }
 }
@@ -255,7 +255,7 @@ void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<Fil
 {
     if(fileLevelToGridLevelMap.empty())
     {
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Mapping precursor file levels to the corresponding grid levels" << "\n";
+        VF_LOG_INFO("Mapping precursor file levels to the corresponding grid levels");
 
         for (uint level = 0; level < getNumberOfGridLevels(); level++)
             fileLevelToGridLevelMap.push_back(level);
@@ -264,7 +264,7 @@ void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<Fil
     {
         if(fileLevelToGridLevelMap.size()!=getNumberOfGridLevels())
             throw std::runtime_error("In setPrecursorBoundaryCondition: fileLevelToGridLevelMap does not match with the number of levels");
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Using user defined file to grid level mapping"  << "\n";
+        VF_LOG_INFO("Using user defined file to grid level mapping");
     }
 
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
@@ -279,7 +279,7 @@ void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<Fil
 
         boundaryConditions[level]->precursorBoundaryConditions.push_back(precursorBoundaryCondition);
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Precursor BC on level " << level << " with " << (int)precursorBoundaryCondition->indices.size() << "\n";
+        VF_LOG_INFO("Set Precursor BC on level {} with {}", level, precursorBoundaryCondition->indices.size());
     }
 }
 
@@ -790,10 +790,10 @@ GRIDGENERATOR_EXPORT SPtr<GeometryBoundaryCondition> LevelGridBuilder::getGeomet
 
 void LevelGridBuilder::findFluidNodes(bool splitDomain)
 {
-    *logging::out << logging::Logger::INFO_HIGH << "Start findFluidNodes()\n";
+    VF_LOG_TRACE("Start findFluidNodes()");
     for (uint i = 0; i < grids.size(); i++)
         grids[i]->findFluidNodeIndices(splitDomain);
-    *logging::out << logging::Logger::INFO_HIGH << "Done with findFluidNodes()\n";
+    VF_LOG_TRACE("Done findFluidNodes()");
 }
 
 
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index 2e0eaf13080c46260de2a0c845fbf784a2cc3e09..4924432dbf05ca2213e5fa08cf16a28ea75f8c9e 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -38,7 +38,7 @@
 #include <memory>
 #include <array>
 
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
 #include "gpu/GridGenerator/global.h"
 
@@ -47,7 +47,7 @@
 #include "gpu/GridGenerator/grid/GridInterface.h"
 #include "gpu/GridGenerator/grid/NodeValues.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 struct Vertex;
 class  Grid;
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
index da18a883181069f089e7232c9cd1b4f19cc9dc35..e45ad99d8a63c332aa3723eb682b61dbc267f4db 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
@@ -61,7 +61,7 @@ SPtr<MultipleGridBuilder> MultipleGridBuilder::makeShared(SPtr<GridFactory> grid
 
 void MultipleGridBuilder::addCoarseGrid(real startX, real startY, real startZ, real endX, real endY, real endZ, real delta)
 {
-    boundaryConditions.push_back(SPtr<BoundaryConditions>(new BoundaryConditions));
+    boundaryConditions.push_back(std::make_shared<BoundaryConditions>());
 
     startX -= 0.5 * delta;
     startY -= 0.5 * delta;
@@ -70,11 +70,11 @@ void MultipleGridBuilder::addCoarseGrid(real startX, real startY, real startZ, r
     endY   += 0.5 * delta;
     endZ   += 0.5 * delta;
 
-    const auto grid = this->makeGrid(new Cuboid(startX, startY, startZ, endX, endY, endZ), startX, startY, startZ, endX, endY, endZ, delta, 0);
+    const auto grid = this->makeGrid(std::make_shared<Cuboid>(startX, startY, startZ, endX, endY, endZ), startX, startY, startZ, endX, endY, endZ, delta, 0);
     addGridToList(grid);
 }
 
-void MultipleGridBuilder::addGeometry(Object* solidObject)
+void MultipleGridBuilder::addGeometry(SPtr<Object> solidObject)
 {
     this->solidObject = solidObject;
 
@@ -85,7 +85,7 @@ void MultipleGridBuilder::addGeometry(Object* solidObject)
     }
 }
 
-void MultipleGridBuilder::addGeometry(Object* solidObject, uint level)
+void MultipleGridBuilder::addGeometry(SPtr<Object> solidObject, uint level)
 {
     this->solidObject = solidObject;
     auto gridShape = solidObject->clone();
@@ -94,7 +94,7 @@ void MultipleGridBuilder::addGeometry(Object* solidObject, uint level)
     this->addGrid(gridShape, level);
 }
 
-void MultipleGridBuilder::addGrid(Object* gridShape)
+void MultipleGridBuilder::addGrid(SPtr<Object> gridShape)
 {
     if (!coarseGridExists())
         return emitNoCoarseGridExistsWarning();
@@ -104,7 +104,7 @@ void MultipleGridBuilder::addGrid(Object* gridShape)
     addGridToListIfValid(grid);
 }
 
-void MultipleGridBuilder::addGrid(Object* gridShape, uint levelFine)
+void MultipleGridBuilder::addGrid(SPtr<Object> gridShape, uint levelFine)
 {
     if (!coarseGridExists())
         return emitNoCoarseGridExistsWarning();
@@ -140,13 +140,13 @@ void MultipleGridBuilder::addGrid(Object* gridShape, uint levelFine)
     //eraseGridsFromListIfInvalid(oldGridSize);
 }
 
-void MultipleGridBuilder::addFineGridToList(uint level, Object* gridShape)
+void MultipleGridBuilder::addFineGridToList(uint level, SPtr<Object> gridShape)
 {
     const auto grid = makeGrid(gridShape, level, 0);
     grids.push_back(grid);
 }
 
-void MultipleGridBuilder::addIntermediateGridsToList(uint levelDifference, uint levelFine, uint nodesBetweenGrids, Object* gridShape)
+void MultipleGridBuilder::addIntermediateGridsToList(uint levelDifference, uint levelFine, uint nodesBetweenGrids, SPtr<Object> gridShape)
 {
     if (levelDifference > 0)
     {
@@ -156,7 +156,7 @@ void MultipleGridBuilder::addIntermediateGridsToList(uint levelDifference, uint
         for (int i = levelDifference - 1; i >= 0; i--)
         {
             const real scalingFactor = nodesBetweenGrids * spacings[i] * calculateDelta(levelFine);
-            Object* gridShapeClone = gridShape->clone();
+            SPtr<Object> gridShapeClone = gridShape->clone();
             gridShapeClone->scale(scalingFactor);
 
             const auto grid = makeGrid(gridShapeClone, level++, 0);
@@ -193,7 +193,7 @@ void MultipleGridBuilder::addGridToListIfValid(SPtr<Grid> grid)
     addGridToList(grid);
 }
 
-SPtr<Grid> MultipleGridBuilder::makeGrid(Object* gridShape, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, uint level) const
+SPtr<Grid> MultipleGridBuilder::makeGrid(SPtr<Object> gridShape, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, uint level) const
 {
     return gridFactory->makeGrid(gridShape, startX, startY, startZ, endX, endY, endZ, delta, level);
 }
@@ -203,9 +203,9 @@ bool MultipleGridBuilder::coarseGridExists() const
     return !grids.empty();
 }
 
-SPtr<Grid> MultipleGridBuilder::makeGrid(Object* gridShape, uint level, uint levelFine)
+SPtr<Grid> MultipleGridBuilder::makeGrid(SPtr<Object> gridShape, uint level, uint levelFine)
 {
-    boundaryConditions.push_back(SPtr<BoundaryConditions>(new BoundaryConditions));
+    boundaryConditions.push_back(std::make_shared<BoundaryConditions>());
 
     const real delta = calculateDelta(level);
 
@@ -213,11 +213,11 @@ SPtr<Grid> MultipleGridBuilder::makeGrid(Object* gridShape, uint level, uint lev
 
 	auto staggeredCoordinates = getStaggeredCoordinates(gridShape, level, levelFine, xOddStart, yOddStart, zOddStart);
 
-	SPtr<Grid> newGrid = this->makeGrid(gridShape, staggeredCoordinates[0], 
-                                                   staggeredCoordinates[1], 
-                                                   staggeredCoordinates[2], 
-                                                   staggeredCoordinates[3], 
-                                                   staggeredCoordinates[4], 
+	SPtr<Grid> newGrid = this->makeGrid(gridShape, staggeredCoordinates[0],
+                                                   staggeredCoordinates[1],
+                                                   staggeredCoordinates[2],
+                                                   staggeredCoordinates[3],
+                                                   staggeredCoordinates[4],
                                                    staggeredCoordinates[5], delta, level);
 
     newGrid->setOddStart( xOddStart, yOddStart, zOddStart );
@@ -233,7 +233,7 @@ real MultipleGridBuilder::calculateDelta(uint level) const
     return delta;
 }
 
-std::array<real, 6> MultipleGridBuilder::getStaggeredCoordinates(Object* gridShape, uint level, uint levelFine, bool& xOddStart, bool& yOddStart, bool& zOddStart) const
+std::array<real, 6> MultipleGridBuilder::getStaggeredCoordinates(SPtr<Object> gridShape, uint level, uint levelFine, bool& xOddStart, bool& yOddStart, bool& zOddStart) const
 {
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //
@@ -466,7 +466,7 @@ std::vector<SPtr<Grid> > MultipleGridBuilder::getGrids() const
 //      => MultipleGridBuilder::findCommunicationIndices(...)
 //      => LevelGridBuilder::setCommunicationProcess(...)
 //
-void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
+void MultipleGridBuilder::buildGrids(bool enableThinWalls )
 {
     //////////////////////////////////////////////////////////////////////////
 
@@ -490,7 +490,7 @@ void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
     //
     for( int level = (int)grids.size()-1; level >= 0; level-- ) {
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start initializing level " << level << "\n";
+        VF_LOG_INFO("Start initializing level {}", level);
 
         // On the coarse grid every thing is Fluid (w.r.t. the refinement)
         // On the finest grid the Fluid region is defined by the Object
@@ -502,7 +502,7 @@ void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
         else
             grids[level]->inital( grids[level+1], this->numberOfLayersBetweenLevels );
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Done initializing level " << level << "\n";
+        VF_LOG_INFO("Done initializing level {}", level);
     }
 
     //////////////////////////////////////////////////////////////////////////
@@ -516,8 +516,7 @@ void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
     //
     if (solidObject)
     {
-
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start with Q Computation\n";
+        VF_LOG_TRACE("Start with Q Computation");
 
         // Currently the solid object is only used on the finest grid,
         // because refinement into solid objects is not yet implemented.
@@ -529,13 +528,13 @@ void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
         //for( uint level = 0; level < grids.size(); level++ )
         uint level = (uint)grids.size() - 1;
         {
-            // the Grid::mesh(...) method distinguishes inside and ouside regions
+            // the Grid::mesh(...) method distinguishes inside and outside regions
             // of the solid domain.:
             //      => set inner nodes to INVALID_SOLID
             //      => close needle sells
             //      => set one layer of STOPPER_SOLID nodes in the solid domain
             //      => set one layer of BC_SOLID nodes in the fluid domain
-            grids[level]->mesh(solidObject);
+            grids[level]->mesh(solidObject.get());
 
             // if thin walls are enables additional BC_SOLID nodes are found by
             // Grid::findOs(...). To prevent the actual Q computation, 
@@ -544,18 +543,17 @@ void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
             // additionally some needle cells are closed in this process.
             if (enableThinWalls) {
                 grids[level]->enableFindSolidBoundaryNodes();
-                grids[level]->findQs(solidObject);
+                grids[level]->findQs(solidObject.get());
                 grids[level]->closeNeedleCellsThinWall();
                 grids[level]->enableComputeQs();
             }
 
             // compute the sub grid distances 
             // this works for STL and Sphere objects, but not yet for other primitives!
-            if( lbmOrGks == LBM )
-                grids[level]->findQs(solidObject);
+            grids[level]->findQs(solidObject.get());
         }
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Done with Q Computation\n";
+        VF_LOG_TRACE("Done with Q Computation");
     }
 
     //////////////////////////////////////////////////////////////////////////
@@ -566,7 +564,7 @@ void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
     // https://publikationsserver.tu-braunschweig.de/receive/dbbs_mods_00068716
     //
     for (size_t i = 0; i < grids.size() - 1; i++)
-        grids[i]->findGridInterface(grids[i + 1], lbmOrGks);
+        grids[i]->findGridInterface(grids[i + 1]);
 
     //////////////////////////////////////////////////////////////////////////
 
@@ -574,7 +572,7 @@ void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
     // and INVALID_OUT_OF_GRID
     if( this->subDomainBox )
         for (size_t i = 0; i < grids.size(); i++)
-            grids[i]->limitToSubDomain( this->subDomainBox, lbmOrGks );
+            grids[i]->limitToSubDomain( this->subDomainBox);
 
     //////////////////////////////////////////////////////////////////////////
 
@@ -588,12 +586,10 @@ void MultipleGridBuilder::buildGrids( LbmOrGks lbmOrGks, bool enableThinWalls )
     //      => computes the sparse indices
     //      => generates neighbor connectivity taking into account periodic boundaries
     //      => undates the interface connectivity to sparse indices (overwrites matrix indices!)
-    if (lbmOrGks == LBM) {
-        for (size_t i = 0; i < grids.size() - 1; i++)
-           grids[i]->findSparseIndices(grids[i + 1]);
+    for (size_t i = 0; i < grids.size() - 1; i++)
+        grids[i]->findSparseIndices(grids[i + 1]);
 
-        grids[grids.size() - 1]->findSparseIndices(nullptr);
-    }
+    grids[grids.size() - 1]->findSparseIndices(nullptr);
 
     //////////////////////////////////////////////////////////////////////////
 }
@@ -606,24 +602,24 @@ GRIDGENERATOR_EXPORT void MultipleGridBuilder::setNumberOfLayers(uint numberOfLa
 
 void MultipleGridBuilder::emitNoCoarseGridExistsWarning()
 {
-    *logging::out << logging::Logger::WARNING << "No Coarse grid was added before. Actual Grid is not added, please create coarse grid before.\n";
+    VF_LOG_WARNING("No Coarse grid was added before. Actual Grid is not added, please create coarse grid before.");
 }
 
 
 void MultipleGridBuilder::emitGridIsNotInCoarseGridWarning()
 {
-    *logging::out << logging::Logger::WARNING << "Grid lies not inside of coarse grid. Actual Grid is not added.\n";
+    VF_LOG_WARNING("Grid lies not inside of coarse grid. Actual Grid is not added.");
 }
 
-void MultipleGridBuilder::findCommunicationIndices(int direction, LbmOrGks lbmOrGks)
+void MultipleGridBuilder::findCommunicationIndices(int direction)
 {
-    *logging::out << logging::Logger::INFO_HIGH << "Start findCommunicationIndices()\n";
+    VF_LOG_TRACE("Start findCommunicationIndices()");
 
-    if( this->subDomainBox )
-        for (size_t i = 0; i < grids.size(); i++)
-            grids[i]->findCommunicationIndices(direction, this->subDomainBox, lbmOrGks);
+    if (subDomainBox)
+        for (const auto& grid : grids)
+            grid->findCommunicationIndices(direction, subDomainBox);
 
-    *logging::out << logging::Logger::INFO_HIGH << "Done with findCommunicationIndices()\n";
+    VF_LOG_TRACE("Done findCommunicationIndices()");
 }
 
 void MultipleGridBuilder::writeGridsToVtk(const std::string& path) const
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h
index 2d2dc8bf0345288912132551c5d94b5caec44965..8d94edd0f42b237ef4e45e5a4f4246d739a51b46 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h
@@ -35,7 +35,6 @@
 
 #include <vector>
 #include <array>
-#include "Core/LbmOrGks.h"
 
 #include "global.h"
 
@@ -55,11 +54,11 @@ public:
     GRIDGENERATOR_EXPORT static SPtr<MultipleGridBuilder> makeShared(SPtr<GridFactory> gridFactory);
 
     GRIDGENERATOR_EXPORT void addCoarseGrid(real startX, real startY, real startZ, real endX, real endY, real endZ, real delta);
-    GRIDGENERATOR_EXPORT void addGrid(Object *gridShape);
-    GRIDGENERATOR_EXPORT void addGrid(Object *gridShape, uint levelFine);
+    GRIDGENERATOR_EXPORT void addGrid(SPtr<Object> gridShape);
+    GRIDGENERATOR_EXPORT void addGrid(SPtr<Object> gridShape, uint levelFine);
 
-    GRIDGENERATOR_EXPORT void addGeometry(Object *gridShape);
-    GRIDGENERATOR_EXPORT void addGeometry(Object *solidObject, uint level);
+    GRIDGENERATOR_EXPORT void addGeometry(SPtr<Object> gridShape);
+    GRIDGENERATOR_EXPORT void addGeometry(SPtr<Object> solidObject, uint level);
 
     GRIDGENERATOR_EXPORT uint getNumberOfLevels() const;
     GRIDGENERATOR_EXPORT real getDelta(uint level) const;
@@ -73,7 +72,7 @@ public:
     GRIDGENERATOR_EXPORT real getEndZ(uint level) const;
 
     GRIDGENERATOR_EXPORT std::vector<SPtr<Grid> > getGrids() const;
-    GRIDGENERATOR_EXPORT void buildGrids(LbmOrGks lbmOrGks, bool enableThinWalls = false);
+    GRIDGENERATOR_EXPORT void buildGrids(bool enableThinWalls = false);
 
     GRIDGENERATOR_EXPORT void setNumberOfLayers( uint numberOfLayersFine, uint numberOfLayersBetweenLevels );
 
@@ -87,24 +86,24 @@ private:
     bool coarseGridExists() const;
     bool isGridInCoarseGrid(SPtr<Grid> grid) const;
 
-    void addFineGridToList(uint level, Object *gridShape);
-    void addIntermediateGridsToList(uint levelDifference, uint levelFine, uint nodesBetweenGrids, Object *gridShape);
+    void addFineGridToList(uint level, SPtr<Object> gridShape);
+    void addIntermediateGridsToList(uint levelDifference, uint levelFine, uint nodesBetweenGrids, SPtr<Object>gridShape);
     void eraseGridsFromListIfInvalid(uint oldSize);
     void addGridToListIfValid(SPtr<Grid> grid);
 
-    std::array<real, 6> getStaggeredCoordinates(Object *gridShape, uint level, uint levelFine, bool &xOddStart, bool &yOddStart, bool &zOddStart) const;
+    std::array<real, 6> getStaggeredCoordinates(SPtr<Object> gridShape, uint level, uint levelFine, bool &xOddStart, bool &yOddStart, bool &zOddStart) const;
     std::array<real, 6> getStaggeredCoordinates(real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, uint level) const;
     std::array<real, 3> getOffset(real delta) const;
     std::vector<uint> getSpacingFactors(uint levelDifference) const;
 
-    SPtr<Grid> makeGrid(Object *gridShape, uint level, uint levelFine);
-    SPtr<Grid> makeGrid(Object *gridShape, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, uint level) const;
+    SPtr<Grid> makeGrid(SPtr<Object> gridShape, uint level, uint levelFine);
+    SPtr<Grid> makeGrid(SPtr<Object> gridShape, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, uint level) const;
 
     static void emitNoCoarseGridExistsWarning();
     static void emitGridIsNotInCoarseGridWarning();
 
     SPtr<GridFactory> gridFactory;
-    Object *solidObject = nullptr;
+    SPtr<Object> solidObject = nullptr;
 
     uint numberOfLayersFine;
     uint numberOfLayersBetweenLevels;
@@ -112,7 +111,7 @@ private:
     SPtr<BoundingBox> subDomainBox;
 
 public:
-    GRIDGENERATOR_EXPORT void findCommunicationIndices(int direction, LbmOrGks lbmOrGks);
+    GRIDGENERATOR_EXPORT void findCommunicationIndices(int direction);
 };
 
 #endif
diff --git a/src/gpu/GridGenerator/grid/GridFactory.h b/src/gpu/GridGenerator/grid/GridFactory.h
index 53d358d5325390394f1d38694de605e5cc0d2f56..a78dfe6d848a5f180bea9073056522d0e2196433 100644
--- a/src/gpu/GridGenerator/grid/GridFactory.h
+++ b/src/gpu/GridGenerator/grid/GridFactory.h
@@ -50,16 +50,16 @@ class GRIDGENERATOR_EXPORT GridFactory
 public:
     static SPtr<GridFactory> make()
     {
-        return SPtr<GridFactory>(new GridFactory());
+        return std::make_shared<GridFactory>();
     }
 
-    SPtr<Grid> makeGrid(Object* gridShape, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, uint level, const std::string& d3Qxx = "D3Q27")
+    SPtr<Grid> makeGrid(SPtr<Object> gridShape, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, uint level, const std::string& d3Qxx = "D3Q27")
     {
         SPtr<GridImp> grid;
         
         grid = GridImp::makeShared(gridShape, startX, startY, startZ, endX, endY, endZ, delta, d3Qxx, level);
 
-        grid->setTriangularMeshDiscretizationStrategy(new PointInObjectDiscretizationStrategy());
+        grid->setTriangularMeshDiscretizationStrategy(std::make_shared<PointInObjectDiscretizationStrategy>()); // Probably a bug, as this->triangularMeshDiscretizationStrategy is never used. Until ad5efd332a1d6808fccdf8e54fa547630eff401b this line was ``grid->setTriangularMeshDiscretizationStrategy(this->triangularMeshDiscretizationStrategy);``
 
         return grid;
     }
@@ -69,19 +69,19 @@ public:
         switch (triangularMeshDiscretizationMethod)
         {
         case TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE:
-            triangularMeshDiscretizationStrategy = new PointUnderTriangleStrategy();
+            triangularMeshDiscretizationStrategy = std::make_shared<PointUnderTriangleStrategy>();
             break;
         case TriangularMeshDiscretizationMethod::RAYCASTING:
-            triangularMeshDiscretizationStrategy = new RayCastingDiscretizationStrategy();
+            triangularMeshDiscretizationStrategy = std::make_shared<RayCastingDiscretizationStrategy>();
             break;
         case TriangularMeshDiscretizationMethod::POINT_IN_OBJECT:
-            triangularMeshDiscretizationStrategy = new PointInObjectDiscretizationStrategy();
+            triangularMeshDiscretizationStrategy = std::make_shared<PointInObjectDiscretizationStrategy>();
             break;
         }
     }
 
 private:
-    TriangularMeshDiscretizationStrategy* triangularMeshDiscretizationStrategy;
+    SPtr<TriangularMeshDiscretizationStrategy> triangularMeshDiscretizationStrategy;
 };
 
 
diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp
index 05c684410166e329ba63bbe3bdbf0c09e3a881ab..8ece061168883544d9857f109d505614ca005a43 100644
--- a/src/gpu/GridGenerator/grid/GridImp.cpp
+++ b/src/gpu/GridGenerator/grid/GridImp.cpp
@@ -36,6 +36,7 @@
 #include <sstream>
 # include <algorithm>
 #include <cmath>
+#include <vector>
 
 #include "global.h"
 
@@ -60,7 +61,7 @@ int DIRECTIONS[DIR_END_MAX][DIMENSION];
 
 using namespace vf::gpu;
 
-GridImp::GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution distribution, uint level)
+GridImp::GridImp(SPtr<Object> object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution distribution, uint level)
             : object(object),
     startX(startX),
     startY(startY),
@@ -91,7 +92,7 @@ GridImp::GridImp(Object* object, real startX, real startY, real startZ, real end
     initalNumberOfNodesAndSize();
 }
 
-SPtr<GridImp> GridImp::makeShared(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, std::string d3Qxx, uint level)
+SPtr<GridImp> GridImp::makeShared(SPtr<Object> object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, std::string d3Qxx, uint level)
 {
     Distribution distribution = DistributionHelper::getDistribution(d3Qxx);
     SPtr<GridImp> grid(new GridImp(object, startX, startY, startZ, endX, endY, endZ, delta, distribution, level));
@@ -130,31 +131,31 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers)
     for (uint i = 0; i < this->size; i++)
         this->qIndices[i] = INVALID_INDEX;
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start initalNodesToOutOfGrid()\n";
+    VF_LOG_TRACE("Start initalNodesToOutOfGrid()");
+
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->initalNodeToOutOfGrid(index);
 
     if( this->innerRegionFromFinerGrid ){
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start setInnerBasedOnFinerGrid()\n";
+        VF_LOG_TRACE("Start setInnerBasedOnFinerGrid()");
         this->setInnerBasedOnFinerGrid(fineGrid);
     }
     else{
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start findInnerNodes()\n";
+        VF_LOG_TRACE("Start findInnerNodes()");
         this->object->findInnerNodes( shared_from_this() );
     }
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start addOverlap()\n";
+    VF_LOG_TRACE("Start addOverlap()");
     this->addOverlap();
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start fixOddCells()\n";
+    VF_LOG_TRACE("Start fixOddCells()");
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->fixOddCell(index);
 
     if( enableFixRefinementIntoTheWall )
     {
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start fixRefinementIntoWall()\n";
+        VF_LOG_TRACE("Start fixRefinementIntoWall()");
 #pragma omp parallel for
         for (int xIdx = 0; xIdx < (int)this->nx; xIdx++) {
             for (uint yIdx = 0; yIdx < this->ny; yIdx++) {
@@ -179,15 +180,13 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers)
             }
         }
     }
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start findEndOfGridStopperNodes()\n";
+    VF_LOG_TRACE("Start findEndOfGridStopperNodes()");
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->findEndOfGridStopperNode(index);
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE
-        << "Grid created: " << "from (" << this->startX << ", " << this->startY << ", " << this->startZ << ") to (" << this->endX << ", " << this->endY << ", " << this->endZ << ")\n"
-        << "nodes: " << this->nx << " x " << this->ny << " x " << this->nz << " = " << this->size << "\n";
+    VF_LOG_INFO("Grid created: from ({}, {}, {}) to ({}, {}, {})", this->startX, this->startY, this->startZ, this->endX, this->endY, this->endZ);
+    VF_LOG_INFO("nodes: {} x {} x {} = {}", this->nx, this->ny, this->nz, this->size);
 }
 
 void GridImp::setOddStart(bool xOddStart, bool yOddStart, bool zOddStart)
@@ -515,7 +514,7 @@ bool GridImp::cellContainsOnly(Cell &cell, char typeA, char typeB) const
     return true;
 }
 
-const Object * GridImp::getObject() const
+SPtr<const Object> GridImp::getObject() const
 {
     return this->object;
 }
@@ -720,6 +719,12 @@ void GridImp::setNonStopperOutOfGridCellTo(uint index, char type)
     }
 }
 
+// Tells whether the field entry of node `index` equals one of the boundary-condition types compared below.
+bool GridImp::nodeHasBC(uint index) const
+{
+    const auto nodeType = getFieldEntry(index);
+    return nodeType == vf::gpu::BC_PRESSURE || nodeType == vf::gpu::BC_VELOCITY || nodeType == vf::gpu::BC_NOSLIP || nodeType == vf::gpu::BC_SLIP || nodeType == vf::gpu::BC_STRESS;
+}
 
 void GridImp::setPeriodicity(bool periodicityX, bool periodicityY, bool periodicityZ)
 {
@@ -743,17 +748,17 @@ void GridImp::setPeriodicityZ(bool periodicity)
     this->periodicityZ = periodicity;
 }
 
-bool GridImp::getPeriodicityX()
+bool GridImp::getPeriodicityX() const
 {
     return this->periodicityX;
 }
 
-bool GridImp::getPeriodicityY()
+bool GridImp::getPeriodicityY() const
 {
     return this->periodicityY;
 }
 
-bool GridImp::getPeriodicityZ()
+bool GridImp::getPeriodicityZ() const
 {
     return this->periodicityZ;
 }
@@ -806,12 +811,12 @@ uint GridImp::getLevel() const
     return this->level;
 }
 
-void GridImp::setTriangularMeshDiscretizationStrategy(TriangularMeshDiscretizationStrategy* triangularMeshDiscretizationStrategy)
+void GridImp::setTriangularMeshDiscretizationStrategy(SPtr<TriangularMeshDiscretizationStrategy> triangularMeshDiscretizationStrategy)
 {
     this->triangularMeshDiscretizationStrategy = triangularMeshDiscretizationStrategy;
 }
 
-TriangularMeshDiscretizationStrategy * GridImp::getTriangularMeshDiscretizationStrategy()
+SPtr<TriangularMeshDiscretizationStrategy> GridImp::getTriangularMeshDiscretizationStrategy()
 {
     return this->triangularMeshDiscretizationStrategy;
 }
@@ -855,7 +860,7 @@ void GridImp::setNumberOfLayers(uint numberOfLayers)
 
 void GridImp::findSparseIndices(SPtr<Grid> finerGrid)
 {
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Find sparse indices...";
+    VF_LOG_TRACE("Find sparse indices...");
     auto fineGrid = std::static_pointer_cast<GridImp>(finerGrid);
 
     this->updateSparseIndices();
@@ -870,8 +875,7 @@ void GridImp::findSparseIndices(SPtr<Grid> finerGrid)
     }
 
     const uint newGridSize = this->getSparseSize();
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "... done. new size: " << newGridSize
-                  << ", delete nodes:" << this->getSize() - newGridSize << "\n";
+    VF_LOG_TRACE("... done. new size: {}, delete nodes: {}", newGridSize, this->getSize() - newGridSize);
 }
 
 void GridImp::findForGridInterfaceNewIndices(SPtr<GridImp> fineGrid)
@@ -1099,14 +1103,13 @@ int GridImp::getSparseIndex(const real &x, const real &y, const real &z) const
 // --------------------------------------------------------- //
 //                    Find Interface                         //
 // --------------------------------------------------------- //
-void GridImp::findGridInterface(SPtr<Grid> finerGrid, LbmOrGks lbmOrGks)
+void GridImp::findGridInterface(SPtr<Grid> finerGrid)
 {
     auto fineGrid          = std::static_pointer_cast<GridImp>(finerGrid);
     const auto coarseLevel = this->getLevel();
     const auto fineLevel   = fineGrid->getLevel();
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "find interface level " << coarseLevel << " -> "
-                  << fineLevel;
+    VF_LOG_TRACE("find interface level {} -> {}", coarseLevel, fineLevel);
 
     this->gridInterface = new GridInterface();
     // TODO: this is stupid! concave refinements can easily have many more interface cells
@@ -1119,7 +1122,7 @@ void GridImp::findGridInterface(SPtr<Grid> finerGrid, LbmOrGks lbmOrGks)
     this->gridInterface->fc.offset = new uint[sizeCF];
 
     for (uint index = 0; index < this->getSize(); index++)
-        this->findGridInterfaceCF(index, *fineGrid, lbmOrGks);
+        this->findGridInterfaceCF(index, *fineGrid);
 
     for (uint index = 0; index < this->getSize(); index++)
         this->findGridInterfaceFC(index, *fineGrid);
@@ -1127,12 +1130,7 @@ void GridImp::findGridInterface(SPtr<Grid> finerGrid, LbmOrGks lbmOrGks)
     for (uint index = 0; index < this->getSize(); index++)
         this->findOverlapStopper(index, *fineGrid);
 
-    if (lbmOrGks == GKS) {
-        for (uint index = 0; index < this->getSize(); index++)
-            this->findInvalidBoundaryNodes(index);
-    }
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "  ... done. \n";
+    VF_LOG_TRACE("  ... done.");
 }
 
 void GridImp::repairGridInterfaceOnMultiGPU(SPtr<Grid> fineGrid)
@@ -1140,7 +1138,7 @@ void GridImp::repairGridInterfaceOnMultiGPU(SPtr<Grid> fineGrid)
     this->gridInterface->repairGridInterfaceOnMultiGPU( shared_from_this(), std::static_pointer_cast<GridImp>(fineGrid) );
 }
 
-void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks)
+void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox)
 {
     for( uint index = 0; index < this->size; index++ ){
 
@@ -1151,8 +1149,7 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks
             BoundingBox tmpSubDomainBox = *subDomainBox;
 
             // one layer for receive nodes and one for stoppers
-            if( lbmOrGks == LBM )
-                tmpSubDomainBox.extend(this->delta);
+            tmpSubDomainBox.extend(this->delta);
 
             if (!tmpSubDomainBox.isInside(x, y, z)
                 && ( this->getFieldEntry(index) == FLUID ||
@@ -1170,10 +1167,7 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks
             BoundingBox tmpSubDomainBox = *subDomainBox;
 
             // one layer for receive nodes and one for stoppers
-            if( lbmOrGks == LBM )
-                tmpSubDomainBox.extend(2.0 * this->delta);
-            else
-                tmpSubDomainBox.extend(1.0 * this->delta);
+            tmpSubDomainBox.extend(2.0 * this->delta);
 
             if (!tmpSubDomainBox.isInside(x, y, z))
                 this->setFieldEntry(index, INVALID_OUT_OF_GRID);
@@ -1181,15 +1175,10 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks
     }
 }
 
-void GridImp::findGridInterfaceCF(uint index, GridImp& finerGrid, LbmOrGks lbmOrGks)
+void GridImp::findGridInterfaceCF(uint index, GridImp& finerGrid)
 {
-    if (lbmOrGks == LBM)
-    {
-        gridInterface->findInterfaceCF            (index, this, &finerGrid);
-        gridInterface->findBoundaryGridInterfaceCF(index, this, &finerGrid);
-    }
-    else if (lbmOrGks == GKS)
-        gridInterface->findInterfaceCF_GKS(index, this, &finerGrid);
+    gridInterface->findInterfaceCF            (index, this, &finerGrid);
+    gridInterface->findBoundaryGridInterfaceCF(index, this, &finerGrid);
 }
 
 void GridImp::findGridInterfaceFC(uint index, GridImp& finerGrid)
@@ -1243,7 +1232,7 @@ void GridImp::mesh(TriangularMesh &triangularMesh)
     const clock_t end = clock();
     const real time = (real)(real(end - begin) / CLOCKS_PER_SEC);
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "time grid generation: " << time << "s\n";
+    VF_LOG_INFO("time grid generation: {}s", time);
 }
 
 void GridImp::mesh(Triangle &triangle)
@@ -1274,7 +1263,7 @@ void GridImp::mesh(Triangle &triangle)
 
 void GridImp::closeNeedleCells()
 {
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start closeNeedleCells()\n";
+    VF_LOG_TRACE("Start closeNeedleCells()");
 
     uint numberOfClosedNeedleCells = 0;
 
@@ -1286,7 +1275,7 @@ void GridImp::closeNeedleCells()
                 numberOfClosedNeedleCells++;
         }
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << numberOfClosedNeedleCells << " cells closed!\n";
+        VF_LOG_TRACE("{} cells closed!", numberOfClosedNeedleCells);
     }
     while( numberOfClosedNeedleCells > 0 );
 }
@@ -1315,7 +1304,7 @@ bool GridImp::closeCellIfNeedle(uint index)
 
 void GridImp::closeNeedleCellsThinWall()
 {
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start closeNeedleCellsThinWall()\n";
+    VF_LOG_TRACE("Start closeNeedleCellsThinWall()");
 
     uint numberOfClosedNeedleCells = 0;
 
@@ -1326,8 +1315,7 @@ void GridImp::closeNeedleCellsThinWall()
             if (this->closeCellIfNeedleThinWall(index))
                 numberOfClosedNeedleCells++;
         }
-
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << numberOfClosedNeedleCells << " cells closed!\n";
+        VF_LOG_TRACE("{} cells closed!", numberOfClosedNeedleCells);
     }
     while( numberOfClosedNeedleCells > 0 );
 }
@@ -1387,7 +1375,7 @@ void GridImp::findQs(TriangularMesh &triangularMesh)
     const clock_t end = clock();
     const real time = (real)(real(end - begin) / CLOCKS_PER_SEC);
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "time finding qs: " << time << "s\n";
+    VF_LOG_TRACE("time finding qs: {}s", time);
 }
 
 void GridImp::findQs(Triangle &triangle)
@@ -1644,7 +1632,7 @@ bool GridImp::checkIfAtLeastOneValidQ(const uint index, const Vertex & point, co
     return false;
 }
 
-void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks)
+void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomainBox)
 {
     for( uint index = 0; index < this->size; index++ ){
         real x, y, z;
@@ -1656,8 +1644,8 @@ void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomai
             this->getFieldEntry(index) == STOPPER_OUT_OF_GRID ||
             this->getFieldEntry(index) == STOPPER_COARSE_UNDER_FINE ) continue;
 
-        if( lbmOrGks == LBM && this->getFieldEntry(index) == STOPPER_OUT_OF_GRID_BOUNDARY ) continue;
-        if( lbmOrGks == LBM && this->getFieldEntry(index) == STOPPER_SOLID ) continue;
+        if( this->getFieldEntry(index) == STOPPER_OUT_OF_GRID_BOUNDARY ) continue;
+        if( this->getFieldEntry(index) == STOPPER_SOLID ) continue;
         if( direction == CommunicationDirections::MX ) findCommunicationIndex( index, x, subDomainBox->minX, direction);
         if( direction == CommunicationDirections::PX ) findCommunicationIndex( index, x, subDomainBox->maxX, direction);
         if( direction == CommunicationDirections::MY ) findCommunicationIndex( index, y, subDomainBox->minY, direction);
@@ -1734,12 +1722,8 @@ void GridImp::repairCommunicationIndices(int direction)
     this->communicationIndices[direction].receiveIndices = this->communicationIndices[direction+1].receiveIndices;
 
 
-
-
-
-
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "size send " << (int)this->communicationIndices[direction].sendIndices.size() << "\n";
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "recv send " << (int)this->communicationIndices[direction].receiveIndices.size() << "\n";
+    VF_LOG_INFO("size send {}", (int)this->communicationIndices[direction].sendIndices.size());
+    VF_LOG_INFO("recv send {}",(int)this->communicationIndices[direction].receiveIndices.size());
 }
 
 
@@ -1756,7 +1740,7 @@ real* GridImp::getDistribution() const
     return this->distribution.f;
 }
 
-int* GridImp::getDirection() const
+const std::vector<int>& GridImp::getDirection() const
 {
     return this->distribution.dirs;
 }
@@ -2109,16 +2093,22 @@ void GridImp::sortFluidNodeIndicesMacroVars()
         if(this->fluidNodeIndicesAllFeatures.size()>0)
         {
             this->fluidNodeIndicesMacroVars.erase(   std::remove_if(   this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(),
-                                                        [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
-                                            this->fluidNodeIndicesMacroVars.end()
-                                        );
+                                                    [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
+                                                    this->fluidNodeIndicesMacroVars.end() );
+        }
+
+        // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesMacroVars
+        if(this->fluidNodeIndicesBorder.size()>0)
+        {
+            this->fluidNodeIndicesMacroVars.erase(  std::remove_if(   this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(),
+                                                    [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ),
+                                                    this->fluidNodeIndicesMacroVars.end() );
         }
 
         // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
         this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesMacroVars.begin(),fluidNodeIndicesMacroVars.end(),x);} ),
-                                        this->fluidNodeIndices.end()
-                                    );
+                                        this->fluidNodeIndices.end() );
     }
 }
 
@@ -2130,20 +2120,26 @@ void GridImp::sortFluidNodeIndicesApplyBodyForce()
         // Remove duplicates
         this->fluidNodeIndicesApplyBodyForce.erase( unique( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end() ), this->fluidNodeIndicesApplyBodyForce.end() );
 
-         // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesMacroVars
+         // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesApplyBodyForce
         if(this->fluidNodeIndicesAllFeatures.size()>0)
         {
-            this->fluidNodeIndicesApplyBodyForce.erase(   std::remove_if(   this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(),
+            this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if(   this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
-                                            this->fluidNodeIndicesApplyBodyForce.end()
-                                        );
+                                                        this->fluidNodeIndicesApplyBodyForce.end() );
+        }
+
+        // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesApplyBodyForce
+        if(this->fluidNodeIndicesBorder.size()>0)
+        {
+            this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if(   this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(),
+                                                        [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ),
+                                                        this->fluidNodeIndicesApplyBodyForce.end() );
         }
 
         // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
         this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
-                                                        [&](auto x){return binary_search(fluidNodeIndicesApplyBodyForce.begin(),fluidNodeIndicesApplyBodyForce.end(),x);} ),
-                                        this->fluidNodeIndices.end()
-                                    );
+                                        [&](auto x){return binary_search(fluidNodeIndicesApplyBodyForce.begin(),fluidNodeIndicesApplyBodyForce.end(),x);} ),
+                                        this->fluidNodeIndices.end() );
     }
 }
 
@@ -2154,11 +2150,19 @@ void GridImp::sortFluidNodeIndicesAllFeatures()
         sort(this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end());
         // Remove duplicates
         this->fluidNodeIndicesAllFeatures.erase( unique( this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end() ), this->fluidNodeIndicesAllFeatures.end() );
-        // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
+
+        // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesAllFeatures
+        if(this->fluidNodeIndicesBorder.size()>0)
+        {
+            this->fluidNodeIndicesAllFeatures.erase(    std::remove_if(   this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end(),
+                                                        [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ),
+                                                        this->fluidNodeIndicesAllFeatures.end() );
+        }
+
+        // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndices
         this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
-                                        this->fluidNodeIndices.end()
-                                    );
+                                        this->fluidNodeIndices.end() );
     }
 }
 
@@ -2188,8 +2192,14 @@ void GridImp::getFluidNodeIndicesAllFeatures(uint *_fluidNodeIndicesAllFeatures)
 }
 
 
+std::vector<SideType> GridImp::getBCAlreadySet() {
+    return this->bcAlreadySet;
+}
 
-
+void GridImp::addBCalreadySet(SideType side)
+{
+    this->bcAlreadySet.push_back(side);
+}
 
 
 void GridImp::print() const
@@ -2199,3 +2209,10 @@ void GridImp::print() const
     if(this->gridInterface)
         this->gridInterface->print();
 }
+
+bool GridImp::isStopperForBC(uint index) const
+{
+    return (this->getFieldEntry(index) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ||
+            this->getFieldEntry(index) == vf::gpu::STOPPER_OUT_OF_GRID ||
+            this->getFieldEntry(index) == vf::gpu::STOPPER_SOLID);
+}
diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h
index 8283bf569e266b84f020334a306d93756b01c394..9377bbd407475add75a25af3fa38cb1fa74c833c 100644
--- a/src/gpu/GridGenerator/grid/GridImp.h
+++ b/src/gpu/GridGenerator/grid/GridImp.h
@@ -34,8 +34,7 @@
 #define GRID_IMP_H
 
 #include <array>
-
-#include "Core/LbmOrGks.h"
+#include <vector>
 
 #include "gpu/GridGenerator/global.h"
 
@@ -52,6 +51,7 @@ class Object;
 class BoundingBox;
 class TriangularMeshDiscretizationStrategy;
 
+
 #ifdef __GNUC__
     #ifndef __clang__
         #pragma push
@@ -72,11 +72,11 @@ class GRIDGENERATOR_EXPORT GridImp : public enableSharedFromThis<GridImp>, publi
 {
 protected:
     GridImp() = default;
-    GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution d, uint level);
+    GridImp(SPtr<Object> object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution d, uint level);
 
 public:
-    static SPtr<GridImp> makeShared(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, std::string d3Qxx, uint level);
-    virtual ~GridImp() = default;
+    static SPtr<GridImp> makeShared(SPtr<Object> object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, std::string d3Qxx, uint level);
+    ~GridImp() override = default;
 
 private:
     void initalNumberOfNodesAndSize();
@@ -92,6 +92,7 @@ private:
     bool nodeInPreviousCellIs(int index, char type) const;
     bool nodeInCellIs(Cell& cell, char type) const override;
 
+
     uint getXIndex(real x) const;
     uint getYIndex(real y) const;
     uint getZIndex(real z) const;
@@ -110,7 +111,7 @@ private:
     uint sparseSize;
     bool periodicityX = false, periodicityY = false, periodicityZ = false;
 
-    Object* object;
+    SPtr<Object> object;
     GridInterface *gridInterface;
 
     int *sparseIndices;
@@ -129,12 +130,14 @@ private:
 
     uint numberOfLayers;
 
-    TriangularMeshDiscretizationStrategy *triangularMeshDiscretizationStrategy;
+    SPtr<TriangularMeshDiscretizationStrategy> triangularMeshDiscretizationStrategy;
 
     uint numberOfSolidBoundaryNodes = 0;
 
     bool enableFixRefinementIntoTheWall;
 
+    std::vector<SideType> bcAlreadySet;
+
 protected:
     Field field;
     int *neighborIndexX, *neighborIndexY, *neighborIndexZ, *neighborIndexNegative;
@@ -149,9 +152,9 @@ public:
     void setPeriodicityY(bool periodicity) override;
     void setPeriodicityZ(bool periodicity) override;
 
-    bool getPeriodicityX() override;
-    bool getPeriodicityY() override;
-    bool getPeriodicityZ() override;
+    bool getPeriodicityX() const override;
+    bool getPeriodicityY() const override;
+    bool getPeriodicityZ() const override;
 
     void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall) override;
 
@@ -161,19 +164,19 @@ public:
     uint transCoordToIndex(const real &x, const real &y, const real &z) const override;
     void transIndexToCoords(uint index, real &x, real &y, real &z) const override;
 
-    void findGridInterface(SPtr<Grid> grid, LbmOrGks lbmOrGks) override;
+    void findGridInterface(SPtr<Grid> grid) override;
 
     void repairGridInterfaceOnMultiGPU(SPtr<Grid> fineGrid) override;
 
-    void limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks) override;
+    void limitToSubDomain(SPtr<BoundingBox> subDomainBox) override;
 
     void freeMemory() override;
 
     uint getLevel(real levelNull) const;
     uint getLevel() const;
 
-    void setTriangularMeshDiscretizationStrategy(TriangularMeshDiscretizationStrategy *triangularMeshDiscretizationStrategy);
-    TriangularMeshDiscretizationStrategy *getTriangularMeshDiscretizationStrategy();
+    void setTriangularMeshDiscretizationStrategy(SPtr<TriangularMeshDiscretizationStrategy> triangularMeshDiscretizationStrategy);
+    SPtr<TriangularMeshDiscretizationStrategy> getTriangularMeshDiscretizationStrategy();
 
     uint getNumberOfSolidBoundaryNodes() const override;
     void setNumberOfSolidBoundaryNodes(uint numberOfSolidBoundaryNodes) override;
@@ -185,6 +188,9 @@ public:
 
     void setNumberOfLayers(uint numberOfLayers) override;
 
+    std::vector<SideType> getBCAlreadySet() override;
+    void addBCalreadySet(SideType side) override;
+
 public:
     Distribution distribution;
 
@@ -209,7 +215,7 @@ public:
     void findSolidStopperNode(uint index);
     void findBoundarySolidNode(uint index);
 
-    void findGridInterfaceCF(uint index, GridImp &finerGrid, LbmOrGks lbmOrGks);
+    void findGridInterfaceCF(uint index, GridImp &finerGrid);
     void findGridInterfaceFC(uint index, GridImp &finerGrid);
     void findOverlapStopper(uint index, GridImp &finerGrid);
     void findInvalidBoundaryNodes(uint index);
@@ -219,10 +225,11 @@ public:
     bool nodeInNextCellIs(int index, char type) const;
     bool hasAllNeighbors(uint index) const;
     bool hasNeighborOfType(uint index, char type) const;
+    bool nodeHasBC(uint index) const override;
     bool cellContainsOnly(Cell &cell, char type) const;
     bool cellContainsOnly(Cell &cell, char typeA, char typeB) const;
 
-    const Object* getObject() const override;
+    SPtr<const Object> getObject() const override;
 
     Field getField() const;
     char getFieldEntry(uint index) const override;
@@ -234,7 +241,7 @@ public:
     uint getSparseSize() const override;
     int getSparseIndex(uint matrixIndex) const override;
     real* getDistribution() const override;
-    int* getDirection() const override;
+    const std::vector<int>& getDirection() const override;
     int getStartDirection() const override;
     int getEndDirection() const override;
 
@@ -259,6 +266,8 @@ public:
     static void getGridInterface(uint *gridInterfaceList, const uint *oldGridInterfaceList, uint size);
 
     bool isSparseIndexInFluidNodeIndicesBorder(uint &sparseIndex) const override;
+    
+    bool isStopperForBC(uint index) const override;
 
     int *getNeighborsX() const override;
     int* getNeighborsY() const override;
@@ -276,7 +285,7 @@ public:
     void print() const;
 
 public:
-    virtual void findSparseIndices(SPtr<Grid> fineGrid) override;
+    void findSparseIndices(SPtr<Grid> fineGrid) override;
 
     void findForGridInterfaceNewIndices(SPtr<GridImp> fineGrid);
     void updateSparseIndices();
@@ -344,7 +353,7 @@ private:
     void allocateQs();
 
 public:
-    void findCommunicationIndices(int direction, SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks) override;
+    void findCommunicationIndices(int direction, SPtr<BoundingBox> subDomainBox) override;
     void findCommunicationIndex(uint index, real coordinate, real limit, int direction);
 
     uint getNumberOfSendNodes(int direction) override;
diff --git a/src/gpu/GridGenerator/grid/GridImpTest.cpp b/src/gpu/GridGenerator/grid/GridImpTest.cpp
index 8f5ddb1b01dd88cca7d750017ec328efe02cd92f..07b2a03f429ad8baf0badd0c966dd88235644c0a 100644
--- a/src/gpu/GridGenerator/grid/GridImpTest.cpp
+++ b/src/gpu/GridGenerator/grid/GridImpTest.cpp
@@ -169,17 +169,12 @@ class findNeighborsIntegrationTest : public ::testing::Test
 {
 protected:
     SPtr<MultipleGridBuilder> gridBuilder;
+
     void SetUp() override
     {
         auto gridFactory = GridFactory::make();
         gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
         gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-        // init logger to avoid segmentation fault in buildGrids
-        logging::Logger::addStream(&std::cout);
-        logging::Logger::setDebugLevel(logging::Logger::Level::WARNING);
-        logging::Logger::timeStamp(logging::Logger::ENABLE);
-        logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
     }
 };
 
@@ -188,7 +183,7 @@ TEST_F(findNeighborsIntegrationTest, grid1)
     const real dx = 0.15;
     gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, dx);
 
-    gridBuilder->buildGrids(LBM, false);
+    gridBuilder->buildGrids(false);
     auto grid = gridBuilder->getGrid(0);
 
     // Only the last layer of nodes should have invalid neighbors. The grid is a cube with a side length of 9 nodes
@@ -211,7 +206,7 @@ TEST_F(findNeighborsIntegrationTest, grid2)
     const real dx = 1.0 / 64;
     gridBuilder->addCoarseGrid(-0.6, -0.6, -0.6, 0.6, 0.6, 0.6, dx);
 
-    gridBuilder->buildGrids(LBM, false);
+    gridBuilder->buildGrids(false);
     auto grid = gridBuilder->getGrid(0);
 
     // Only the last layer of nodes should have invalid neighbors. The grid is a cube with a side length of 79 nodes
@@ -234,7 +229,7 @@ TEST_F(findNeighborsIntegrationTest, validFluidNeighbors1)
     real dx = 0.17;
     gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, dx);
 
-    gridBuilder->buildGrids(LBM, false);
+    gridBuilder->buildGrids(false);
     auto grid = gridBuilder->getGrid(0);
 
     auto numberInvalidFluidNeighbors = testFluidNodeNeighbors(grid);
@@ -248,7 +243,7 @@ TEST_F(findNeighborsIntegrationTest, validFluidNeighbors2)
     real dx = 0.18;
     gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, dx);
 
-    gridBuilder->buildGrids(LBM, false);
+    gridBuilder->buildGrids(false);
     auto grid = gridBuilder->getGrid(0);
 
     auto numberInvalidFluidNeighbors = testFluidNodeNeighbors(grid);
diff --git a/src/gpu/GridGenerator/grid/GridInterface.cpp b/src/gpu/GridGenerator/grid/GridInterface.cpp
index a373b0eafbdb6a33338f54bf3efdc47a0edca315..4b69cebd16d4c529e4889b84ff881d4ed660f124 100644
--- a/src/gpu/GridGenerator/grid/GridInterface.cpp
+++ b/src/gpu/GridGenerator/grid/GridInterface.cpp
@@ -125,28 +125,6 @@ void GridInterface::findBoundaryGridInterfaceCF(const uint& indexOnCoarseGrid, G
     }
 }
 
-void GridInterface::findInterfaceCF_GKS(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid)
-{
-	const bool nodeOnCoarseGridIsFluid = coarseGrid->getField().isFluid(indexOnCoarseGrid);
-	if (!nodeOnCoarseGridIsFluid)
-		return;
-
-	real x, y, z;
-	coarseGrid->transIndexToCoords(indexOnCoarseGrid, x, y, z);
-
-	for (const auto dir : coarseGrid->distribution)
-	{
-		const uint indexOnFineGrid = fineGrid->transCoordToIndex(x + 0.25 * dir[0] * coarseGrid->getDelta(),
-																 y + 0.25 * dir[1] * coarseGrid->getDelta(),
-																 z + 0.25 * dir[2] * coarseGrid->getDelta());
-
-		if (indexOnFineGrid != INVALID_INDEX && fineGrid->getField().is(indexOnFineGrid, STOPPER_OUT_OF_GRID)) 
-		{
-			coarseGrid->getField().setFieldEntry(indexOnCoarseGrid, FLUID_CFC);
-			break;
-		}
-	}
-}
 
 void GridInterface::findInterfaceFC(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid)
 {
diff --git a/src/gpu/GridGenerator/grid/GridInterface.h b/src/gpu/GridGenerator/grid/GridInterface.h
index 713d495d4386e0fe743357a803b84be02c061561..ae90f513de436324e982a8ac34db7bb1a07f908d 100644
--- a/src/gpu/GridGenerator/grid/GridInterface.h
+++ b/src/gpu/GridGenerator/grid/GridInterface.h
@@ -46,9 +46,6 @@ public:
     void GRIDGENERATOR_EXPORT findInterfaceCF(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
     void GRIDGENERATOR_EXPORT findBoundaryGridInterfaceCF(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
 
-
-    void GRIDGENERATOR_EXPORT findInterfaceCF_GKS(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
-
     void GRIDGENERATOR_EXPORT findInterfaceFC(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
     void GRIDGENERATOR_EXPORT findOverlapStopper(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
     
diff --git a/src/gpu/GridGenerator/grid/distributions/Distribution.cpp b/src/gpu/GridGenerator/grid/distributions/Distribution.cpp
index a6e11ac909186618d765596f15263035b3b6401a..31d2e2f8a927e7141d80a55f4164dfcb0f52a8e0 100644
--- a/src/gpu/GridGenerator/grid/distributions/Distribution.cpp
+++ b/src/gpu/GridGenerator/grid/distributions/Distribution.cpp
@@ -46,9 +46,9 @@ Distribution DistributionHelper::getDistribution27()
     d27.dir_start = STARTDIR;
     d27.dir_end = ENDDIR;
 
-    d27.dirs = new int[(ENDDIR + 1) * DIMENSION];
+    d27.dirs.resize((ENDDIR + 1) * DIMENSION);
 
-    d27.directions = new Direction[ENDDIR + 1];
+    d27.directions.resize(ENDDIR + 1);
     d27.directions[DIR_P00] = Direction(DIR_27_E_X, DIR_27_E_Y, DIR_27_E_Z);
     d27.directions[DIR_M00] = Direction(DIR_27_W_X, DIR_27_W_Y, DIR_27_W_Z);
     d27.directions[DIR_0P0] = Direction(DIR_27_N_X, DIR_27_N_Y, DIR_27_N_Z);
diff --git a/src/gpu/GridGenerator/grid/distributions/Distribution.h b/src/gpu/GridGenerator/grid/distributions/Distribution.h
index 7982abc235b020003526b10c885f4936bf661936..39cea3747e71a5c26bfd9172267366284dbc94ff 100644
--- a/src/gpu/GridGenerator/grid/distributions/Distribution.h
+++ b/src/gpu/GridGenerator/grid/distributions/Distribution.h
@@ -73,8 +73,8 @@ struct Distribution
     typedef const Direction* const_iterator;
 
     real* f;
-    int *dirs;
-    Direction* directions;
+    std::vector<int> dirs;
+    std::vector<Direction> directions;
     int dir_start;
     int dir_end;
     const char* name;
diff --git a/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.cpp b/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.cpp
index 7f818f3217e682f21c2b41c62070924243fcb3b0..e5ddf7932e8505c713387f96157a80626d37aa1e 100644
--- a/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.cpp
+++ b/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.cpp
@@ -38,8 +38,8 @@
 #include <sstream>
 #include <memory>
 
-#include "basics/basics/writer/WbWriterVtkXmlBinary.h"
-#include "basics/basics/container/CbArray3D.h"
+#include "basics/writer/WbWriterVtkXmlBinary.h"
+#include "basics/container/CbArray3D.h"
 
 #include "geometries/Vertex/Vertex.h"
 
@@ -85,7 +85,7 @@ void GridVTKWriter::writeGridToVTKXML(SPtr<Grid> grid, const std::string& name)
         std::vector<std::string> nodedatanames;
         std::vector< std::vector<double> > nodedata;
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write Grid to XML VTK (*.vtu) output file : " + name + "_Part_" + std::to_string(part) + "\n";
+        VF_LOG_INFO("Write Grid to XML VTK (*.vtu) output file : {}_Part_{}", name, part);
 
         nodedatanames.emplace_back("types");
         nodedatanames.emplace_back("sparse_id");
@@ -154,7 +154,7 @@ void GridVTKWriter::writeGridToVTKXML(SPtr<Grid> grid, const std::string& name)
             }
         }
         WbWriterVtkXmlBinary::getInstance()->writeOctsWithNodeData(name + "_Part_" + std::to_string(part), nodes, cells, nodedatanames, nodedata);
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "done. \n";
+        VF_LOG_INFO("done.");
     }
 
 }
@@ -282,14 +282,14 @@ void GridVTKWriter::initalVtkWriter(WRITING_FORMAT format, const std::string& na
 {
     GridVTKWriter::format = format;
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write Grid to vtk output file : " + name + "\n";
+    VF_LOG_INFO("Write Grid to vtk output file: {}", name);
 
     std::string mode = "w";
     if (isBinaryWritingFormat())
         mode = "wb";
     GridVTKWriter::openFile(name, mode);
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "  Output file opened ...\n";
+    VF_LOG_INFO("Output file opened ...");
 }
 
 bool GridVTKWriter::isBinaryWritingFormat()
@@ -306,14 +306,14 @@ void GridVTKWriter::writeVtkFile(SPtr<Grid> grid)
     GridVTKWriter::writeTypes(grid);
     GridVTKWriter::closeFile();
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Output file closed\n";
+    VF_LOG_INFO("Output file closed");
 }
 
 void GridVTKWriter::openFile(const std::string& name, const std::string& mode)
 {
     file = fopen(name.c_str(), mode.c_str());
     if(file==NULL)
-        *logging::out << logging::Logger::INFO_HIGH << "  cannot open file ...\n";
+        VF_LOG_CRITICAL("cannot open file {}", name);
 }
 
 void GridVTKWriter::closeFile()
diff --git a/src/gpu/GridGenerator/io/QLineWriter.cpp b/src/gpu/GridGenerator/io/QLineWriter.cpp
index 29894e8d589fb59c1dbf57e08692dddfc03619a1..1aece49ceeabc5ca195146f2ac6dcb7ba960f3ea 100644
--- a/src/gpu/GridGenerator/io/QLineWriter.cpp
+++ b/src/gpu/GridGenerator/io/QLineWriter.cpp
@@ -36,7 +36,7 @@
 #include <string>
 #include <fstream>
 
-#include "basics/basics/utilities/UbTuple.h"
+#include "basics/utilities/UbTuple.h"
 
 #include "geometries/Vertex/Vertex.h"
 
@@ -50,7 +50,7 @@ void QLineWriter::writeArrows(std::string fileName, SPtr<GeometryBoundaryConditi
 {
 	if (geometryBoundaryCondition == nullptr)
 	{
-		*logging::out << logging::Logger::WARNING << "(QLineWriter::writeArrows) no geometry bc on this grid level.\n";
+        VF_LOG_WARNING("(QLineWriter::writeArrows) no geometry bc on this grid level.");
 		return;
 	}
     std::vector<UbTupleFloat3> nodes;
diff --git a/src/gpu/GridGenerator/io/STLReaderWriter/STLReader.cpp b/src/gpu/GridGenerator/io/STLReaderWriter/STLReader.cpp
index d3eb221265b2f8c79d5aece8729585733c2d60e8..d73adc73eeffce56397829c115def5bfd1d8bc59 100644
--- a/src/gpu/GridGenerator/io/STLReaderWriter/STLReader.cpp
+++ b/src/gpu/GridGenerator/io/STLReaderWriter/STLReader.cpp
@@ -55,17 +55,17 @@ std::vector<Triangle> STLReader::readSTL(const std::string& name)
         line[strcspn(line.c_str(), "\r\n")] = 0;
         if (strcmp(line.c_str(), "solid ascii") == 0) {
             file.close();
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << "start reading ascii STL file: " + name + "\n";
+            VF_LOG_INFO("start reading ascii STL file: {}" , name);
             return readASCIISTL(name);
         }
         else {
             file.close();
-            *logging::out << logging::Logger::INFO_INTERMEDIATE << "start reading binary STL file: " + name + "\n";
+            VF_LOG_INFO("start reading binary STL file: {}" , name);
+
             return readBinarySTL(name);
         }
     }
-
-     *logging::out << logging::Logger::INFO_INTERMEDIATE << "can't open STL-file" + name + " ... exit program! \n";
+    VF_LOG_CRITICAL("can't open STL-file {} ... exit program!" , name);
      exit(1);
 }
 
@@ -81,7 +81,7 @@ std::vector<Triangle> STLReader::readASCIISTL(const std::string& name)
     const int lines = countLinesInFile(name);
     const int nTriangles = (lines) / 7; // seven lines per triangle
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Number of Triangles: " << nTriangles << "\n";
+    VF_LOG_INFO("Number of Triangles: {}" , nTriangles);
     std::vector<Triangle> triangles;
 
     std::string line;
@@ -109,8 +109,7 @@ std::vector<Triangle> STLReader::readASCIISTL(const std::string& name)
 
 std::vector<Triangle> STLReader::readASCIISTLWithPatches(const std::string& name, const std::vector<uint> ignorePatches)
 {
-    *logging::out << logging::Logger::INFO_HIGH << "Start reading ascii STL file:\n";
-    *logging::out << logging::Logger::INFO_HIGH << "    " + name + "\n";
+    VF_LOG_INFO("Start reading ascii STL file: {}" , name);
 
     std::vector<Triangle> triangles;
 
@@ -141,9 +140,10 @@ std::vector<Triangle> STLReader::readASCIISTLWithPatches(const std::string& name
             ignoreCurrentPatch = std::find( ignorePatches.begin(), ignorePatches.end(), currentPatchIndex ) != ignorePatches.end();
 
             if( !ignoreCurrentPatch )
-                *logging::out << logging::Logger::INFO_INTERMEDIATE << "    Reading STL-Group " << line.substr( line.find(' ') + 1 ) << " as patch " << currentPatchIndex << "\n";
+                VF_LOG_INFO("    Reading STL-Group {} as patch {}" , line.substr( line.find(' ') + 1 ) , currentPatchIndex);
             else
-                *logging::out << logging::Logger::WARNING           << "    Ignoring STL-Group " << line.substr( line.find(' ') + 1 ) << " as patch " << currentPatchIndex << "\n";
+                VF_LOG_WARNING("    Ignoring STL-Group {} as patch {}" , line.substr( line.find(' ') + 1 ) , currentPatchIndex);
+
 
             currentFacetLine++;
         }
@@ -202,7 +202,7 @@ std::vector<Triangle> STLReader::readASCIISTLWithPatches(const std::string& name
 
     file.close();
 
-    *logging::out << logging::Logger::INFO_HIGH << "Done reading ascii STL file\n";
+    VF_LOG_INFO("Done reading ascii STL file");
 
     return triangles;
 }
@@ -218,7 +218,7 @@ std::vector<Triangle> STLReader::readBinarySTL(const std::string& name)
     char nTri[4];
     sizef                  = fread(nTri, sizeof(char), 4, file);
     unsigned long nTriLong = *((unsigned long*)nTri);
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Number of Triangles: " << nTriLong << "\n";
+    VF_LOG_INFO("Number of Triangles: {}" , nTriLong);
     std::vector<Triangle> triangles;
 
     char facet[50];
@@ -248,18 +248,18 @@ std::vector<Triangle> STLReader::readSTL(const BoundingBox &box, const std::stri
 		line[strcspn(line.c_str(), "\r\n")] = 0;
 		if (strcmp(line.c_str(), "solid ascii") == 0) {
 			file.close();
-			*logging::out << logging::Logger::INFO_INTERMEDIATE << "start reading ascii STL file: " + name + "\n";
+            VF_LOG_INFO("start reading ascii STL file {}", name);
 			return readASCIISTL(box, name);
 		}
 		else {
 			file.close();
-			*logging::out << logging::Logger::INFO_INTERMEDIATE << "start reading binary STL file: " + name + "\n";
+            VF_LOG_INFO("start reading binary STL file {}", name);
 			std::vector<Triangle> triangles = readBinarySTL(box, name);
 			return triangles;
 		}
 	}
 	else {
-		*logging::out << logging::Logger::INFO_INTERMEDIATE << "can't open STL-file" + name + "\n";
+        VF_LOG_CRITICAL("can't open STL-file {}", name);
 		exit(1);
 	}
 }
@@ -311,7 +311,7 @@ std::vector<Triangle> STLReader::readBinarySTL(const BoundingBox &box, const std
     sizef    = fread(nTri, sizeof(char), 4, file);
     nTriLong = *((unsigned long*)nTri);
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Number of Triangles complete geometry: " << nTriLong << "\n";
+    VF_LOG_INFO("Number of Triangles complete geometry: {}", nTriLong);
     std::vector<Triangle> triangles;
 
     char facet[50];
@@ -329,8 +329,8 @@ std::vector<Triangle> STLReader::readBinarySTL(const BoundingBox &box, const std
             triangles.push_back(t);
     }
     int size = (int)triangles.size();
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Number of Triangles in process: " << size << "\n";
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Complete reading STL file. \n";
+    VF_LOG_INFO("Number of Triangles in process: {}", size);
+    VF_LOG_INFO("Complete reading STL file");
     (void)sizef;
 	fclose(file);
 
diff --git a/src/gpu/GridGenerator/io/STLReaderWriter/STLWriter.cpp b/src/gpu/GridGenerator/io/STLReaderWriter/STLWriter.cpp
index e29320cbd6867e1ae5a65cf41623780cd83aead7..c1f306839e7ed7a9815c041c4e000df044d43630 100644
--- a/src/gpu/GridGenerator/io/STLReaderWriter/STLWriter.cpp
+++ b/src/gpu/GridGenerator/io/STLReaderWriter/STLWriter.cpp
@@ -42,7 +42,7 @@
 void STLWriter::writeSTL(std::vector<Triangle> &vec, const std::string &name, bool writeBinary)
 {
     const int size = (int)vec.size();
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << size << " Triangles to STL : " + name + "\n";
+    VF_LOG_INFO("Write {} Triangles to STL : {}" , size, name);
 
     std::ios_base::openmode mode = std::ios::out;
     if (writeBinary)
@@ -51,7 +51,7 @@ void STLWriter::writeSTL(std::vector<Triangle> &vec, const std::string &name, bo
     std::ofstream ofstream(name, mode);
 
     if (!ofstream.is_open()) {
-        *logging::out << logging::Logger::INFO_HIGH << " Output file not open - exit function\n";
+        VF_LOG_WARNING("Output file not open - exit function");
         return;
     }
 
@@ -61,7 +61,7 @@ void STLWriter::writeSTL(std::vector<Triangle> &vec, const std::string &name, bo
         writeAsciiSTL(ofstream, vec);
 
     ofstream.close();
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Output file closed\n";
+    VF_LOG_INFO("Output file closed");
 }
 
 
diff --git a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
index 23fb0f4e7f3e16702e9cb2459606986af1032e49..eb7e6b22e160de4fedb5832be807dabf36531c9c 100644
--- a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
+++ b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
@@ -38,7 +38,7 @@
 #include <omp.h>
 #include <cmath>
 
-#include "Core/Timer/Timer.h"
+#include "Timer/Timer.h"
 
 #include "grid/NodeValues.h"
 #include "grid/Grid.h"
@@ -59,13 +59,13 @@ void SimulationFileWriter::write(const std::string& folder, SPtr<GridBuilder> bu
 {
     SimulationFileWriter::folder = folder;
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start writing simulation files to " << folder << ":\n";
+    VF_LOG_INFO("Start writing simulation files to {}", folder);
     auto timer = Timer::makeStart();
 
     write(builder, format);
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "    Time writing files: " << timer->getCurrentRuntimeInSeconds() << " sec\n";
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Done writing simulation Files!\n";
+    VF_LOG_INFO("    Time writing files: {} sec", timer->getCurrentRuntimeInSeconds());
+    VF_LOG_INFO("Done writing simulation Files!");
 }
 
 
@@ -79,7 +79,7 @@ void SimulationFileWriter::write(SPtr<GridBuilder> builder, FILEFORMAT format)
     writeLevel(numberOfLevel);
     //auto qs = createBCVectors(builder->getGrid(0));
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "    Coordinate and neighbor files:\n";
+    VF_LOG_INFO("    Coordinate and neighbor files:"); // sub-heading: message text is indented by four spaces
     for (uint level = 0; level < numberOfLevel; level++)
     {
         writeNumberNodes(builder, level);
@@ -95,10 +95,10 @@ void SimulationFileWriter::write(SPtr<GridBuilder> builder, FILEFORMAT format)
         }
     }
     
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "    Boundary Condition files:\n";
+    VF_LOG_INFO("    Boundary Condition files:"); // sub-heading: message text is indented by four spaces
     writeBoundaryQsFile(builder);
     
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "    Communication files:\n";
+    VF_LOG_INFO("    Communication files:");
     writeCommunicationFiles(builder);
 
     closeFiles();
@@ -417,10 +417,10 @@ void SimulationFileWriter::writeGridInterfaceToFile(SPtr<GridBuilder> builder, u
     }
 }
 
-void SimulationFileWriter::writeGridInterfaceToFile(const uint numberOfNodes, std::ofstream& coarseFile, uint* coarse, std::ofstream& fineFile, uint* fine)
+void SimulationFileWriter::writeGridInterfaceToFile(uint numberOfNodes, std::ofstream &coarseFile, uint *coarse,
+                                                    std::ofstream &fineFile, uint *fine)
 {
-    for (uint index = 0; index < numberOfNodes; index++)
-    {
+    for (uint index = 0; index < numberOfNodes; index++) {
         coarseFile << coarse[index] << " \n";
         fineFile << fine[index] << " \n";
     }
@@ -428,17 +428,15 @@ void SimulationFileWriter::writeGridInterfaceToFile(const uint numberOfNodes, st
     fineFile << "\n";
 }
 
-void SimulationFileWriter::writeGridInterfaceOffsetToFile(uint numberOfNodes, std::ofstream & offsetFile, real* offset_X, real* offset_Y, real* offset_Z)
+void SimulationFileWriter::writeGridInterfaceOffsetToFile(uint numberOfNodes, std::ofstream &offsetFile, real *offset_X,
+                                                          real *offset_Y, real *offset_Z)
 {
-    for (uint index = 0; index < numberOfNodes; index++)
-    {
+    for (uint index = 0; index < numberOfNodes; index++) {
         offsetFile << offset_X[index] << " " << offset_Y[index] << " " << offset_Z[index] << " \n";
     }
     offsetFile << "\n";
 }
 
-
-
 /*#################################################################################*/
 /*---------------------------------private methods---------------------------------*/
 /*---------------------------------------------------------------------------------*/
diff --git a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
index f3851abfd3372e5d3548cf7c0cd02344aa8acbaa..a4ba3d4b53f2feb63d0363bf47e92152bea05c57 100644
--- a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
+++ b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
@@ -40,8 +40,6 @@
 #include <vector>
 #include <array>
 
-#include "Core/NonCreatable.h"
-
 #include "gpu/GridGenerator/global.h"
 
 class UnstructuredGridBuilder;
@@ -57,7 +55,7 @@ enum class FILEFORMAT
     BINARY, ASCII
 };
 
-class SimulationFileWriter : private NonCreatable
+class SimulationFileWriter
 {
 public:
     GRIDGENERATOR_EXPORT static void write(const std::string& folder, SPtr<GridBuilder> builder, FILEFORMAT format);
diff --git a/src/gpu/Traffic/CMakeLists.txt b/src/gpu/Traffic/CMakeLists.txt
deleted file mode 100644
index 249594e0434bc01aecd8f95f524a25a0745013d4..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-project(Traffic LANGUAGES CUDA CXX)
-
-vf_add_library(BUILDTYPE shared PRIVATE_LINK GridGenerator basics)
diff --git a/src/gpu/Traffic/GPU/TrafficTimestep.cu b/src/gpu/Traffic/GPU/TrafficTimestep.cu
deleted file mode 100644
index 34ea17549bdec9b99631f999e4108018e8829318..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/GPU/TrafficTimestep.cu
+++ /dev/null
@@ -1,936 +0,0 @@
-#include "TrafficTimestep.h"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-
-#include <cmath>
-
-#include "RoadNetwork/RoadNetworkData.h"
-#include "Junction/Junction.h"
-#include "Sink/Sink.h"
-#include "Source/Source.h"
-#include "Utilities/safe_casting.h"
-#include "Utilities/invalidInput_error.h"
-
-//kernel
-__global__ void trafficTimestepKernel(int* roadCurrent, int* roadNext, int* neighbors, real * pConcArray, uint* juncInCellIndices, bool* juncCarCanEnter,
-	int* juncCarsOnJunction, uint* juncAlreadyMoved, uint* juncOldSpeeds, real* sinkCarBlockedPossibilities,
-	uint size_road, uint size_juncInCells, uint maxVelocity, uint maxAcceleration, uint safetyDistance, bool useSlowToStart, real slowStartPossibility, real dawdlePossibility, curandState *state);
-
-__global__	void sourceTimestepKernel(int* roadCurrent, int* roadNext, int* neighbors, uint* sourceIndices, real* sinkCarBlockedPossibilities,
-	float* sourcePossibilities, real * pConcArray, uint maxVelocity, uint safetyDistance, uint size_sources, curandState *state);
-
-__global__ void junctionTimestepKernel(int* juncCarsOnJunction, uint* juncInCellIndices, int* juncOutCellIndices, uint* juncStartInIncells, uint* juncStartInOutcells,
-	uint* juncAlreadyMoved, int* juncCarCanNotEnterThisOutCell, bool* juncOutCellIsOpen, uint* juncOldSpeeds, bool* juncCarCanEnter, uint* juncTrafficLightSwitchTime,
-	int* roadCurrent, int* roadNext, int* neighbors, real * pConcArray, real* sinkCarBlockedPossibilities, uint safetyDistance,
-	uint size_juncInCells, uint size_juncOutCells, uint size_junctions, uint numTimestep, curandState *state);
-
-__global__ void calculationOfNaschVelocityForFluidBCKernel(int* roadNext, int* neighbors, int* naschVelocity, uint size_road, uint safetyDistance);
-
-__global__ void randomSetupKernel(curandState *state, uint size);
-
-__global__ void resetNext(int* roadNext, uint size_road);
-
-//device functions for movement
-__device__ inline uint getJunctionInCellsVectorIndex(uint * juncInCellIndices, uint size_juncInCells, uint cell);
-
-__device__ uint getGapAfterOutCell(int* roadCurrent, int* neighbors, real* sinkCarBlockedPossibilities, int sourceIndex, uint speed, uint safetyDistance, curandState* state);
-
-__device__ inline void carStaysOnJunction(int* juncCarsOnJunction, uint* juncInCellIndices, uint* juncAlreadyMoved, uint*juncOldSpeeds, real* pConcArray, uint inCellVectorIndex);
-
-
-
-//device functions for concentrations
-__device__ inline real calcConcentration(uint oldSpeed, uint newSpeed);
-
-__device__ inline void putConcIntoArray(real * pConcArray, uint oldSpeed, uint newSpeed, uint newIndex);
-
-__device__ inline void addConcToArray(real * pConcArray, uint oldSpeed, uint newSpeed, uint newIndex);
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-
-TrafficTimestep::TrafficTimestep(std::shared_ptr<RoadNetworkData> road, real * pConcArray, int* naschVelocity)
-{
-	//calculate sizes
-	this->size_roads = road->roadLength;
-	this->size_junctions = castSizeT_Uint(road->junctions.size());
-	this->size_sources = castSizeT_Uint(road->sources.size());
-	this->size_sinks = castSizeT_Uint(road->sinks.size());
-
-	//set attributes
-	this->maxVelocity = road->maxVelocity;
-	this->safetyDistance = road->safetyDistance;
-	this->dawdlePossibility = road->dawdlePossibility;
-	this->useSlowToStart = road->useSlowToStart;
-	this->slowStartPossibility = road->slowStartPossibility;
-	this->maxAcceleration = road->maxAcceleration;
-
-	//prepare road
-	this->neighbors = road->neighbors;
-	this->roadCurrent = *road->pcurrent;
-
-	this->roadNext.resize(size_roads);
-	thrust::fill(roadNext.begin(), roadNext.end(), -1);
-
-	//prepare junctions
-	combineJuncInCellIndices(road->junctions);
-	combineJuncOutCellIndices(road->junctions);
-	combineJuncCarCanNotEnterThisOutCell(road->junctions);
-	combineUseTrafficLights(road->junctions);
-
-	initJuncCarCanEnter();
-	initJuncCarsOnJunction();
-	initJuncAlreadyMoved();
-	initjuncOutCellIsOpen();
-	initJuncOldSpeeds();
-
-	//prepare sinks
-	combineSinkBlockedPossibilities(road->sinks);
-
-	//prepare sources
-	combineSourcePossibilities(road->sources);
-	combineSourceIndices(road->sources);
-
-	//preapre grid dimensions
-	calculateTrafficTimestepKernelDimensions();
-	calculateJunctionKernelDimensions();
-	calculateSourceKernelDimensions();
-
-	////setUp random numbers
-	checkCudaErrors(cudaMalloc((void **)&statesRoad, gridRoad.x * gridRoad.y * threadsRoads.x * sizeof(curandState)));
-	randomSetupKernel << <gridRoad, threadsRoads >> > (statesRoad, size_roads);
-	cudaDeviceSynchronize();
-	getLastCudaError("random_setup_kernel for roads execution failed");
-
-	checkCudaErrors(cudaMalloc((void **)&statesJunctions, gridJunctions.x * gridJunctions.y * threadsJunctions.x * sizeof(curandState)));
-	randomSetupKernel << <gridJunctions, threadsJunctions >> > (statesJunctions, size_junctions);
-	cudaDeviceSynchronize();
-	getLastCudaError("random_setup_kernel for junctions execution failed");
-
-	checkCudaErrors(cudaMalloc((void **)&statesSources, gridSources.x * gridSources.y * threadsSources.x * sizeof(curandState)));
-	randomSetupKernel << <gridSources, threadsSources >> > (statesSources, size_sources);
-	cudaDeviceSynchronize();
-	getLastCudaError("random_setup_kernel for sources execution failed");
-
-	//prepare ConcWriter
-	if (pConcArray == nullptr) {
-		checkCudaErrors(cudaMalloc((void **)&this->pConcArray, size_roads * sizeof(real)));
-		checkCudaErrors(cudaMemset(this->pConcArray, 0.0, size_t(size_roads) * sizeof(real)));
-		printf("No Pointer for pConcArray!!!");
-	}
-	else this->pConcArray = pConcArray;
-
-	//prepare naschVelocity for fluid BC 
-	if (naschVelocity == nullptr) {
-		checkCudaErrors(cudaMalloc((void **)&this->naschVelocity, size_roads * sizeof(int)));
-		checkCudaErrors(cudaMemset(this->naschVelocity, 0, size_t(size_roads) * sizeof(int)));
-		//printf("\nsize_roads = %d\n",size_roads);
-		printf("No Pointer for naschVelocity!!!");
-	}
-	else this->naschVelocity = naschVelocity;
-}
-
-
-void TrafficTimestep::calculateTimestep(std::shared_ptr<RoadNetworkData> road)
-{
-	switchCurrentNext();
-
-	//reset Junction open outcells
-	resetOutCellIsOpen();
-
-	//resetNext
-	resetNext();
-
-	callTrafficTimestepKernel();
-	getLastCudaError("trafficTimestepKernel execution failed");
-
-	callJunctionTimestepKernel();
-	getLastCudaError("junctionTimestepKernel execution failed");
-
-	callSourceTimestepKernel();
-	getLastCudaError("sourceTimestepKernel execution failed");
-
-	callCalculationOfNaschVelocityForFluidBCKernel();
-	getLastCudaError("callCalculationOfNaschVelocityForFluidBCKernel execution failed");
-
-	numTimestep++;
-}
-
-
-void TrafficTimestep::switchCurrentNext()
-{
-	if (timestepIsEven) {
-		timestepIsEven = false;
-		pRoadCurrent = roadCurrent.data().get();
-		pRoadNext = roadNext.data().get();
-	}
-	else {
-		timestepIsEven = true;
-		pRoadCurrent = roadNext.data().get();
-		pRoadNext = roadCurrent.data().get();
-	}
-}
-
-
-void TrafficTimestep::copyCurrentDeviceToHost(std::shared_ptr<RoadNetworkData> road)
-{
-	if (timestepIsEven) thrust::copy(roadCurrent.begin(), roadCurrent.end(), road->pcurrent->begin());
-	else thrust::copy(roadNext.begin(), roadNext.end(), road->pcurrent->begin());
-
-	//checkCudaErrors(cudaMemcpy(&(road->conc[0]), pConcArray, size_roads * sizeof(real), cudaMemcpyDeviceToHost)); //dispConcFromGPU
-}
-
-
-void TrafficTimestep::cleanUp()
-{
-	cudaFree(statesRoad);
-	cudaFree(statesJunctions);
-	cudaFree(statesSources);
-}
-
-
-void TrafficTimestep::callTrafficTimestepKernel()
-{
-	trafficTimestepKernel << < gridRoad, threadsRoads >> > (
-		pRoadCurrent,
-		pRoadNext,
-		neighbors.data().get(),
-		pConcArray,
-		juncInCellIndices.data().get(),
-		juncCarCanEnter.data().get(),
-		juncCarsOnJunction.data().get(),
-		juncAlreadyMoved.data().get(),
-		juncOldSpeeds.data().get(),
-		sinkCarBlockedPossibilities.data().get(),
-		size_roads,
-		size_juncInCells,
-		maxVelocity,
-		maxAcceleration,
-		safetyDistance,
-		useSlowToStart,
-		slowStartPossibility,
-		dawdlePossibility,
-		statesRoad);
-}
-
-
-void TrafficTimestep::callSourceTimestepKernel()
-{
-	sourceTimestepKernel << < gridSources, threadsSources >> > (
-		pRoadCurrent,
-		pRoadNext,
-		neighbors.data().get(),
-		sourceIndices.data().get(),
-		sinkCarBlockedPossibilities.data().get(),
-		sourcePossibilities.data().get(),
-		pConcArray,
-		maxVelocity,
-		safetyDistance,
-		size_sources,
-		statesSources);
-}
-
-
-void TrafficTimestep::callJunctionTimestepKernel()
-{
-	junctionTimestepKernel << < gridJunctions, threadsJunctions >> > (
-		juncCarsOnJunction.data().get(),
-		juncInCellIndices.data().get(),
-		juncOutCellIndices.data().get(),
-		juncStartInIncells.data().get(),
-		juncStartInOutcells.data().get(),
-		juncAlreadyMoved.data().get(),
-		juncCarCanNotEnterThisOutCell.data().get(),
-		juncOutCellIsOpen.data().get(),
-		juncOldSpeeds.data().get(),
-		juncCarCanEnter.data().get(),
-		juncTrafficLightSwitchTime.data().get(),
-		pRoadCurrent,
-		pRoadNext,
-		neighbors.data().get(),
-		pConcArray,
-		sinkCarBlockedPossibilities.data().get(),
-		safetyDistance,
-		size_juncInCells,
-		size_juncOutCells,
-		size_junctions,
-		numTimestep,
-		statesJunctions);
-}
-
-
-void TrafficTimestep::callCalculationOfNaschVelocityForFluidBCKernel()
-{
-	calculationOfNaschVelocityForFluidBCKernel << < gridRoad, threadsRoads >> > (
-		pRoadNext,
-		neighbors.data().get(),
-		naschVelocity,
-		size_roads,
-		safetyDistance);
-}
-
-
-
-__global__ void calculationOfNaschVelocityForFluidBCKernel(int* roadNext, int* neighbors, int* naschVelocity, uint size_road, uint safetyDistance)
-{
-	const uint x = threadIdx.x;  // Globaler x-Index 
-	const uint y = blockIdx.x;   // Globaler y-Index 
-	const uint z = blockIdx.y;   // Globaler z-Index 
-
-	const uint nx = blockDim.x;
-	const uint ny = gridDim.x;
-	const uint index = nx*(ny*z + y) + x;
-
-	if (index >= size_road) return;
-	//////////////////////////////////////////////////////////////////////////
-	//reset
-	naschVelocity[index] = -1;
-	//////////////////////////////////////////////////////////////////////////
-	if (roadNext[index] < 0) return;
-
-	int speed = roadNext[index];
-
-	naschVelocity[index] = speed;
-
-	int neighbor = neighbors[index];
-
-	for (uint j = 1; j <= safetyDistance; j++) {
-		if (neighbor <= -1000)
-			return;
-		else
-			naschVelocity[neighbor] = speed;
-		neighbor = neighbors[neighbor];
-	}
-}
-
-__global__ void resetNext(int * roadNext, uint size_road)
-{
-	const uint x = threadIdx.x;  // Globaler x-Index 
-	const uint y = blockIdx.x;   // Globaler y-Index 
-	const uint z = blockIdx.y;   // Globaler z-Index 
-
-	const uint nx = blockDim.x;
-	const uint ny = gridDim.x;
-	const uint index = nx*(ny*z + y) + x;
-
-	if (index >= size_road) return;
-
-	roadNext[index] = -1;
-}
-
-
-
-__global__ void trafficTimestepKernel(int* roadCurrent, int* roadNext, int* neighbors, real * pConcArray, uint* juncInCellIndices, bool* juncCarCanEnter,
-	int* juncCarsOnJunction, uint* juncAlreadyMoved, uint* juncOldSpeeds, real* sinkCarBlockedPossibilities,
-	uint size_road, uint size_juncInCells, uint maxVelocity, uint maxAcceleration, uint safetyDistance, bool useSlowToStart, real slowStartPossibility, real dawdlePossibility, curandState *state)
-{
-	//////////////////////////////////////////////////////////////////////////
-	const uint x = threadIdx.x;  // Globaler x-Index 
-	const uint y = blockIdx.x;   // Globaler y-Index 
-	const uint z = blockIdx.y;   // Globaler z-Index 
-
-	const uint nx = blockDim.x;
-	const uint ny = gridDim.x;
-	const uint index = nx*(ny*z + y) + x;
-
-	if (index >= size_road) return;
-	////////////////////////////////////////////////////////////////////////////////
-	if (roadCurrent[index] < 0) return;
-
-	//reset concentrations
-	pConcArray[index] = 0.0;
-
-	//printf("index %d ", index);
-
-	uint speed = roadCurrent[index];
-
-
-	//// accelerate car ////////////////////////////////////////////////////////////////////
-	if (speed < maxVelocity) {
-		if (speed <= maxVelocity - maxAcceleration)
-			speed += maxAcceleration;
-		else
-			speed = maxVelocity;
-	}
-
-
-	//////// brake car /////////////////////////////////////////////////////////////////////////
-	//getGapAfterCar
-	uint gap = speed;
-	uint idx = 0;
-	int neighbor = neighbors[index];
-	uint currentCell = index;
-
-	for (uint i = 0; i < (speed + safetyDistance); i++) {
-
-		//sink
-		if (neighbor <= -2000) {
-			curandState localState = state[index];
-			float random = curand_uniform(&localState);
-
-			if (i <= speed && !(random < sinkCarBlockedPossibilities[(neighbor + 2000)*-1]))  gap = speed;
-			else gap = i;
-
-			state[index] = localState;
-			break;
-		}
-
-		//junction
-		if (neighbor <= -1000 && neighbor > -2000) {
-			idx = getJunctionInCellsVectorIndex(juncInCellIndices, size_juncInCells, currentCell);
-			if (juncCarCanEnter[idx] && i <= speed) gap = speed;
-			else gap = i;
-			break;
-		}
-
-		//car in Cell
-		if (roadCurrent[neighbor] > -1) {
-			if (i <= safetyDistance) gap = 0;
-			else gap = i - safetyDistance;
-			break;
-		}
-
-		//empty cell -> get next neighbor, update currentCell
-		currentCell = neighbor;
-		neighbor = neighbors[neighbor];
-	}
-
-	//brake
-	if (speed > gap)
-		speed = gap;
-
-
-	////// dawdleCar ///////////////////////////////////////////////////////////////////////////
-	curandState localState = state[index];
-	float random = curand_uniform(&localState);
-	state[index] = localState;
-
-	//Barlovic / SlowToStart
-	if (useSlowToStart == true && roadCurrent[index] == 0) {
-		if (random < slowStartPossibility) speed = 0;
-	}
-	else if (random < dawdlePossibility) //Standard NaSch
-		if (speed >= maxAcceleration)
-			speed -= maxAcceleration;
-		else
-			speed = 0;
-
-
-	////// moveCar /////////////////////////////////////////////////////////////////////////////
-	if (speed == 0) {
-		(roadNext)[index] = 0;
-		putConcIntoArray(pConcArray, roadCurrent[index], speed, index);
-		return;
-	}
-
-	neighbor = neighbors[index];
-	currentCell = index;
-
-	// iterateNeighborsInMove
-	uint numberOfCellsMoved = 0;
-	for (uint i = 2; i <= speed; i++) {
-		if (neighbor >= 0) {
-			currentCell = neighbor;
-			neighbor = neighbors[neighbor];
-			++numberOfCellsMoved;
-		}
-		else
-			break;
-	}
-
-	if (neighbor <= -2000) return;
-
-	if (neighbor <= -1000 && neighbor > -2000) {
-		//registerCar
-
-		juncCarsOnJunction[idx] = speed;
-		//getCarsOnJunction[index] = speed -2; //all cars, which enter the junction have to slow down
-		juncOldSpeeds[idx] = roadCurrent[index];
-		juncCarCanEnter[idx] = false;
-		juncAlreadyMoved[idx] = numberOfCellsMoved;
-		return;
-	}
-
-	if (neighbor >= 0) {
-		roadNext[neighbor] = speed;
-		putConcIntoArray(pConcArray, roadCurrent[index], speed, neighbor);
-	}
-}
-
-
-
-__global__ void sourceTimestepKernel(int* roadCurrent, int* roadNext, int* neighbors, uint* sourceIndices, real* sinkCarBlockedPossibilities, float* sourcePossibilities, real* pConcArray,
-	uint maxVelocity, uint safetyDistance, uint size_sources, curandState *state)
-{
-	//////////////////////////////////////////////////////////////////////////
-	const uint x = threadIdx.x;  // Globaler x-Index 
-	const uint y = blockIdx.x;   // Globaler y-Index 
-	const uint z = blockIdx.y;   // Globaler z-Index 
-
-	const uint nx = blockDim.x;
-	const uint ny = gridDim.x;
-
-	const uint index = nx*(ny*z + y) + x;
-	////////////////////////////////////////////////////////////////////////////////
-
-	if (index >= size_sources) return;
-	////////////////////////////////////////////////////////////////////////////////
-
-	int sourceIndex = sourceIndices[index];
-
-	uint gap = getGapAfterOutCell(roadCurrent, neighbors, sinkCarBlockedPossibilities, sourceIndex, maxVelocity, safetyDistance, state);
-	if (gap > 0) {
-		//get car with random speed
-		curandState localState = state[index];
-		if (curand_uniform(&localState) < sourcePossibilities[index]) {
-			unsigned int speed = ceilf(curand_uniform(&localState) * (maxVelocity + 1)) - 1;
-			roadNext[sourceIndex] = speed;
-			putConcIntoArray(pConcArray, speed, speed, sourceIndex);
-		}
-		state[index] = localState;
-	}
-}
-
-
-
-__global__ void junctionTimestepKernel(int* juncCarsOnJunction, uint* juncInCellIndices, int* juncOutCellIndices, uint* juncStartInIncells, uint* juncStartInOutcells,
-	uint* juncAlreadyMoved, int* juncCarCanNotEnterThisOutCell, bool* juncOutCellIsOpen, uint* juncOldSpeeds, bool* juncCarCanEnter, uint* juncTrafficLightSwitchTime,
-	int* roadCurrent, int* roadNext, int* neighbors, real* pConcArray, real* sinkCarBlockedPossibilities, uint safetyDistance,
-	uint size_juncInCells, uint size_juncOutCells, uint size_junctions, uint numTimestep, curandState* state) {
-	//////////////////////////////////////////////////////////////////////////
-	const uint x = threadIdx.x;  // Globaler x-Index 
-	const uint y = blockIdx.x;   // Globaler y-Index 
-	const uint z = blockIdx.y;   // Globaler z-Index 
-
-	const uint nx = blockDim.x;
-	const uint ny = gridDim.x;
-
-	const uint index = nx*(ny*z + y) + x;
-
-	////////////////////////////////////////////////////////////////////////////////
-
-	if (index >= size_junctions) return;
-	//printf("junctionKernelIndex %d ", index);
-
-	////////////////////////////////////////////////////////////////////////////////
-
-	//calc indices
-
-	uint inCellsSize = 0;
-	uint firstInCellIndex = juncStartInIncells[index];
-	if (index < size_junctions - 1) inCellsSize = juncStartInIncells[index + 1] - firstInCellIndex;
-	else inCellsSize = size_juncInCells - firstInCellIndex;
-
-	uint outCellSize = 0;
-	uint firstOutCellIndex = juncStartInOutcells[index];
-	if (index < size_junctions - 1) outCellSize = juncStartInOutcells[index + 1] - firstOutCellIndex;
-	else outCellSize = size_juncOutCells - firstOutCellIndex;
-
-
-	//// loop through all cars /////////////////////////////////////////////////////////////////////////////
-
-	for (uint inCellVectorIndex = firstInCellIndex; inCellVectorIndex < firstInCellIndex + inCellsSize; inCellVectorIndex++) {
-
-		if (juncCarsOnJunction[inCellVectorIndex] >= 0) {
-
-			//// applyRules /////////////////////////////////////////////////////////////////////////////
-			uint speed = juncCarsOnJunction[inCellVectorIndex];
-			if (speed == 0 && juncAlreadyMoved[inCellVectorIndex] == 0) speed += 1;
-			int remainingDist = speed - static_cast<int>(juncAlreadyMoved[inCellVectorIndex]);
-
-			//printf(" incell %d, speed %d, remainingDist %d, alreadyMoved %d ", inCellVectorIndex, speed, remainingDist, juncAlreadyMoved[inCellVectorIndex]);
-
-			if (remainingDist == 0) { //car can't leave the junction
-				carStaysOnJunction(juncCarsOnJunction, juncInCellIndices, juncAlreadyMoved, juncOldSpeeds, pConcArray, inCellVectorIndex);
-				continue;
-			}
-
-			else {
-
-				//// calc numberOfPossibleOutCells ////////////////////////////////////////////////////////////////
-				uint numberOfPossibleOutCells = 0;
-
-				for (uint outCellIndex = firstOutCellIndex; outCellIndex < firstOutCellIndex + outCellSize; outCellIndex++)
-					if (juncCarCanNotEnterThisOutCell[inCellVectorIndex] != juncOutCellIndices[outCellIndex] && juncOutCellIsOpen[outCellIndex] == true)
-						numberOfPossibleOutCells++;
-
-
-				if (numberOfPossibleOutCells == 0)  //car can't leave the junction
-				{
-					carStaysOnJunction(juncCarsOnJunction, juncInCellIndices, juncAlreadyMoved, juncOldSpeeds, pConcArray, inCellVectorIndex);
-					continue;
-				}
-
-				//// chooseOutCell ///////////////////////////////////////////////////////////////////////////////
-				int chosenCell = -1;
-				curandState localState = state[index];
-				uint random = ceilf(curand_uniform(&localState) * numberOfPossibleOutCells);
-				for (uint outCellVectorIndex = firstOutCellIndex; outCellVectorIndex < firstOutCellIndex + outCellSize; outCellVectorIndex++) {
-					if (juncCarCanNotEnterThisOutCell[inCellVectorIndex] != juncOutCellIndices[outCellVectorIndex] && juncOutCellIsOpen[outCellVectorIndex] == true) {
-						if (random == 1) {
-							chosenCell = juncOutCellIndices[outCellVectorIndex];
-							juncOutCellIsOpen[outCellVectorIndex] = false;
-							break;
-						}
-						random--;
-					}
-				}
-				state[index] = localState;
-
-				//// brakeCar ////////////////////////////////////////////////////////////////////////////////////
-				if (chosenCell < 0);
-				uint gap = getGapAfterOutCell(roadCurrent, neighbors, sinkCarBlockedPossibilities, chosenCell, speed, safetyDistance, state);
-				if (gap < remainingDist) {
-					if (gap > speed) gap = speed;
-					speed = speed - remainingDist + gap;
-					remainingDist = gap;
-				}
-
-				//// moveCar /////////////////////////////////////////////////////////////////////////////////////
-				if (remainingDist <= 0) { //car can't leave the junction
-					carStaysOnJunction(juncCarsOnJunction, juncInCellIndices, juncAlreadyMoved, juncOldSpeeds, pConcArray, inCellVectorIndex);
-					continue;
-				}
-
-				if (remainingDist > 0) {
-
-					if (remainingDist == 1) {
-						roadNext[chosenCell] = speed;
-						putConcIntoArray(pConcArray, juncOldSpeeds[inCellVectorIndex], speed, chosenCell);
-						juncCarsOnJunction[inCellVectorIndex] = -1;
-						juncCarCanEnter[inCellVectorIndex] = true;
-						break;
-					}
-
-					//iterate through neighbors
-					int neighbor = chosenCell;
-					for (uint i = 2; i <= remainingDist; i++) {
-						if (neighbor >= 0) {
-							chosenCell = neighbor;
-							neighbor = neighbors[neighbor];
-						}
-						else
-							break;
-					}
-
-					if (neighbor >= 0) {
-						roadNext[neighbor] = speed;
-						putConcIntoArray(pConcArray, juncOldSpeeds[inCellVectorIndex], speed, neighbor);
-					}
-
-					juncCarsOnJunction[inCellVectorIndex] = -1;
-					juncCarCanEnter[inCellVectorIndex] = true;
-				}
-			}
-		}
-	}
-
-
-	//generate red TrafficLights
-
-	if (juncTrafficLightSwitchTime[index] > 0) {
-
-		uint halfNumStreets = (uint)(std::floor((float)inCellsSize * 0.5f));
-
-		if ((uint)(std::floor((float)numTimestep / (float)juncTrafficLightSwitchTime[index])) % 2 == 0) {
-			for (uint i = firstInCellIndex; i < firstInCellIndex + halfNumStreets; i++)
-				juncCarCanEnter[i] = false;
-
-			if (numTimestep % juncTrafficLightSwitchTime[index] == 0) //first timestep with green light --> open the streets that were closed before
-				for (uint i = firstInCellIndex + halfNumStreets; i < firstInCellIndex + inCellsSize; i++)
-					if (juncCarsOnJunction[i] == -1)
-						juncCarCanEnter[i] = true;
-		}
-		else {
-			for (uint i = firstInCellIndex + halfNumStreets; i < firstInCellIndex + inCellsSize; i++)
-				juncCarCanEnter[i] = false;
-
-			if (numTimestep % juncTrafficLightSwitchTime[index] == 0) //first timestep with green light --> open the streets that were closed before
-				for (uint i = firstInCellIndex; i < firstInCellIndex + halfNumStreets; i++)
-					if (juncCarsOnJunction[i] == -1)
-						juncCarCanEnter[i] = true;
-		}
-	}
-
-}
-
-
-__device__ inline void carStaysOnJunction(int* juncCarsOnJunction, uint* juncInCellIndices, uint* juncAlreadyMoved, uint*juncOldSpeeds, real* pConcArray, uint inCellVectorIndex) {
-	addConcToArray(pConcArray, juncOldSpeeds[inCellVectorIndex], 0, juncInCellIndices[inCellVectorIndex]);
-	juncCarsOnJunction[inCellVectorIndex] = 0;
-	juncAlreadyMoved[inCellVectorIndex] = 0;
-	juncOldSpeeds[inCellVectorIndex] = 0;
-}
-
-
-__device__ inline uint getGapAfterOutCell(int* roadCurrent, int* neighbors, real* sinkCarBlockedPossibilities, int outCellIndex, uint speed, uint safetyDistance, curandState* state)
-{
-	if (roadCurrent[outCellIndex] > -1)
-		return 0;
-
-	for (uint i = 0; i < (speed + safetyDistance); i++) {
-		//sink
-		if (outCellIndex <= -2000) {
-			const uint index = blockDim.x*(gridDim.x*blockIdx.y + blockIdx.x) + threadIdx.x;
-			curandState localState = state[index];
-			float randomNumber = curand_uniform(&localState);
-			state[index] = localState;
-			if (i <= speed && !(randomNumber < sinkCarBlockedPossibilities[(outCellIndex + 2000)*-1]))
-				return speed;
-			return i;
-		}
-		//junction
-		if (outCellIndex <= -1000)
-			return i;
-
-		//car in Cell
-		if (roadCurrent[outCellIndex] > -1) {
-			if (i <= safetyDistance) return 0;
-			return i - safetyDistance;
-		}
-
-		//empty cell -> get next neighbor
-		outCellIndex = neighbors[outCellIndex];
-	}
-	return speed;
-}
-
-
-inline __device__ uint getJunctionInCellsVectorIndex(uint* juncInCellIndices, uint size_juncInCells, uint cell) {
-	for (uint i = 0; i < size_juncInCells; i++)
-		if (juncInCellIndices[i] == cell)
-			return i;
-	//TODO real Error
-	printf("no matching incoming cell to a junction found in: getJunctionInCellsVectorIndex()");
-	return 65000;
-}
-
-
-__global__ void randomSetupKernel(curandState *state, uint size) {
-	const uint x = threadIdx.x;  // Globaler x-Index 
-	const uint y = blockIdx.x;   // Globaler y-Index 
-	const uint z = blockIdx.y;   // Globaler z-Index 
-
-	const uint nx = blockDim.x;
-	const uint ny = gridDim.x;
-
-	const uint index = nx*(ny*z + y) + x;
-
-	if (index >= size) return;
-
-	curand_init((unsigned long long)clock() + index, index, 0, &state[index]);
-}
-
-
-__device__ real calcConcentration(uint oldSpeed, uint newSpeed)
-{
-	//printf("newIndex %d ", newIndex );
-	if (oldSpeed == 0 && newSpeed > 0) //Start
-		return 0.833f;
-	else if (oldSpeed == 0 && newSpeed == 0) //Idle
-		return 0.069f;
-	else if (newSpeed == oldSpeed) //Drive
-		return 0.221f;
-	else if (newSpeed > oldSpeed) //Accelerate
-		return 0.625f;
-	else if (newSpeed < oldSpeed) //Brake
-		return 0.379f;
-	else
-		printf("couldn't choose driving state in calcConcentration");
-	return -1;
-}
-
-
-__device__ void putConcIntoArray(real * pConcArray, uint oldSpeed, uint newSpeed, uint newIndex)
-{
-	pConcArray[newIndex] = calcConcentration(oldSpeed, newSpeed);
-}
-
-
-__device__ void addConcToArray(real * pConcArray, uint oldSpeed, uint newSpeed, uint newIndex)
-{
-	pConcArray[newIndex] += calcConcentration(oldSpeed, newSpeed);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-void TrafficTimestep::combineJuncInCellIndices(std::vector<std::shared_ptr<Junction>> &junctions)
-{
-	for (auto& j : junctions) {
-		if (juncInCellIndices.size() == 0) juncStartInIncells.push_back(0);
-		else juncStartInIncells.push_back(castSizeT_Uint(juncInCellIndices.size()));
-
-		for (uint i : j->getInCellIndices())
-			this->juncInCellIndices.push_back(i);
-	}
-	size_juncInCells = castSizeT_Uint(juncInCellIndices.size());
-}
-
-
-void TrafficTimestep::combineJuncOutCellIndices(std::vector<std::shared_ptr<Junction>> &junctions)
-{
-	for (auto& j : junctions) {
-		if (juncOutCellIndices.size() == 0) juncStartInOutcells.push_back(0);
-		else juncStartInOutcells.push_back(castSizeT_Uint(juncOutCellIndices.size()));
-
-		for (uint i : j->getOutCellIndices())
-			this->juncOutCellIndices.push_back(i);
-	}
-	size_juncOutCells = castSizeT_Uint(juncOutCellIndices.size());
-}
-
-
-void TrafficTimestep::initJuncCarCanEnter()
-{
-	juncCarCanEnter.resize(size_juncInCells);
-	thrust::fill(juncCarCanEnter.begin(), juncCarCanEnter.end(), true);
-}
-
-void TrafficTimestep::initJuncCarsOnJunction()
-{
-	juncCarsOnJunction.resize(size_juncInCells);
-	thrust::fill(juncCarsOnJunction.begin(), juncCarsOnJunction.end(), -1);
-}
-
-
-void TrafficTimestep::combineSinkBlockedPossibilities(std::vector<std::shared_ptr<Sink>> &sinks)
-{
-	for (auto& s : sinks)
-		this->sinkCarBlockedPossibilities.push_back(s->getPossibilityBeingBlocked());
-}
-
-
-void TrafficTimestep::combineSourcePossibilities(std::vector<std::shared_ptr<Source>> &sources) {
-	for (auto& s : sources)
-		this->sourcePossibilities.push_back(s->getPossibility());
-}
-
-void TrafficTimestep::combineSourceIndices(std::vector<std::shared_ptr<Source>> &sources)
-{
-	for (auto& s : sources)
-		this->sourceIndices.push_back(s->getIndex());
-}
-
-
-void TrafficTimestep::initJuncAlreadyMoved()
-{
-	juncAlreadyMoved.resize(size_juncInCells);
-	thrust::fill(juncAlreadyMoved.begin(), juncAlreadyMoved.end(), 0);
-}
-
-void TrafficTimestep::initJuncOldSpeeds()
-{
-	juncOldSpeeds.resize(size_juncInCells);
-	thrust::fill(juncOldSpeeds.begin(), juncOldSpeeds.end(), 0);
-}
-
-void TrafficTimestep::combineUseTrafficLights(std::vector<std::shared_ptr<Junction>>& junctions)
-{
-	for (auto& j : junctions)
-		juncTrafficLightSwitchTime.push_back(j->getTrafficLightSwitchTime());
-}
-
-void TrafficTimestep::initjuncOutCellIsOpen()
-{
-	juncOutCellIsOpen.resize(juncOutCellIndices.size());
-	resetOutCellIsOpen();
-}
-
-void TrafficTimestep::combineJuncCarCanNotEnterThisOutCell(std::vector<std::shared_ptr<Junction>>& junctions)
-{
-	for (auto& j : junctions)
-		for (int i : j->getCarCanNotEnterThisOutCell())
-			this->juncCarCanNotEnterThisOutCell.push_back(i);
-
-	if (juncCarCanNotEnterThisOutCell.size() < size_juncInCells)
-		juncCarCanNotEnterThisOutCell.push_back(-2);
-}
-
-uint TrafficTimestep::getNumCarsOnJunctions()
-{
-	uint num = 0;
-	for (int car : juncCarsOnJunction)
-		if (car >= 0) ++num;
-	return num;
-}
-
-void TrafficTimestep::resetOutCellIsOpen()
-{
-	thrust::fill(juncOutCellIsOpen.begin(), juncOutCellIsOpen.end(), true);
-}
-
-void TrafficTimestep::resetNext()
-{
-	if (timestepIsEven)
-		thrust::fill(roadCurrent.begin(), roadCurrent.end(), -1);
-	else
-		thrust::fill(roadNext.begin(), roadNext.end(), -1);
-}
-
-
-void TrafficTimestep::calculateTrafficTimestepKernelDimensions()
-{
-	unsigned int numberOfThreads = 64;
-	int Grid = (size_roads / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 gridRoad1(Grid1, Grid2);
-	this->gridRoad = gridRoad1;
-	dim3 threadsRoads1(numberOfThreads, 1, 1);
-	this->threadsRoads = threadsRoads1;
-}
-
-void TrafficTimestep::calculateJunctionKernelDimensions()
-{
-	unsigned int numberOfThreads = 32;
-	int Grid = (size_junctions / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 gridJunctions1(Grid1, Grid2);
-	this->gridJunctions = gridJunctions1;
-	dim3 threadsJunctions1(numberOfThreads, 1, 1);
-	this->threadsJunctions = threadsJunctions1;
-}
-
-void TrafficTimestep::calculateSourceKernelDimensions()
-{
-	unsigned int numberOfThreads = 32;
-	int Grid = (size_sinks / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid > 512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 gridSources1(Grid1, Grid2);
-	this->gridSources = gridSources1;
-	dim3 threadsSources1(numberOfThreads, 1, 1);
-	this->threadsSources = threadsSources1;
-}
diff --git a/src/gpu/Traffic/GPU/TrafficTimestep.h b/src/gpu/Traffic/GPU/TrafficTimestep.h
deleted file mode 100644
index 113cc3d11260d7e89231e50951e75e43e17faf33..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/GPU/TrafficTimestep.h
+++ /dev/null
@@ -1,144 +0,0 @@
-#ifndef  TrafficTimestep_H
-#define  TrafficTimestep_H
-
-#include <vector>
-#include <memory>
-
-#include <thrust/device_vector.h>
-#include <curand_kernel.h>
-
-
-#include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
-#include "Traffic_export.h"
-
-struct RoadNetworkData;
-class Sink;
-class Junction;
-class Source;
-
-class TRAFFIC_EXPORT TrafficTimestep
-{
-private:
-
-	bool timestepIsEven = true;
-	uint numTimestep = 0;
-
-
-	uint maxVelocity;
-	uint safetyDistance;
-	real dawdlePossibility;
-	bool useSlowToStart;
-	real slowStartPossibility;
-	uint maxAcceleration;
-
-
-	//sizes
-	uint size_roads;
-	uint size_junctions;
-	uint size_juncInCells;
-	uint size_juncOutCells;
-	uint size_sources;
-	uint size_sinks;
-
-
-	//grids
-	dim3 gridRoad;
-	dim3 threadsRoads;
-	dim3 gridJunctions;
-	dim3 threadsJunctions;
-	dim3 gridSources;
-	dim3 threadsSources;
-
-
-	//road
-	thrust::device_vector<int> neighbors;
-	thrust::device_vector<int> roadCurrent;
-	thrust::device_vector<int> roadNext;
-
-
-	//junctions
-	thrust::device_vector<uint> juncTrafficLightSwitchTime; //no TrafficLight: 0
-
-	thrust::device_vector<uint> juncInCellIndices;
-	thrust::device_vector<bool> juncCarCanEnter;
-	thrust::device_vector<int> juncCarsOnJunction;
-	thrust::device_vector<uint> juncAlreadyMoved;
-	thrust::device_vector<uint> juncOldSpeeds;
-
-	thrust::device_vector<int> juncOutCellIndices;
-	thrust::device_vector<int> juncCarCanNotEnterThisOutCell; //no such inCell: -2
-	thrust::device_vector<bool> juncOutCellIsOpen;
-	thrust::device_vector<uint> juncStartInIncells;
-	thrust::device_vector<uint> juncStartInOutcells;
-
-
-	//sinks
-	thrust::device_vector<real> sinkCarBlockedPossibilities;
-
-
-	//sources
-	thrust::device_vector<float> sourcePossibilities;
-	thrust::device_vector<uint> sourceIndices;
-
-
-	//concentrations
-	real * pConcArray;
-
-	//nasch velocity to set fluidBC
-	int* naschVelocity;
-
-	//curandStates
-	curandState *statesJunctions;
-	curandState *statesSources;
-	curandState *statesRoad;
-
-	int* pRoadCurrent;
-	int* pRoadNext;
-
-public:
-
-	TrafficTimestep(std::shared_ptr<RoadNetworkData> road, real * pConcArray, int* naschVelocity);
-	void calculateTimestep(std::shared_ptr<RoadNetworkData> road);
-	void cleanUp();
-	uint getNumCarsOnJunctions(); //only used for debugging
-	void copyCurrentDeviceToHost(std::shared_ptr<RoadNetworkData> road);
-
-private:
-	
-	//timestep
-	void switchCurrentNext();
-	void resetOutCellIsOpen();
-	void resetNext();
-
-	//kernel calls
-	void callTrafficTimestepKernel();
-	void callSourceTimestepKernel();
-	void callJunctionTimestepKernel();
-	void callCalculationOfNaschVelocityForFluidBCKernel();
-
-	//init grids
-	void calculateTrafficTimestepKernelDimensions();
-	void calculateJunctionKernelDimensions();
-	void calculateSourceKernelDimensions();
-
-	//init junctions
-	void combineJuncInCellIndices(std::vector<std::shared_ptr<Junction> > &junctions);
-	void combineJuncOutCellIndices(std::vector<std::shared_ptr<Junction> > &junctions);
-	void combineJuncCarCanNotEnterThisOutCell(std::vector<std::shared_ptr<Junction> > &junctions);
-	void combineUseTrafficLights(std::vector<std::shared_ptr<Junction>>& junctions);
-	void initjuncOutCellIsOpen();
-	void initJuncCarCanEnter();
-	void initJuncCarsOnJunction();
-	void initJuncAlreadyMoved();
-	void initJuncOldSpeeds();
-	
-	//init sinks
-	void combineSinkBlockedPossibilities(std::vector<std::shared_ptr<Sink>>& sinks);
-
-	//init sources
-	void combineSourcePossibilities(std::vector<std::shared_ptr<Source> > &sources);
-	void combineSourceIndices(std::vector<std::shared_ptr<Source> > &sources);
-};
-
-#endif
diff --git a/src/gpu/Traffic/Junction/Junction.h b/src/gpu/Traffic/Junction/Junction.h
deleted file mode 100644
index 68da2bb1063c30e2259925dca6b6aeaec994107d..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Junction/Junction.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#pragma once
-#include <vector>
-
-
-#include "Core/DataTypes.h"
-
-#include "JunctionData.h"
-#include "Traffic_export.h"
-
-class TrafficMovement;
-
-class TRAFFIC_EXPORT Junction
-{
-public:
-	virtual void checkOutCellIndices(const uint roadLength) const = 0;
-
-	virtual void setCellIndicesForNoUTurn(std::vector<int> carCanNotEnterThisOutCell) = 0;
-
-	virtual bool acceptsCar(uint cellIndex) = 0; //determines if a car can enter the junction
-	virtual void registerCar(uint cellIndex, uint numberOfCellsAlreadyMoved, uint speed, uint oldSpeed) = 0; //registers all cars entering the junction
-	virtual void calculateTimeStep(TrafficMovement &road, uint currentTimestep) = 0;
-
-	virtual const std::vector<uint>& getInCellIndices()const = 0;
-	virtual const std::vector<uint>& getOutCellIndices() const = 0;
-	virtual const std::vector<bool>& getCarCanEnter() const = 0;
-	virtual const std::vector<int>& getCarsOnJunction()const = 0;
-	virtual const std::vector<uint>& getAlreadyMoved()const = 0;
-	virtual const std::vector<uint>& getOldSpeeds()const = 0;
-	virtual const std::vector<int>& getCarCanNotEnterThisOutCell() const = 0;
-	virtual uint getTrafficLightSwitchTime()const = 0;
-
-	virtual void dispJunction(const uint index, const uint roadLength) const = 0;
-	virtual uint getNumCarsOnJunction() const = 0;
-};
\ No newline at end of file
diff --git a/src/gpu/Traffic/Junction/JunctionData.h b/src/gpu/Traffic/Junction/JunctionData.h
deleted file mode 100644
index 3b72b1ef5db4d60515a2634586e6096aa115731a..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Junction/JunctionData.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#pragma once
-
-
-#include <vector>
-#include <memory>
-
-#include "Utilities/RandomHelper.h"
-#include "Traffic_export.h"
-
-
-struct TRAFFIC_EXPORT JunctionData
-{
-public:
-	std::vector<uint> inCellIndices;
-	std::vector<uint> outCellIndices; 
-	std::vector<int> carCanNotEnterThisOutCell; //no such inCell: -2
-
-	std::vector<uint> possibleOutCells;
-
-	std::vector<bool> carCanEnter;
-	std::vector<int> carsOnJunction;
-	std::vector<uint> alreadyMoved;
-	std::vector<uint> oldSpeeds;
-
-	uint trafficLightSwitchTime; //no TrafficLight: 0
-	uint halfNumStreets;
-
-	std::mt19937 engine = RandomHelper::make_engine();
-	std::uniform_real_distribution<float> distFloat{ 0.0, 1.0 };
-};
-
diff --git a/src/gpu/Traffic/Junction/JunctionRandom.cpp b/src/gpu/Traffic/Junction/JunctionRandom.cpp
deleted file mode 100644
index 7e6815a2a19193b6149e31e49e268a1606c7a3a7..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Junction/JunctionRandom.cpp
+++ /dev/null
@@ -1,298 +0,0 @@
-#include "JunctionRandom.h"
-
-#include <iostream>
-#include <iomanip> //formatting output streams
-#include <algorithm> //used for find()
-#include <math.h> //used for floor()
-
-#include "TrafficMovement.h"
-
-#include "Utilities/invalidInput_error.h"
-#include "Utilities/VectorHelper.h"
-#include "Utilities/safe_casting.h"
-
-
-JunctionRandom::JunctionRandom(const std::vector<uint> &inCellIndices, const std::vector<uint> &outCellIndices, uint trafficLightSwitchTime)
-{
-	data.inCellIndices = inCellIndices;
-	data.outCellIndices = outCellIndices;
-
-	uint inRoads = castSizeT_Uint(inCellIndices.size());
-
-	data.carCanEnter.resize(inRoads);
-	std::fill(data.carCanEnter.begin(), data.carCanEnter.end(), true);
-
-	data.carsOnJunction.resize(inRoads);
-	std::fill(data.carsOnJunction.begin(), data.carsOnJunction.end(), -1);
-
-	data.alreadyMoved.resize(inRoads);
-	std::fill(data.alreadyMoved.begin(), data.alreadyMoved.end(), 0);
-
-	data.oldSpeeds.resize(inRoads);
-
-	this->data.trafficLightSwitchTime = trafficLightSwitchTime;
-	data.halfNumStreets = static_cast<uint>(std::floor(static_cast<float>(data.inCellIndices.size()) * 0.5f));
-}
-
-
-void JunctionRandom::setCellIndicesForNoUTurn(std::vector<int> carCanNotEnterThisOutCell)
-{
-	try {
-
-		if (data.inCellIndices.size() != carCanNotEnterThisOutCell.size()) throw invalidInput_error("The Vector carCanNotEnterThisOutCell and inCellIndices have to be the same size.");
-		data.carCanNotEnterThisOutCell = carCanNotEnterThisOutCell;
-
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-bool JunctionRandom::acceptsCar(uint cellIndex)
-{
-	return data.carCanEnter[getInCellsVectorIndex(cellIndex)];
-}
-
-
-void JunctionRandom::registerCar(uint cellIndex, uint alreadyMoved, uint speed, uint oldSpeed)
-{
-	uint index = getInCellsVectorIndex(cellIndex);
-
-	data.carsOnJunction[index] = speed;
-	//data.carsOnJunction[index] = 0; //all cars, which enter the junction have to stop
-	data.oldSpeeds[index] = oldSpeed;
-	data.carCanEnter[index] = false;
-	data.alreadyMoved[index] = alreadyMoved;
-}
-
-
-uint JunctionRandom::getInCellsVectorIndex(uint cellIndex)
-{
-	try {
-		auto it = find(data.inCellIndices.begin(), data.inCellIndices.end(), cellIndex);
-
-		if (it != data.inCellIndices.end())
-			return static_cast <uint> (distance(data.inCellIndices.begin(), it));
-
-		throw std::runtime_error("The passed cell is not an incoming cell to this junction.");
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void JunctionRandom::calculateTimeStep(TrafficMovement& road, uint currentTimestep)
-{
-	data.possibleOutCells = data.outCellIndices;
-
-	uint index = 0;
-	for (int carSpeed : data.carsOnJunction) {
-		if (carSpeed >= 0) { //check if there is a car on the junction
-			applyRules(carSpeed, index, road);
-		}
-		++index;
-	}
-	writeConcentrations(road);
-
-	calculateTrafficLightTimeStep(currentTimestep);
-}
-
-
-void JunctionRandom::calculateTrafficLightTimeStep(uint currentTimestep)
-{
-	if (data.trafficLightSwitchTime > 0) {
-		if (static_cast<uint>(std::floor(static_cast<float>(currentTimestep) / static_cast<float>(data.trafficLightSwitchTime))) % 2 == 0)
-			turnFirstHalfRed(currentTimestep);
-		else
-			turnSecondHalfRed(currentTimestep);
-	}
-}
-
-void JunctionRandom::turnFirstHalfRed(uint currentTimestep)
-{
-	for (uint i = 0; i < data.halfNumStreets; i++)
-		data.carCanEnter[i] = false;
-
-	if (currentTimestep % data.trafficLightSwitchTime == 0) //first timestep with green light --> open the streets that were closed before
-		for (uint i = data.halfNumStreets; i < data.inCellIndices.size(); i++)
-			if (data.carsOnJunction[i] == -1)
-				data.carCanEnter[i] = true;
-}
-
-void JunctionRandom::turnSecondHalfRed(uint currentTimestep)
-{
-	for (uint i = data.halfNumStreets; i < data.inCellIndices.size(); i++)
-		data.carCanEnter[i] = false;
-
-	if (currentTimestep % data.trafficLightSwitchTime == 0) //first timestep with green light --> open the streets that were closed before
-		for (uint i = 0; i < data.halfNumStreets; i++)
-			if (data.carsOnJunction[i] == -1)
-				data.carCanEnter[i] = true;
-}
-
-
-void JunctionRandom::applyRules(int & carSpeed, int index, TrafficMovement& road)
-{
-	if (carSpeed == 0 && data.alreadyMoved[index] == 0)
-		carSpeed += 1;
-
-	int remainingDistance = carSpeed - static_cast<int>(data.alreadyMoved[index]);
-	if (remainingDistance > 0) {
-		int outCell = chooseOutCell(index);
-		if (outCell >= 0) {
-			brakeCar(outCell, carSpeed, remainingDistance, road);
-			if (remainingDistance > 0) {
-				moveCar(outCell, carSpeed, index, remainingDistance, road);
-				return;
-			}
-		}
-	}
-	data.alreadyMoved[index] = 0;
-	data.carsOnJunction[index] = 0;				//cars, which can't cross the junctionin one timestep, because they already moved to many cells, loose their speed.
-	//data.getCarsOnJunction[index] = carSpeed;	//cars, which can't cross the junction in one timestep, because they already moved to many cells, keep their speed.
-}
-
-
-void JunctionRandom::brakeCar(uint outCellIndex, int &speed, int &remainingDistance, TrafficMovement& road)
-{
-	int gap = road.getGapAfterOutCell(outCellIndex, remainingDistance);
-	if (gap < remainingDistance) {
-		if (gap > speed) gap = speed;
-		speed = speed - remainingDistance + gap;
-		remainingDistance = gap;
-	}
-}
-
-
-void JunctionRandom::moveCar(uint outCell, int carSpeed, int index, int remainingDistance, TrafficMovement& road)
-{
-	road.moveJunctionCar(outCell, remainingDistance, carSpeed, data.oldSpeeds[index]);
-	data.carsOnJunction[index] = -1;
-	data.carCanEnter[index] = true;
-}
-
-
-int JunctionRandom::chooseOutCell(int index)
-{
-	std::vector<uint> outCellsTemp;
-
-	if (data.carCanNotEnterThisOutCell.size() > 0 && data.carCanNotEnterThisOutCell[index] >= 0) {
-		for (uint cell : data.possibleOutCells) {
-			if (cell != data.carCanNotEnterThisOutCell[index])
-				outCellsTemp.push_back(cell);
-		}
-	}
-	else
-		outCellsTemp = data.possibleOutCells;
-
-	if (outCellsTemp.size() == 0) return -1;
-	int random = generateRandomOutCellIndex(castSizeT_Uint(outCellsTemp.size()));
-
-	int outCell = outCellsTemp[random];
-	data.possibleOutCells.erase(std::remove(data.possibleOutCells.begin(), data.possibleOutCells.end(), outCell), data.possibleOutCells.end());
-	return outCell;
-}
-
-
-int JunctionRandom::generateRandomOutCellIndex(uint outCellsTempSize)
-{
-	if (outCellsTempSize == 0)
-		return 0;
-
-	return static_cast<int>(std::floor(data.distFloat(data.engine) * outCellsTempSize));
-}
-
-void JunctionRandom::writeConcentrations(TrafficMovement& road)
-{
-	int i = 0;
-	for (int carSpeed : data.carsOnJunction) {
-		if (carSpeed >= 0) {
-			road.writeConcentrationForJunction(data.inCellIndices[i], data.oldSpeeds[i], data.carsOnJunction[i]);
-			data.oldSpeeds[i] = data.carsOnJunction[i];
-		}
-		++i;
-	}
-}
-
-const std::vector<uint>& JunctionRandom::getInCellIndices() const
-{
-	return data.inCellIndices;
-}
-
-const std::vector<uint>& JunctionRandom::getOutCellIndices() const
-{
-	return data.outCellIndices;
-}
-
-const std::vector<bool>& JunctionRandom::getCarCanEnter() const
-{
-	return data.carCanEnter;
-}
-
-const std::vector<int>& JunctionRandom::getCarsOnJunction() const
-{
-	return data.carsOnJunction;
-}
-
-const std::vector<uint>& JunctionRandom::getAlreadyMoved() const
-{
-	return data.alreadyMoved;
-}
-
-const std::vector<uint>& JunctionRandom::getOldSpeeds() const
-{
-	return data.oldSpeeds;
-}
-
-const std::vector<int>& JunctionRandom::getCarCanNotEnterThisOutCell() const
-{
-	return data.carCanNotEnterThisOutCell;
-}
-
-uint JunctionRandom::getTrafficLightSwitchTime() const
-{
-	return data.trafficLightSwitchTime;
-}
-
-void JunctionRandom::dispJunction(const uint index, const uint roadLength) const
-{
-	if (find(data.inCellIndices.begin(), data.inCellIndices.end(), (roadLength - index - 1)) != data.inCellIndices.end()) {
-		std::cout << std::setw(4) << "in";
-	}
-	else if (find(data.outCellIndices.begin(), data.outCellIndices.end(), (roadLength - index - 1)) != data.outCellIndices.end()) {
-		std::cout << std::setw(4) << "out";
-	}
-	else {
-		std::cout << std::setw(4) << " ";
-	}
-}
-
-uint JunctionRandom::getNumCarsOnJunction() const
-{
-	uint num = 0;
-	for (auto car : data.carsOnJunction)
-		if (car >= 0)
-			++num;
-	return num;
-}
-
-void JunctionRandom::checkOutCellIndices(const uint roadLength) const
-{
-	try {
-		for (uint cell : data.outCellIndices)
-			if (cell >= roadLength) throw invalidInput_error("The indices of incoming cells to a junction are greater than the roadLength.");
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
diff --git a/src/gpu/Traffic/Junction/JunctionRandom.h b/src/gpu/Traffic/Junction/JunctionRandom.h
deleted file mode 100644
index 21ce459685ec6cb09fe5b9d12f2a7e923cb63748..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Junction/JunctionRandom.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#pragma once
-#include <random> 
-#include <vector>
-
-
-#include "Core/DataTypes.h"
-
-#include "Junction.h"
-#include "Traffic_export.h"
-
-class TrafficMovement;
-
-class TRAFFIC_EXPORT JunctionRandom :
-	public Junction
-{
-
-private:
-	JunctionData data;
-
-public:
-	JunctionRandom(const std::vector<uint> &inCellIndices, const std::vector<uint> &outCellIndices, uint trafficLightSwitchTime = 0);
-	~JunctionRandom() {};
-
-	virtual void setCellIndicesForNoUTurn(std::vector<int> carCanNotEnterThisOutCell);
-
-	virtual bool acceptsCar(uint cellIndex); //determines if a car can enter the junction
-	virtual void registerCar(uint cellIndex, uint numberOfCellsAlreadyMoved,  uint speed, uint oldSpeed); //registers all cars entering the junction
-	virtual void calculateTimeStep(TrafficMovement &road, uint currentTimestep);
-
-	virtual const std::vector<uint>& getInCellIndices() const;
-	virtual const std::vector<uint>& getOutCellIndices() const;
-	virtual const std::vector<bool>& getCarCanEnter() const;
-	virtual const std::vector<int>& getCarsOnJunction()const;
-	virtual const std::vector<uint>& getAlreadyMoved()const;
-	virtual const std::vector<uint>& getOldSpeeds()const;
-	virtual const std::vector<int>& getCarCanNotEnterThisOutCell()const;
-	virtual uint getTrafficLightSwitchTime()const;
-	
-	virtual void dispJunction(const uint index, const uint roadLength) const;
-	virtual uint getNumCarsOnJunction() const; 
-
-	virtual void checkOutCellIndices(const uint roadLength) const; 
-
-private:
-	uint getInCellsVectorIndex(uint cellIndex);
-
-	void applyRules(int &carSpeed,int index, TrafficMovement &road);
-	void brakeCar(uint outCellIndex, int &speed, int &remainingDistance, TrafficMovement &road);
-	void moveCar(uint outCell, int carSpeed, int index, int remainingDistance, TrafficMovement &road);
-	int chooseOutCell(int index);
-	int generateRandomOutCellIndex(uint outCellsTempSize);
-
-	void calculateTrafficLightTimeStep(uint currentTimestep);
-	void turnFirstHalfRed(uint currentTimestep);
-	void turnSecondHalfRed(uint currentTimestep);
-
-
-	void writeConcentrations(TrafficMovement &road);
-
-};
-
diff --git a/src/gpu/Traffic/Output/CarDisplay.cpp b/src/gpu/Traffic/Output/CarDisplay.cpp
deleted file mode 100644
index 06d19ef187a6602e9ed5d70f935600dc2cd630f5..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Output/CarDisplay.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-#include "CarDisplay.h"
-
-#include <fstream>
-#include <iostream>
-#include <iomanip>	//formatting output streams
-#include <stdexcept>
-
-#include "Utilities/VectorHelper.h"
-#include "Utilities/safe_casting.h"
-#include "Utilities/ConsoleColor.h"
-
-CarDisplay::CarDisplay(std::vector<int> **pcurrent, const uint safetyDistance):
-	safetyDistance{ safetyDistance }
-{
-	this->ppcurrent = pcurrent;
-	roadLength = castSizeT_Uint((*pcurrent)->size());
-}
-
-
-void CarDisplay::initResults(uint timeSteps)
-{
-	this->timeSteps = timeSteps;
-
-	results.resize(roadLength, std::vector<int>(1));
-
-	for (uint i = 0; i < roadLength; i++) {
-		results[i].resize(timeSteps + 1);
-	}
-
-	VectorHelper::fillVector(results, -1);
-	putCurrentIntoResults(0);
-}
-
-
-void CarDisplay::putCurrentIntoResults(uint step)
-{
-	writingStep = step;
-	for (uint i = 0; i < roadLength; i++) 
-		results[i][writingStep] = (**ppcurrent)[i];	
-}
-
-
-void CarDisplay::writeResultsToFile() const
-{
-	try {
-
-
-		std::fstream outFile("results.txt", std::fstream::out | std::fstream::trunc);
-		if (outFile.is_open())
-		{
-			for (uint i = 0; i < results.size(); i++) {
-				for (uint j = 0; j < results[i].size() - 1; j++)
-					outFile << results[i][j] << " ";
-
-				outFile << results[i][results[i].size() - 1];
-				outFile << std::endl;
-			}
-			std::cout << "Finished writing data to file" << std::endl;
-		}
-
-
-		else
-			throw std::runtime_error("Couldn't open file");
-
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-	catch (...) {
-		std::cerr << "unknown exception while writing to file" << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void CarDisplay::dispCurrentRoad() const
-{
-	std::cout << "current: ( step: " << writingStep << " )" << std::endl;
-	VectorHelper::dispVectorColour(**ppcurrent);
-}
-
-
-void CarDisplay::dispResults(const std::vector<int> * neighbors, const std::vector<std::shared_ptr<Sink> > & sinks, const  std::vector<std::shared_ptr<Junction> > & junctions, const  std::vector<std::shared_ptr<Source> > & sources)
-{
-	writeResultsToFile();
-
-	visualizeSafetyDistanceForConsole(neighbors);
-	//reverse(results.begin(), results.end());
-	// new implementation based on https://en.cppreference.com/w/cpp/algorithm/reverse
-	{
-		auto first = results.begin();
-		auto last = results.end();
-		while ((first != last) && (first != --last)) {
-			std::iter_swap(first++, last);
-		}
-	}
-
-	for (uint i = 0; i < results.size(); i++) {
-
-		dispJunctionsAtCell(i, junctions);
-		dispSinksAtCell(i, sinks);
-		dispSourcesAtCell(i, sources);
-
-		for (uint j = 0; j < results[i].size(); j++) {
-			VectorHelper::makeVectorOutputColourful(results[i][j]);
-			std::cout << std::setw(4) << results[i][j];
-		}
-
-		std::cout << std::endl;
-	}
-	std::cout << std::endl;
-	ConsoleColor::setDefaultWhite();
-}
-
-
-void CarDisplay::dispJunctionsAtCell(uint index, const  std::vector<std::shared_ptr<Junction> > & junctions)  const
-{
-	for (auto& junc : junctions) {
-		ConsoleColor::setDefaultWhite();
-		junc->dispJunction(index, roadLength);
-	}
-}
-
-
-void CarDisplay::dispSinksAtCell(uint index, const std::vector<std::shared_ptr<Sink> > & sinks)  const
-{
-	for (auto& sink : sinks) {
-		if (sink->getIndex() == roadLength - index - 1) {
-			ConsoleColor::setBrightRed();
-			std::cout << std::setw(4) << 1 - (sink->getPossibilityBeingBlocked());
-			return;
-		}
-		std::cout << std::setw(4) << " ";
-	}
-}
-
-
-void CarDisplay::dispSourcesAtCell(uint index, const  std::vector<std::shared_ptr<Source> > & sources)  const
-{
-	for (auto& source : sources) {
-		if (source->getIndex() == roadLength - index - 1) {
-			ConsoleColor::setBrightRed();
-			std::cout << std::setw(4) << source->getPossibility();
-			return;
-		}
-		std::cout << std::setw(4) << " ";
-	}
-}
-
-
-void CarDisplay::visualizeSafetyDistanceForConsole(const std::vector<int>* neighbors)
-{
-	if (safetyDistance != 0) {
-		int neighbor;
-		for (uint step = 0; step <= timeSteps; step++) {
-			for (uint i = 0; i < roadLength; i++) {
-				if (results[i][step] > -1) {
-					neighbor = (*neighbors)[i];
-					for (uint j = 1; j <= safetyDistance; j++) {
-						//junction or sink
-						if (neighbor <= -1000)
-							break;
-						if (results[neighbor][step] > -1) {
-							std::cerr << "safetyDistance was violated: timestep: " << step << "\t carIndex: " << i << std::endl;
-							break;
-						}
-						else
-							results[neighbor][step] = -5;
-						neighbor = (*neighbors)[neighbor];
-					}
-				}
-			}
-		}
-	}
-}
diff --git a/src/gpu/Traffic/Output/CarDisplay.h b/src/gpu/Traffic/Output/CarDisplay.h
deleted file mode 100644
index 336160139220424346a0f4faf2fd269b01840ac1..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Output/CarDisplay.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#pragma once
-
-
-#include <vector>
-#include <memory>
-
-#include "Sink/Sink.h"
-#include "Source/Source.h"
-#include "Junction/Junction.h"
-
-#include "Traffic_export.h"
-
-
-class TRAFFIC_EXPORT CarDisplay {
-public:
-	CarDisplay(std::vector<int> **pcurrent, const uint safetyDistance);
-	~CarDisplay() {};
-
-	void initResults(uint timeSteps);
-
-	void dispCurrentRoad() const;
-	void dispResults(const std::vector<int> * neighbors, const std::vector<std::shared_ptr<Sink> > & sinks, const  std::vector<std::shared_ptr<Junction> > & junctions, const  std::vector<std::shared_ptr<Source> > & sources);
-	void writeResultsToFile() const;
-
-	void putCurrentIntoResults(uint step);
-
-private:
-	void visualizeSafetyDistanceForConsole(const std::vector<int> * neighbors);
-
-	void dispJunctionsAtCell(uint index, const  std::vector<std::shared_ptr<Junction> > & junctions) const;
-	void dispSinksAtCell(uint index, const std::vector<std::shared_ptr<Sink> > & sinks) const;
-	void dispSourcesAtCell(uint index, const  std::vector<std::shared_ptr<Source> > & sources) const;
-
-private:
-	std::vector<std::vector<int> > results;		//saves the results of the calculation; x-axis = timesteps, y axis = positions
-	
-	std::vector<int> **ppcurrent;
-	uint roadLength;
-	const uint safetyDistance;
-
-	uint timeSteps;
-	uint writingStep;
-};
\ No newline at end of file
diff --git a/src/gpu/Traffic/Output/ConcBySpeedAndAcceleration.cpp b/src/gpu/Traffic/Output/ConcBySpeedAndAcceleration.cpp
deleted file mode 100644
index e62f87f44492583c24d04aaf3bdf4b72667dd1df..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Output/ConcBySpeedAndAcceleration.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-#include "ConcBySpeedAndAcceleration.h"
-
-#include <iostream>
-
-ConcBySpeedAndAcceleration::ConcBySpeedAndAcceleration(uint roadLength, real * concArrayStart)
-{
-	if (concArrayStart == nullptr) {
-		std::cout << "using ConcBySpeedAndAcceleration::concentration-vector for concentrations" << std::endl;
-		concentration.resize(roadLength);
-		this->roadLength = roadLength;
-	}
-	else {
-		std::cout << "using passed array for concentrations" << std::endl;
-		useLBMConcArray = true;
-		this->roadLength = roadLength;
-		this->concArrayStart = concArrayStart;
-	}
-
-}
-
-void ConcBySpeedAndAcceleration::calculateConcForSingleCar(uint index, uint oldSpeed, uint speed)
-{
-	putConcIntoArrayOrVector(index, chooseConc(oldSpeed, speed));
-}
-
-
-void ConcBySpeedAndAcceleration::calculateConcForAllCars(const std::vector<int> oldSpeeds, const std::vector<int> newSpeeds)
-{
-	for (uint i = 0; i < roadLength; i++) {
-		if (newSpeeds[i] > -1)
-			putConcIntoArrayOrVector(i, chooseConc(oldSpeeds[i], newSpeeds[i]));
-	}
-}
-
-void ConcBySpeedAndAcceleration::calculateConcForJunctionCar(uint index, uint oldSpeed, uint speed)
-{
-	addConcToArrayOrVector(index, chooseConc(oldSpeed, speed));
-
-}
-
-real ConcBySpeedAndAcceleration::chooseConc(uint oldSpeed, uint speed)
-{
-	if (oldSpeed == 0 && speed > 0) //Start
-		return 0.833f;
-	else if (oldSpeed == 0 && speed == 0) //Idle
-		return 0.069f;
-	else if (speed == oldSpeed) //Drive
-		return 0.221f;
-	else if (speed > oldSpeed) //Accelerate
-		return 0.625f;
-	else if (speed < oldSpeed) //Brake
-		return 0.379f;
-	else
-		std::cerr << "couldn't choose driving state in ConcentrationBySpeedAndAcceleration::chooseConc" << std::endl;
-	return -1.0f;
-}
diff --git a/src/gpu/Traffic/Output/ConcBySpeedAndAcceleration.h b/src/gpu/Traffic/Output/ConcBySpeedAndAcceleration.h
deleted file mode 100644
index 50f9671080bf049baef6b37e509a54faa02d8110..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Output/ConcBySpeedAndAcceleration.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-
-#include "ConcentrationOutwriter.h"
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT ConcBySpeedAndAcceleration :
-	public ConcentrationOutwriter
-{
-public:
-	ConcBySpeedAndAcceleration(uint roadlength, real* concArrayStart = 0);
-	~ConcBySpeedAndAcceleration() {};
-
-	virtual void calculateConcForSingleCar(uint index, uint oldSpeed, uint speed);
-	virtual void calculateConcForJunctionCar(uint index, uint oldSpeed, uint speed);
-	virtual void calculateConcForAllCars(const std::vector<int> oldSpeeds, const std::vector<int> newSpeeds);
-
-
-private:
-	real chooseConc(uint oldSpeed, uint speed);
-
-};
-
diff --git a/src/gpu/Traffic/Output/ConcentrationByPosition.cpp b/src/gpu/Traffic/Output/ConcentrationByPosition.cpp
deleted file mode 100644
index bdfc46e13b319e542805b50c5a76cb89f7aeacec..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Output/ConcentrationByPosition.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#include "ConcentrationByPosition.h"
-
-#include <iostream>
-
-ConcentrationByPosition::ConcentrationByPosition(uint roadLength, real * concArrayStart, uint maxSpeed)
-{
-	if (concArrayStart == nullptr) {
-		std::cout << "using ConcentrationByPosition::concentration-vector for concentrations" << std::endl;
-		concentration.resize(roadLength);
-		this->roadLength = roadLength;
-	}
-	else {
-		std::cout << "using passed array for concentrations" << std::endl;
-		useLBMConcArray = true;
-		this->roadLength = roadLength;
-		this->concArrayStart = concArrayStart;
-	}
-
-}
-
-//void ConcentrationByPosition::calculateConcFromCarDistribution(const std::vector<int>& currentCarDistribution)
-//{
-//	for (uint i = 0; i < currentCarDistribution.size(); i++) {
-//		if (currentCarDistribution[i] >= 0) 
-//			concentration[i] = 1.0;
-//		else
-//			concentration[i] = 0.0;
-//	}
-//
-//	//dispConcentration();
-//}
-
-
-void ConcentrationByPosition::calculateConcForSingleCar(uint index, uint oldSpeed, uint speed)
-{
-	putConcIntoArrayOrVector(index, 1.0);
-}
-
-void ConcentrationByPosition::calculateConcForJunctionCar(uint index, uint oldSpeed, uint speed)
-{
-	addConcToArrayOrVector(index, 1.0);
-}
-
-void ConcentrationByPosition::calculateConcForAllCars(const std::vector<int> oldSpeeds, const std::vector<int> newSpeeds)
-{
-	for (uint i = 0; i < roadLength; i++) 
-		if (newSpeeds[i] >= 0)
-			putConcIntoArrayOrVector(i, 1.0);
-}
-
-
diff --git a/src/gpu/Traffic/Output/ConcentrationByPosition.h b/src/gpu/Traffic/Output/ConcentrationByPosition.h
deleted file mode 100644
index cd136d1689c6888b4da2097e02ee9bd2e97e2743..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Output/ConcentrationByPosition.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include "ConcentrationOutwriter.h"
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT ConcentrationByPosition:
-	public ConcentrationOutwriter
-{
-public:
-	ConcentrationByPosition(uint roadlength, real* concArrayStart = nullptr, uint maxSpeed = 0);
-	~ConcentrationByPosition() {};
-
-	virtual void calculateConcForSingleCar(uint index, uint oldSpeed = 0, uint speed = 0);
-	virtual void calculateConcForJunctionCar(uint index, uint oldSpeed = 0, uint speed = 0);
-	virtual void calculateConcForAllCars(const std::vector<int> oldSpeeds, const std::vector<int> newSpeeds) = 0;
-};
-
diff --git a/src/gpu/Traffic/Output/ConcentrationOutwriter.cpp b/src/gpu/Traffic/Output/ConcentrationOutwriter.cpp
deleted file mode 100644
index cf8d13a7047855ee10eb452e7ff74fad7b8be410..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Output/ConcentrationOutwriter.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-#include "ConcentrationOutwriter.h"
-
-#include <iostream>
-#include <iomanip>	//formatting output streams
-
-#include "Utilities/ConsoleColor.h"
-
-void ConcentrationOutwriter::resetConcentrations()
-{
-	if (useLBMConcArray)
-		for (real* p = concArrayStart; p < concArrayStart + roadLength; ++p)
-			*p = 0.0;
-	else
-		std::fill(concentration.begin(), concentration.end(), 0.0f);
-}
-
-
-void ConcentrationOutwriter::putConcIntoArrayOrVector(uint index, real conc)
-{
-	if (useLBMConcArray) {
-		real *pos = concArrayStart + index;
-		*pos = conc;
-	}
-	else
-		concentration[index] = conc;
-}
-
-
-void ConcentrationOutwriter::addConcToArrayOrVector(uint index, real conc)
-{
-	if (useLBMConcArray) {
-		real *pos = concArrayStart + index;
-		//if ((*pos + conc) > 1.0) *pos = 1.0;
-		//else
-		*pos += conc;
-	}
-	else
-		//	if (concentration[index] + conc > 1.0) concentration[index] = 1.0;
-		//	else	
-		concentration[index] += conc;
-}
-
-
-void ConcentrationOutwriter::dispCurrentConcentrations()
-{
-	if (useLBMConcArray)
-		for (real* p = concArrayStart; p < concArrayStart + roadLength; ++p)
-			dispSingleConcentration(*p);
-	else
-		for (auto cell : concentration)
-			dispSingleConcentration(cell);
-
-
-	std::cout << std::endl;
-
-	ConsoleColor::setDefaultWhite();
-}
-
-
-void ConcentrationOutwriter::dispSingleConcentration(real conc)
-{
-	if (conc > 0)
-		ConsoleColor::setBrightRed();
-	else
-		ConsoleColor::setDarkGrey();
-	std::cout << std::setw(4) << conc;
-}
diff --git a/src/gpu/Traffic/Output/ConcentrationOutwriter.h b/src/gpu/Traffic/Output/ConcentrationOutwriter.h
deleted file mode 100644
index 9589e5f4b6751899e20106cfa41cb4cd29b11205..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Output/ConcentrationOutwriter.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#pragma once
-
-#include <vector>
-
-
-#include "Core/DataTypes.h"
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT ConcentrationOutwriter
-{
-public:
-	virtual void resetConcentrations();
-	virtual void calculateConcForSingleCar(uint index, uint oldSpeed = 0, uint speed = 0) = 0;
-	virtual void calculateConcForJunctionCar(uint index, uint oldSpeed = 0, uint speed = 0) = 0;
-	virtual void calculateConcForAllCars(const std::vector<int> oldSpeeds, const std::vector<int> newSpeeds)=0;
-	void dispCurrentConcentrations();
-
-protected:
-	void putConcIntoArrayOrVector(uint index, real conc);
-	void addConcToArrayOrVector(uint index, real conc);
-
-protected:
-	std::vector<real> concentration;
-	bool useLBMConcArray = false;
-	real* concArrayStart;
-	uint roadLength;
-
-private:
-	void dispSingleConcentration(real conc);
-	
-};
-
diff --git a/src/gpu/Traffic/RoadNetwork/RoadMaker.cpp b/src/gpu/Traffic/RoadNetwork/RoadMaker.cpp
deleted file mode 100644
index d024ab6b0c5d4af48bb464b162bdc916b557d6c3..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/RoadNetwork/RoadMaker.cpp
+++ /dev/null
@@ -1,292 +0,0 @@
-#include "RoadMaker.h"
-
-#include <iostream>
-
-#include "Utilities/VectorHelper.h"
-#include "Utilities/invalidInput_error.h"
-#include "Utilities/safe_casting.h"
-
-
-//random vehicle Distribution
-RoadMaker::RoadMaker(const uint roadLength, const uint maxVelocity, uint vehicleLength, const real vehicleDensity)
-{
-	std::uniform_int_distribution<uint> distInt2{ 0, maxVelocity };
-	distInt = distInt2;
-
-	this->roadLength = roadLength;
-	this->maxVelocity = maxVelocity;
-	initVehicleLength(vehicleLength);
-
-	//init vectors
-	//this->conc.resize(this->roadLength);
-	initCurrentAsEmpty();
-	initNext();
-	initNeighbors();
-	initCurrentWithLongVehicles();
-
-	initVehicleDensity(vehicleDensity);
-}
-
-
-//given vehicle distribution
-RoadMaker::RoadMaker(const std::vector<int> vehicleDistribution, const uint maxVelocity, uint vehicleLength)
-{
-	this->roadLength = castSizeT_Uint(vehicleDistribution.size());
-
-	this->maxVelocity = maxVelocity;
-	initVehicleLength(vehicleLength);
-
-	//init vectors
-	//this->conc.resize(this->roadLength);
-	current = vehicleDistribution;
-	initNext();
-	initNeighbors();
-	initCurrentWithLongVehicles();
-}
-
-
-//empty road
-RoadMaker::RoadMaker(const uint roadLength, const uint maxVelocity, uint vehicleLength)
-{
-	this->roadLength = roadLength;
-	this->maxVelocity = maxVelocity;
-	initVehicleLength(vehicleLength);
-
-	//init vectors
-	//this->conc.resize(this->roadLength);
-	initCurrentAsEmpty();
-	initNext();
-	initNeighbors();
-	initCurrentWithLongVehicles();
-}
-
-
-RoadMaker::~RoadMaker()
-{
-}
-
-
-void RoadMaker::initNext()
-{
-	next.resize(roadLength);
-	VectorHelper::fillVector(next, -1);
-}
-
-
-void RoadMaker::initNeighbors()
-{
-	neighbors.resize(roadLength);
-	for (uint i = 0; i < roadLength - 1; i++) {
-		neighbors[i] = i + 1;
-	}
-	neighbors[roadLength - 1] = 0;
-}
-
-
-void RoadMaker::initCurrentAsEmpty()
-{
-	current.resize(roadLength);
-	VectorHelper::fillVector(current, -1);
-}
-
-
-void RoadMaker::initCurrentWithLongVehicles()
-{
-	currentWithLongVehicles.resize(roadLength);
-}
-
-
-void RoadMaker::initVehicleDensity(const real vehicleDensity)
-{
-	try {
-		if (vehicleDensity > 0 && vehicleDensity < 1) {
-			initRandomCars(vehicleDensity);
-		}
-		else {
-			throw invalidInput_error("The vehicleDensity should be between 0 and 1");
-		}
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void RoadMaker::initRandomCars(const real vehicleDensity)
-{
-	//this method doesn't fill the first cells, so that the safetyDistance isn't violated in a periodic road
-	for (uint i = safetyDistance; i < roadLength; i++) {
-		double randomNumber = distFloat(engine);
-		if (randomNumber <= vehicleDensity) {
-			current[i] = randomSpeed();
-			i += safetyDistance;
-		}
-	}
-}
-
-
-int RoadMaker::randomSpeed()
-{
-	return distInt(engine);
-}
-
-
-void RoadMaker::initVehicleLength(const uint vehicleLength)
-{
-	try {
-		if (vehicleLength == 0) throw  invalidInput_error("The vehicleLength has to be greater than 0");
-		this->vehicleLength = vehicleLength;
-		this->safetyDistance = vehicleLength - 1;
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void RoadMaker::setJunctions(std::vector<std::shared_ptr<Junction> > & junctions)
-{
-	for (auto& junc : junctions)
-		addJunction(junc);
-}
-
-
-void RoadMaker::addJunction(std::shared_ptr<Junction>& junction)
-{
-	try {
-
-		junction->checkOutCellIndices(roadLength);
-		setJunctionAsNeighbor(junction);
-		this->junctions.push_back(junction);
-
-		if (junctions.size() > 999) throw std::runtime_error("too many junctions");
-
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void RoadMaker::setJunctionAsNeighbor(std::shared_ptr<Junction> & junction)
-{
-	//set the junction as neighbor of the incoming cells
-
-	int junctionIndex = -1000 - castSizeT_Int(junctions.size()); //value range: -1000 to -1999
-	std::vector<uint> inCells = junction->getInCellIndices();
-
-	try {
-
-		for (auto cell : inCells) {
-			if (cell >= roadLength) throw invalidInput_error("The index of an incoming cell to a junction ist greater than the roadLength.");
-			if (neighbors[cell] < 0)				
-				std::cout << "The neighboring cell of cell " << cell << " was already definded as sink or junction, no new junction added." << std::endl;
-			else
-				neighbors[cell] = junctionIndex;
-		}
-
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void RoadMaker::setSinks(std::vector<std::shared_ptr<Sink> > & sinks)
-{
-	for (auto& sink : sinks)
-		addSink(sink);
-}
-
-
-void RoadMaker::addSink(std::shared_ptr<Sink>& sink)
-{
-
-	try {
-
-		setSinkAsNeighbor(sink);
-		this->sinks.push_back(sink);
-		if (sinks.size() > 999) throw std::runtime_error("too many sinks");
-
-
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-
-}
-
-
-void RoadMaker::setSinkAsNeighbor(std::shared_ptr<Sink> & sink)
-{
-	//set the sink as neighbor of the incoming cell
-
-	int sinkIndex = -2000 - castSizeT_Int(sinks.size()); //value range: -2000 to -2999
-	uint sinkCell = sink->getIndex();
-
-	if (sinkCell >= roadLength) throw invalidInput_error("The index of a sink ist greater than the roadLength.");
-
-	if (neighbors[sinkCell] < 0) {
-		std::cout << "The neighboring cell of cell " << sinkCell << " was already definded as sink or junction, no new sink added." << std::endl;
-	}
-	else
-	{
-		neighbors[sinkCell] = sinkIndex;
-	}
-}
-
-
-
-void RoadMaker::setSources(std::vector< std::shared_ptr<Source> > & sources)
-{
-	for (auto& source : sources)
-		addSource(source);
-}
-
-
-void RoadMaker::addSource(std::shared_ptr<Source>& source)
-{
-	try {
-		if (source->getIndex() >= roadLength) throw invalidInput_error("Source index is greater than roadlength");
-		this->sources.push_back(source);
-	}
-
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void RoadMaker::setNeighbor(uint index, uint neighbor)
-{
-	this->neighbors[index] = neighbor;
-}
-
-void RoadMaker::setNeighborForCurve(uint index, uint neighbor)
-{
-	this->neighbors[index] = neighbor;
-	for (uint i = 0; i < this->vehicleLength; i++) {
-		if (neighbor < 0) break;
-		this->current[neighbor] = -1;		
-		neighbor = neighbors[neighbor];
-	}
-}
-
-
-uint RoadMaker::getMaxVelocity()
-{
-	return maxVelocity;
-}
-
diff --git a/src/gpu/Traffic/RoadNetwork/RoadMaker.h b/src/gpu/Traffic/RoadNetwork/RoadMaker.h
deleted file mode 100644
index 9d6d324608db6f348a6a8dacb1121c279ba6452f..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/RoadNetwork/RoadMaker.h
+++ /dev/null
@@ -1,52 +0,0 @@
- #pragma once
-#include <random>
-
-#include "RoadNetworkData.h"
-
-#include "Utilities/RandomHelper.h"
-
-#include "Traffic_export.h"
-
-
-struct TRAFFIC_EXPORT RoadMaker :
-	public RoadNetworkData
-{
-public:
-	RoadMaker(const uint roadLength, const uint maxVelocity, uint vehicleLength, const real vehicleDensity); //random vehicle Distribution
-	RoadMaker(const std::vector<int> vehicleDistribution, const uint maxVelocity, uint vehicleLength); //given vehicle distribution
-	RoadMaker(const uint roadLength, const uint maxVelocity, uint vehicleLength);//empty road
-
-	~RoadMaker();
-
-	void setJunctions( std::vector<std::shared_ptr<Junction> > & junctions); //max 999 junctions
-	void addJunction(std::shared_ptr<Junction> & junction);
-	void setSinks(std::vector< std::shared_ptr<Sink> > & sinks); //max 999 sinks
-	void addSink(std::shared_ptr<Sink> & sink);
-	void setSources(std::vector< std::shared_ptr<Source> > & sources);
-	void addSource(std::shared_ptr<Source> & source);
-
-	void setNeighbor(uint index, uint neighbor); // don't use it for setting sinks or junctions!
-	void setNeighborForCurve(uint index, uint neighbor);
-
-	uint getMaxVelocity();
-
-private:
-	std::mt19937 engine = RandomHelper::make_engine();
-	std::uniform_real_distribution<real> distFloat{ 0.0, 1.0 };
-	std::uniform_int_distribution<uint> distInt{ 0, maxVelocity };
-
-private:
-	void initNext();
-	void initNeighbors();
-	void initCurrentAsEmpty();
-	void initCurrentWithLongVehicles();
-	void initVehicleDensity(const real vehicleDensity);
-	void initRandomCars(const real vehicleDensity);
-	void initVehicleLength(const uint vehicleLength);
-	int randomSpeed();
-
-	void setJunctionAsNeighbor(std::shared_ptr<Junction> & junction);
-	void setSinkAsNeighbor(std::shared_ptr<Sink> & sink);
-
-};
-
diff --git a/src/gpu/Traffic/RoadNetwork/RoadNetworkData.h b/src/gpu/Traffic/RoadNetwork/RoadNetworkData.h
deleted file mode 100644
index 3839c53c8449b6c52bfaf7cf0e026559ee5aa9d7..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/RoadNetwork/RoadNetworkData.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#pragma once
-#include <memory>
-#include <vector>
-
-
-
-#include "Source/Source.h"
-#include "Sink/Sink.h"
-#include "Junction/Junction.h"
-
-#include "Traffic_export.h"
-
-
-struct TRAFFIC_EXPORT RoadNetworkData
-{
-protected:
-	friend class TrafficMovement;
-	friend class TrafficTimestep;
-
-	uint roadLength;
-	uint maxVelocity;
-	uint vehicleLength;
-	uint safetyDistance;
-
-	std::vector<int> current;
-	std::vector<int> currentWithLongVehicles;
-	std::vector<int> next;						//for temporary calculations
-	std::vector<int> neighbors;
-
-	std::vector<std::shared_ptr<Sink> > sinks; 
-	std::vector<std::shared_ptr<Junction> > junctions;
-	std::vector<std::shared_ptr<Source> > sources;
-
-	std::vector<int> *pcurrent;
-	std::vector<int> *pnext;
-	std::vector<int> *pdummy;
-
-	std::vector<int> oldSpeeds;
-
-	real dawdlePossibility;
-	bool useSlowToStart = false;
-	real slowStartPossibility;
-	uint maxAcceleration = 1;
-
-	std::vector<real> conc; //dispConcFromGPU
-};
-
diff --git a/src/gpu/Traffic/Sink/Sink.h b/src/gpu/Traffic/Sink/Sink.h
deleted file mode 100644
index 792e32e8dd4031722d3a65da8148ffbd402e3e6c..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Sink/Sink.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#pragma once
-
-#include "SinkData.h"
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT Sink
-{
-public:
-	virtual real getPossibilityBeingBlocked() const = 0;
-	virtual bool carCanEnter() = 0;
-	virtual uint getIndex() const = 0;
-};
-
diff --git a/src/gpu/Traffic/Sink/SinkData.h b/src/gpu/Traffic/Sink/SinkData.h
deleted file mode 100644
index 9603c816ae0733e7f73e4b70c165ba06f5a40e9f..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Sink/SinkData.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-
-#include "Core/DataTypes.h"
-
-#include "Traffic_export.h"
-
-struct TRAFFIC_EXPORT SinkData {
-	uint sinkIndex;
-	real sinkBlockedPossibility;
-};
\ No newline at end of file
diff --git a/src/gpu/Traffic/Sink/SinkRandom.cpp b/src/gpu/Traffic/Sink/SinkRandom.cpp
deleted file mode 100644
index 9e49f83d1b693eace90b285aafaaa54be0ee58c7..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Sink/SinkRandom.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "SinkRandom.h"
-
-#include <iostream>
-
-#include "Utilities/invalidInput_error.h"
-
-SinkRandom::SinkRandom(uint sinkIndex, real sinkBlockedPossibility)
-{
-	data.sinkIndex = sinkIndex;
-
-	try {
-		if (sinkBlockedPossibility >= 0 && sinkBlockedPossibility <= 1) {
-			data.sinkBlockedPossibility = sinkBlockedPossibility;
-		}
-		else {
-			throw invalidInput_error("possibility of the sink being blocked should be between 0 and 1");
-		}
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	};
-}
-
-
-bool SinkRandom::carCanEnter()
-{	
-	return  !(distFloat(engine) < data.sinkBlockedPossibility);
-}
-
-
-real SinkRandom::getPossibilityBeingBlocked() const
-{
-	return data.sinkBlockedPossibility;
-}
-
-
-uint SinkRandom::getIndex() const
-{
-	return data.sinkIndex;
-}
-
diff --git a/src/gpu/Traffic/Sink/SinkRandom.h b/src/gpu/Traffic/Sink/SinkRandom.h
deleted file mode 100644
index d6126acd1dc90b36ee1faed7f7d0ea3131f279c9..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Sink/SinkRandom.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-
-#include <random>
-
-#include "Sink.h"
-
-#include "Utilities/RandomHelper.h"
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT SinkRandom:
-	public Sink
-{
-private:
-	SinkData data;
-
-	std::mt19937 engine = RandomHelper::make_engine();
-	std::uniform_real_distribution<float> distFloat{ 0.0, 1.0 };
-
-public:
-	SinkRandom(uint sinkIndex, real sinkBlockedPossibility);
-	~SinkRandom() {};
-
-	real getPossibilityBeingBlocked() const;
-	bool carCanEnter();
-	uint getIndex() const;
-};
-
diff --git a/src/gpu/Traffic/Source/Source.h b/src/gpu/Traffic/Source/Source.h
deleted file mode 100644
index 3c786b7ee7e1e1a2c0ec38c5a42da293b927f9a1..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Source/Source.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#pragma once
-
-
-#include "SourceData.h"
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT Source
-{
-public:
-	virtual uint getIndex() const = 0;
-	virtual real getPossibility() const = 0;
-	virtual uint getSourceCar() = 0;
-};
-
diff --git a/src/gpu/Traffic/Source/SourceData.h b/src/gpu/Traffic/Source/SourceData.h
deleted file mode 100644
index e71ec4b5c91581f364a60074f60e1d574fe7d832..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Source/SourceData.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#pragma once
-
-#include "Core/DataTypes.h"
-
-#include <memory>
-#include <vector>
-#include "Traffic_export.h"
-
-
-struct TRAFFIC_EXPORT SourceData {
-	uint sourceIndex;
-	real sourcePossibility;
-	uint maxVelocity;
-};
\ No newline at end of file
diff --git a/src/gpu/Traffic/Source/SourceRandom.cpp b/src/gpu/Traffic/Source/SourceRandom.cpp
deleted file mode 100644
index 7def2c237b81d5c014cab2bebbea1474dac69b36..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Source/SourceRandom.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#include "SourceRandom.h"
-
-#include <iostream>
-
-#include "Utilities/invalidInput_error.h"
-
-SourceRandom::SourceRandom(const uint sourceIndex, const real sourcePossibility, uint maxVelocity) 
-{
-	data.sourceIndex = sourceIndex;
-	data.maxVelocity = maxVelocity;
-
-	try {
-		if (sourcePossibility >= 0 && sourcePossibility <= 1) {
-			data.sourcePossibility = sourcePossibility;
-			std::uniform_int_distribution<uint> distInt2{ 0, maxVelocity };
-			distInt = distInt2;
-		}
-		else {
-			throw invalidInput_error("possibility of a car leaving the sink should be between 0 and 1");
-		}
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	};
-}
-
-
-
-SourceRandom::~SourceRandom()
-{
-}
-
-uint SourceRandom::getIndex() const
-{
-	return data.sourceIndex;
-}
-
-real SourceRandom::getPossibility() const
-{
-	return data.sourcePossibility;
-}
-
-
-uint SourceRandom::getSourceCar()
-{
-	randomNumber = distFloat(engine);
-	if (randomNumber < data.sourcePossibility) {
-		return randomSpeed();
-	}
-	return -1;
-}
-
-
-uint SourceRandom::randomSpeed()
-{
-	return distInt(engine);
-}
diff --git a/src/gpu/Traffic/Source/SourceRandom.h b/src/gpu/Traffic/Source/SourceRandom.h
deleted file mode 100644
index 5acf83fd788223c011717130d82c1867b5501757..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Source/SourceRandom.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-
-
-#include <random>
-
-#include "Source.h"
-#include "Utilities/RandomHelper.h"
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT SourceRandom:
-	public Source
-{
-private:
-	SourceData data;
-
-	std::mt19937 engine = RandomHelper::make_engine();
-	std::uniform_real_distribution<float> distFloat{ 0.0, 1.0 };
-	std::uniform_int_distribution<uint> distInt{ 0, 1 };
-
-public:
-	SourceRandom(const uint sourceIndex, const real sourcePossibility, uint maxVelocity);
-	~SourceRandom();
-
-	virtual uint getIndex() const;
-	virtual real getPossibility() const;
-	virtual uint getSourceCar();
-
-private:
-	uint randomSpeed();
-
-private:
-	//variables for temporaray calculations
-	real randomNumber;
-};
-
diff --git a/src/gpu/Traffic/TrafficMovement.cpp b/src/gpu/Traffic/TrafficMovement.cpp
deleted file mode 100644
index fccf6f583ed0a87a8da0e87d67a62e5d0f531cf5..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/TrafficMovement.cpp
+++ /dev/null
@@ -1,575 +0,0 @@
-#include "TrafficMovement.h"
-
-#include <memory>
-#include <iostream>
-#include <stdexcept>
-
-#include "Utilities/invalidInput_error.h"
-#include "Utilities/VectorHelper.h"
-#include "Utilities/RandomHelper.h"
-#include "Utilities/Logger.h"
-#include "GPU/TrafficTimestep.h"
-
-#include "Output/ConcBySpeedAndAcceleration.h"
-#include "Output/ConcentrationByPosition.h"
-
-
-TrafficMovement::TrafficMovement(std::shared_ptr<RoadNetworkData> road, const real dawdlePossibility)
-{
-	this->road = std::move(road);
-
-	this->road->pcurrent = &(this->road->current);
-	this->road->pnext = &(this->road->next);
-
-	checkCurrentForSafetyDistance();
-
-	initDawdle(dawdlePossibility);
-
-	//this->road->conc.resize(this->road->roadLength); //dispConcFromGPU
-}
-
-
-TrafficMovement::~TrafficMovement()
-{
-	gpuCalculation->cleanUp();
-	road->pcurrent = NULL;
-	road->pnext = NULL;
-	road->pdummy = NULL;
-}
-
-
-void TrafficMovement::initDawdle(const real dawdlePossibility)
-{
-	try {
-		if (dawdlePossibility >= 0 && dawdlePossibility < 1) {
-			this->road->dawdlePossibility = dawdlePossibility;
-		}
-		else {
-			throw invalidInput_error("The dawdlePossibility should be between 0 and 1.");
-		}
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void TrafficMovement::setSlowToStart(const real slowStartPossibility)
-{
-	try {
-		if (slowStartPossibility >= 0 && slowStartPossibility < 1) {
-			if (slowStartPossibility > 0) {
-				this->road->slowStartPossibility = slowStartPossibility;
-				road->useSlowToStart = true;
-			}
-		}
-		else {
-			throw invalidInput_error("The slowStartPossibility should be between 0 and 1.");
-		}
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-void TrafficMovement::setUseGPU(real * pConcArray, int* naschVelocity)
-{
-	std::cout << "usingGPU for calculation" << std::endl;
-	this->useGPU = true;
-	this->gpuCalculation = std::make_unique<TrafficTimestep>(TrafficTimestep(this->road, pConcArray, naschVelocity));
-}
-
-void TrafficMovement::setMaxAcceleration(uint maxAcceleration)
-{
-	this->road->maxAcceleration = maxAcceleration;
-}
-
-void TrafficMovement::setConcentrationOutwriter(uint roadlength, real* concArrayStart)
-{
-	this->concWriter = std::make_unique<ConcBySpeedAndAcceleration>(ConcBySpeedAndAcceleration(roadlength, concArrayStart));
-}
-
-
-void TrafficMovement::setSaveResultsTrue(uint timeSteps)
-{
-	this->display = std::make_unique<CarDisplay>(&road->pcurrent, road->safetyDistance);
-	if (display != nullptr) display->initResults(timeSteps);
-}
-
-
-uint TrafficMovement::getNumberOfCars() const
-{
-	uint num = 0;
-	if (useGPU) num = gpuCalculation->getNumCarsOnJunctions();
-	else
-		for (auto& junc : road->junctions)
-			num += junc->getNumCarsOnJunction();
-
-	for (auto cell : *(road->pcurrent))
-		if (cell >= 0) ++num;
-
-	return num;
-}
-
-
-int TrafficMovement::getSpeedAtPosition(uint pos) const
-{
-	return (*(road->pcurrent))[pos];
-}
-
-
-uint TrafficMovement::getRoadLength() const
-{
-	return road->roadLength;
-}
-
-
-uint TrafficMovement::getMaxVelocity() const
-{
-	return road->maxVelocity;
-}
-
-
-real TrafficMovement::getDawdlePossibility()
-{
-	return road->dawdlePossibility;
-}
-
-bool TrafficMovement::getUseSlowToStart()
-{
-	return road->useSlowToStart;
-}
-
-real TrafficMovement::getSlowToStartPossibility()
-{
-	return road->slowStartPossibility;
-}
-
-uint TrafficMovement::getMaxAcceleration()
-{
-	return road->maxAcceleration;
-}
-
-
-
-void TrafficMovement::loopTroughTimesteps(uint timeSteps)
-{
-	for (uint step = 1; step < timeSteps + 1; step++) {
-		calculateTimestep(step);
-	}
-	dispResults();
-}
-
-
-void TrafficMovement::calculateTimestep(uint step)
-{
-
-	if (useGPU) {
-
-		//GPU
-
-		copiedDevToHost = false;
-		this->gpuCalculation->calculateTimestep(road);
-
-
-	}
-	else {
-		
-		//CPU
-
-		if (concWriter != nullptr) concWriter->resetConcentrations();
-
-		VectorHelper::fillVector(*(road->pnext), -1);
-
-		for (uint i = 0; i < road->roadLength; i++)
-			if ((*(road->pcurrent))[i] > -1)
-				applyRules(i);
-
-		calculateJunctionStep();
-
-		calculateSourceStep();
-
-		switchCurrentNext();
-		
-	}
-
-
-	if (display != nullptr) {
-		if (useGPU) copyDevToHost();
-		display->putCurrentIntoResults(step);
-	}
-
-	////disp current road
-	//if (display != nullptr) {
-	//	if (useGPU) copyDevToHost();
-	//	display->dispCurrentRoad();
-	//}
-	
-	////disp current conc
-	//if (useGPU)	dispCurrentConcFromGPU();
-	//else if (concWriter != nullptr) concWriter->dispCurrentConcentrations();
-
-	currentStep += 1;
-}
-
-
-void TrafficMovement::calculateSourceStep()
-{
-	uint sourceIndex;
-	uint gap;
-	for (auto &source : road->sources) {
-		sourceIndex = source->getIndex();
-		gap = getGapAfterOutCell(sourceIndex, road->maxVelocity);
-		if (gap > 0) {
-			uint speed = source->getSourceCar();
-			(*(road->pnext))[sourceIndex] = speed;
-			writeConcentration(sourceIndex, speed);
-		}
-	}
-}
-
-void TrafficMovement::calculateJunctionStep()
-{
-	for (auto &junction : road->junctions) {
-		junction->calculateTimeStep(*this, currentStep);
-	}
-}
-
-void TrafficMovement::switchCurrentNext()
-{
-	road->pdummy = road->pcurrent;
-	road->pcurrent = road->pnext;
-	road->pnext = road->pdummy;
-}
-
-void TrafficMovement::applyRules(uint carIndex)
-{
-	uint speed = (*(road->pcurrent))[carIndex];
-	accelerateCar(speed);
-	brakeCar(carIndex, speed);
-	dawdleCar(carIndex, speed);
-	moveCar(carIndex, speed);
-}
-
-void TrafficMovement::accelerateCar(uint & speed)
-{
-	if (speed < road->maxVelocity) {
-		if (speed <= road->maxVelocity - road->maxAcceleration)
-			speed += road->maxAcceleration;
-		else
-			speed = road->maxVelocity;
-	}
-}
-
-void TrafficMovement::brakeCar(uint carIndex, uint &speed)
-{
-	int neighbor = road->neighbors[carIndex];
-	gap = getGapAfterCar(carIndex, speed, neighbor);
-	if (speed > gap)
-		speed = gap;
-}
-
-void TrafficMovement::dawdleCar(uint carIndex, uint & speed)
-{
-	randomNumber = distFloat(engine);
-
-	//Barlovic / SlowToStart
-	if (road->useSlowToStart == true && (*(road->pcurrent))[carIndex] == 0) {
-		if (randomNumber < road->slowStartPossibility) {
-			speed = 0;
-		}
-		return;
-	}
-
-	//Standard NaSch
-	if (randomNumber < road->dawdlePossibility) {
-		if (speed >= road->maxAcceleration)
-			speed -= road->maxAcceleration;
-		else
-			speed = 0;
-	}
-}
-
-void TrafficMovement::moveCar(const uint carIndex, uint speed)
-{
-	if (speed == 0) {
-		(*(road->pnext))[carIndex] = 0;
-		writeConcentration(carIndex, (*(road->pcurrent))[carIndex]);
-		return;
-	}
-
-	int neighbor = road->neighbors[carIndex];
-	uint currentCell = carIndex;
-
-	uint numberOfCellsMoved = iterateNeighborsInMove(currentCell, speed, neighbor);
-
-	if (neighbor <= -1000 && neighbor > -2000) {
-		getJunctionFromNeighbor(neighbor)->registerCar(currentCell, numberOfCellsMoved, speed, (*(road->pcurrent))[carIndex]);
-		return;
-	}
-
-	if (neighbor >= 0) {
-		(*(road->pnext))[neighbor] = speed;
-		writeConcentration(neighbor, (*(road->pcurrent))[carIndex]);
-	}
-}
-
-
-void TrafficMovement::moveJunctionCar(uint outCellIndex, uint remainingDistance, uint speed, uint oldSpeed)
-{
-	if (remainingDistance == 1) {
-		(*(road->pnext))[outCellIndex] = speed;
-		writeConcentration(outCellIndex, oldSpeed);
-		return;
-	}
-
-	int neighbor = outCellIndex;
-
-	uint numberOfCellsMoved = iterateNeighborsInMove(outCellIndex, remainingDistance, neighbor);
-
-	try {
-		if (neighbor <= -1000 && neighbor > -2000) {
-			throw std::runtime_error("car entered two junctions in one timestep");
-		}
-	}
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-
-	if (neighbor >= 0) {
-		(*(road->pnext))[neighbor] = speed;
-		writeConcentration(neighbor, oldSpeed);
-	}
-}
-
-
-uint TrafficMovement::iterateNeighborsInMove(uint & currentCell, uint speed, int & neighbor)
-{
-	uint numberOfCellsMoved = 1;
-
-	for (uint i = 2; i <= speed; i++) {
-		if (neighbor >= 0) {
-			currentCell = neighbor;
-			neighbor = road->neighbors[neighbor];
-			++numberOfCellsMoved;
-		}
-		else
-			break;
-	}
-	return numberOfCellsMoved;
-}
-
-
-std::shared_ptr<Junction>& TrafficMovement::getJunctionFromNeighbor(int neighbor)
-{
-	//calculate index in junctions vector for neighbor (-1000 to -1999)
-	return road->junctions[((neighbor + 1000)*-1)];
-}
-
-
-std::shared_ptr<Sink>& TrafficMovement::getSinkFromNeighbor(int neighbor)
-{
-	//calculate index in junctions vector for neighbor (-2000 to -2999)
-	int index = ((neighbor + 2000)*-1);
-	return road->sinks[index];
-}
-
-
-uint TrafficMovement::getGapAfterCar(uint carIndex, uint speed, int neighbor)
-{
-	for (uint i = 0; i < (speed + road->safetyDistance); i++) {
-
-		if (neighbor <= -2000)
-			return getGapToSink(neighbor, i, speed);
-		if (neighbor <= -1000)
-			return getGapToJunction(neighbor, i, speed, carIndex);
-
-		//car in Cell
-		if ((*(road->pcurrent))[neighbor] > -1)
-			return adjustGapToSafetyDist(i);
-
-		//empty cell -> get next neighbor, update currentCell
-		carIndex = neighbor;
-		neighbor = road->neighbors[neighbor];
-	}
-	return speed;
-}
-
-
-uint TrafficMovement::getGapAfterOutCell(uint outCellIndex, uint speed)
-{
-	if ((*(road->pcurrent))[outCellIndex] > -1)
-		return 0;
-
-	int neighbor = outCellIndex;
-
-	for (uint i = 0; i < (speed + road->safetyDistance); i++) {
-
-		if (neighbor <= -2000)
-			return getGapToSink(neighbor, i, speed);
-		if (neighbor <= -1000)
-			return i;
-
-		//car in Cell
-		if ((*(road->pcurrent))[neighbor] > -1)
-			return adjustGapToSafetyDist(i);
-
-		//empty cell -> get next neighbor
-		neighbor = road->neighbors[neighbor];
-	}
-	return speed;
-}
-
-
-uint TrafficMovement::getGapToSink(int neighbor, uint i, uint speed)
-{
-	if (getSinkFromNeighbor(neighbor)->carCanEnter() && i <= speed)
-		return speed;
-	return i;
-}
-
-
-uint TrafficMovement::getGapToJunction(int neighbor, uint i, uint speed, uint currentCell)
-{
-	if (getJunctionFromNeighbor(neighbor)->acceptsCar(currentCell) && i <= speed)
-		return speed;
-	return i;
-}
-
-
-uint TrafficMovement::adjustGapToSafetyDist(uint gap)
-{
-	if (gap <= road->safetyDistance)
-		return 0;
-	else
-		return gap - road->safetyDistance;
-}
-
-void TrafficMovement::writeConcentration(uint index, uint oldSpeed)
-{
-	if (concWriter != nullptr) {
-		concWriter->calculateConcForSingleCar(index, oldSpeed, (*(road->pnext))[index]);
-	}
-}
-
-void TrafficMovement::copyDevToHost()
-{
-	if (copiedDevToHost == false) {
-		gpuCalculation->copyCurrentDeviceToHost(road);
-		copiedDevToHost = true;
-	}
-}
-
-
-void TrafficMovement::writeConcentrationForJunction(uint inCellIndex, uint oldSpeed, uint speed)
-{
-	if (concWriter != nullptr) {
-		concWriter->calculateConcForJunctionCar(inCellIndex, oldSpeed, speed);
-	}
-}
-
-
-void TrafficMovement::dispResults() {
-	if (display == nullptr)
-		std::cout << "No results were saved." << std::endl;
-	else
-		display->dispResults(&road->neighbors, road->sinks, road->junctions, road->sources);
-}
-
-
-void TrafficMovement::dispCurrentConcFromGPU()
-{
-	//for (auto cell : road->conc) {
-	//	if (cell > 0)
-	//		SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), 12);
-	//	else
-	//		SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), 8);
-	//	std::cout << std::setw(4) << cell;
-	//}
-	//std::cout << std::endl;
-	//SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), 7);
-}
-
-
-
-void TrafficMovement::checkCurrentForSafetyDistance()
-{
-	if (road->safetyDistance > 0) {
-		uint neighbor;
-		for (uint i = 0; i < road->roadLength; i++) {
-			if ((*(road->pcurrent))[i] > -1) {
-				neighbor = road->neighbors[i];
-
-				for (uint j = 1; j <= road->safetyDistance; j++) {
-					if (neighbor <= -1000)
-						break;
-					if ((*(road->pcurrent))[neighbor] > -1) {
-						std::cerr << "timestep 0: safetyDistance was violated: carIndex: " << i << std::endl;
-						if (useLogger) TrafficLogger::writeError("safetyDistance was violated : carIndex: " + std::to_string(i), 0);
-						break;
-					}
-					neighbor = road->neighbors[neighbor];
-				}
-			}
-		}
-	}
-}
-
-
-const std::vector<int> & TrafficMovement::getVehiclesForVTK()
-{
-	return road->currentWithLongVehicles;
-}
-
-
-void TrafficMovement::visualizeVehicleLengthForVTK()
-{
-	if (useGPU) copyDevToHost();
-
-	road->currentWithLongVehicles = *(road->pcurrent);
-	int speed;
-
-	if (road->safetyDistance != 0) {
-		for (uint i = 0; i < road->roadLength; i++) {
-			speed = (*(road->pcurrent))[i];
-			if (speed > -1) {
-				//checkSpeed((*(road->pcurrent))[i]);
-				int neighbor = road->neighbors[i];
-				for (uint j = 1; j <= road->safetyDistance; j++) {
-
-					if (neighbor <= -1000)
-						break;
-					if ((*(road->pcurrent))[neighbor] > -1) {
-						std::cerr << "safetyDistance was violated: timestep: " << currentStep << "\t carIndex: " << i << std::endl;
-						if (useLogger)	TrafficLogger::writeError("safetyDistance was violated : carIndex: " + std::to_string(i), currentStep);
-						break;
-					}
-					else
-						(road->currentWithLongVehicles)[neighbor] = speed;					
-					neighbor = road->neighbors[neighbor];
-				}
-			}
-		}
-	}
-}
-
-
-void TrafficMovement::checkSpeed(uint speed)
-{
-	if (speed > road->maxVelocity) {
-		std::cerr << "Speed is greater than allowed maxSpeed" << std::endl;
-		if (useLogger) TrafficLogger::writeError("Speed is greater than allowed maxSpeed", currentStep);
-	}
-}
-
-
-void TrafficMovement::setUseLogger()
-{
-	this->useLogger = true;
-}
\ No newline at end of file
diff --git a/src/gpu/Traffic/TrafficMovement.h b/src/gpu/Traffic/TrafficMovement.h
deleted file mode 100644
index 8655e8af0fb1e8aba747cac837ce88e17e3fe0b8..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/TrafficMovement.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#pragma once
-
-#include <vector>
-#include <random> 
-#include <memory>
-
-
-#include "Core/DataTypes.h"
-
-#include "RoadNetwork/RoadNetworkData.h"
-#include "Utilities/RandomHelper.h"
-
-#include "Output/ConcentrationOutwriter.h"
-#include "Output/CarDisplay.h"
-
-#include "Traffic_export.h"
-
-class TrafficLogger;
-//class ConcentrationOutwriter;
-//class CarDisplay;
-class TrafficTimestep;
-
-class TRAFFIC_EXPORT TrafficMovement
-{
-public:
-	TrafficMovement(std::shared_ptr<RoadNetworkData> road, const real dawdlePossibility);
-	//TrafficMovement() {};
-	TrafficMovement(const TrafficMovement&) = delete;
-	~TrafficMovement();
-
-	//setUp
-	void setSlowToStart(const real slowStartPossibility);
-	void setMaxAcceleration(uint maxAcceleration);
-	void setConcentrationOutwriter(uint roadlength, real* concArrayStart = 0);
-	void setSaveResultsTrue(uint timeSteps);
-	void setUseGPU(real * pConcArray = nullptr, int* naschVelocity = nullptr);
-	void setUseLogger();
-
-	//timpestep
-	void loopTroughTimesteps(uint numberOfTimesteps);
-	void calculateTimestep(uint step);
-
-	//get
-	uint getRoadLength() const;
-	uint getMaxVelocity() const;
-	uint getNumberOfCars() const;			//only use for testing
-	int getSpeedAtPosition(uint pos) const;       //only use for testing
-	real getDawdlePossibility();
-	bool getUseSlowToStart();
-	real getSlowToStartPossibility();
-	uint getMaxAcceleration();
-
-
-	//methods used by junctions and sources
-	uint getGapAfterOutCell(uint outCellIndex, uint speed);
-	void moveJunctionCar(uint outCellIndex, uint remainingDistance, uint speed, uint oldSpeed);
-	void writeConcentrationForJunction(uint inCellIndex, uint oldSpeed, uint speed);
-
-	//vtk
-	void visualizeVehicleLengthForVTK();
-	const std::vector<int> & getVehiclesForVTK();
-
-	//for debugging
-	void checkSpeed(uint speed);
-
-private:
-	//init
-	void initDawdle(const real dawdlePossibility);
-	void checkCurrentForSafetyDistance();
-
-	//calculate timestep
-	void calculateSourceStep();
-	void calculateJunctionStep();
-	void switchCurrentNext();
-
-	//gap
-	uint getGapAfterCar(uint carIndex, uint speed, int neighbor);
-	uint getGapToSink(int neighbor, uint i, uint speed);
-	uint getGapToJunction(int neighbor, uint i, uint speed, uint currentCell);
-	uint adjustGapToSafetyDist(uint gap);
-
-	//getVectorIndex
-	std::shared_ptr<Junction>& getJunctionFromNeighbor(int neighbor);
-	std::shared_ptr<Sink>& getSinkFromNeighbor(int neighbor);
-
-	//apply rules
-	void applyRules(uint carIndex);
-	void accelerateCar(uint &speed);
-	void brakeCar(uint carIndex, uint &speed);
-	void dawdleCar(uint carIndex, uint &speed);
-	void moveCar(const uint carIndex, uint speed);
-	uint iterateNeighborsInMove(uint &currentCell, uint speed, int &neighbor);
-
-	//disp
-	void dispResults();
-	void dispCurrentConcFromGPU();
-
-	//pollution
-	void writeConcentration(uint index, uint oldSpeed);
-
-	//gpu
-	void copyDevToHost();
-
-private:
-	std::shared_ptr<RoadNetworkData> road;
-	std::unique_ptr<ConcentrationOutwriter> concWriter = nullptr;
-	std::unique_ptr<CarDisplay> display = nullptr;
-
-	bool useGPU = false;
-	std::unique_ptr<TrafficTimestep> gpuCalculation;
-	bool copiedDevToHost = false;
-
-	bool useLogger = false;
-
-	uint currentStep = 0;
-
-	std::mt19937 engine = RandomHelper::make_engine();
-	std::uniform_real_distribution<real> distFloat{ 0.0, 1.0 };
-
-private:
-	//temporary variables for calculation
-	uint gap;
-	float randomNumber;
-};
-
diff --git a/src/gpu/Traffic/TrafficMovementFactory - Kopie.cpp b/src/gpu/Traffic/TrafficMovementFactory - Kopie.cpp
deleted file mode 100644
index 539d8cc37f46417be75a9b5b9ef2ce990d4a5c90..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/TrafficMovementFactory - Kopie.cpp	
+++ /dev/null
@@ -1,90 +0,0 @@
-#include "TrafficMovementFactory - Kopie.h"
-
-#include <iostream>
-
-#include "GridGenerator/StreetPointFinder/JunctionReader.h"
-#include "GridGenerator/StreetPointFinder/SourceReader.h"
-#include "GridGenerator/StreetPointFinder/SinkReader.h"
-
-#include "RoadNetwork/RoadMaker.h"
-#include "TrafficMovement.h"
-#include "Source/SourceRandom.h"
-#include "Junction/JunctionRandom.h"
-#include "Sink/SinkRandom.h"
-#include "Output/ConcentrationByPosition.h"
-#include "Output/ConcBySpeedAndAcceleration.h"
-#include "Utilities/safe_casting.h"
-
-
-void TrafficMovementFactoryTest::initTrafficMovement(bool useGPU, real * pConcArray)
-{
-	//Variables
-
-	uint roadLength = 40;
-
-	real vehicleDensity = 0.1f;
-
-	uint vehicleLength = 2;
-	uint maxVelocity = 5;
-	uint maxAcceleration = 1;
-
-	real dawdlePossibility = (real) 0.2; //typical value: 0.2
-	real slowToStartPossibility = (real) 0.4;
-
-	this->useGPU = true;
-	bool useSlowToStart = true;
-
-
-	//make RoadNetwork
-	std::vector<int> road(40);
-	std::fill(road.begin(), road.end(), -1);
-	road[9] = 5;
-	auto roadNetwork = std::make_shared<RoadMaker>(road, maxVelocity, vehicleLength);
-	//RoadMaker(const uint roadLength, const uint maxVelocity, uint vehicleLength, const real vehicleDensity); //random vehicle Distribution
-	//RoadMaker(const std::vector<int> vehicleDistribution, const uint maxVelocity, uint vehicleLength); //given vehicle distribution
-	//RoadMaker(const uint roadLength, const uint maxVelocity, uint vehicleLength);//empty road
-
-	//Sources
-	std::shared_ptr<Source> source = std::make_shared <SourceRandom>(SourceRandom(0, 0.9f, maxVelocity));
-	std::shared_ptr<Source> source1 = std::make_shared <SourceRandom>(SourceRandom(11, 0.9f, maxVelocity));
-	roadNetwork->addSource(source);
-	roadNetwork->addSource(source1);
-
-	//Sinks
-	std::shared_ptr<Sink> s = std::make_shared <SinkRandom>(SinkRandom(roadLength-1, 0.5f));
-	std::shared_ptr<Sink> s1 = std::make_shared <SinkRandom>(SinkRandom(29, 0.5f));
-	roadNetwork->addSink(s);
-	roadNetwork->addSink(s1);
-
-	//Junctions
-	std::vector<uint> inCellIndices = { 9,19 };
-	std::vector<uint> outCellIndices = { 21,31 };
-	
-	std::shared_ptr<Junction> j = std::make_shared<JunctionRandom>(JunctionRandom(inCellIndices, outCellIndices,5));
-	roadNetwork->addJunction(j);
-
-	//init TrafficMovement
-	this->simulator = std::make_shared<TrafficMovement>(roadNetwork, dawdlePossibility);
-	if (useSlowToStart) simulator->setSlowToStart(slowToStartPossibility);	
-	simulator->setMaxAcceleration(maxAcceleration);
-	if (this->useGPU) simulator->setUseGPU(pConcArray);
-
-	//init ConcentrationOutwriter
-	if (!this->useGPU) {;
-		simulator->setConcentrationOutwriter(simulator->getRoadLength(), pConcArray);
-	}
-}
-
-
-void TrafficMovementFactoryTest::calculateTimestep(uint step, uint stepForVTK)
-{
-	simulator->calculateTimestep(step);
-	writeTimestep(step);
-}
-
-void TrafficMovementFactoryTest::loopThroughTimesteps(uint timeSteps)
-{
-	simulator->setSaveResultsTrue(timeSteps);
-	simulator->loopTroughTimesteps(timeSteps);
-	//std::cout << "Number of Cars: " << simulator->getNumberOfCars() << std::endl;
-}
diff --git a/src/gpu/Traffic/TrafficMovementFactory - Kopie.h b/src/gpu/Traffic/TrafficMovementFactory - Kopie.h
deleted file mode 100644
index ba27cf68e34dff215984835a097829240c349a2d..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/TrafficMovementFactory - Kopie.h	
+++ /dev/null
@@ -1,23 +0,0 @@
-# pragma once
-
-
-#include "Core/DataTypes.h"
-
-#include <vector>
-#include <memory>
-
-#include "TrafficMovementFactory.h"
-#include "GridGenerator/StreetPointFinder/StreetPointFinder.h"
-
-#include "Traffic_export.h"
-
-
-class TRAFFIC_EXPORT TrafficMovementFactoryTest :
-	public TrafficMovementFactory {
-public:
-	TrafficMovementFactoryTest() {};
-	~TrafficMovementFactoryTest() {};
-	virtual void initTrafficMovement(bool useGPU, real * pConcArray = nullptr);
-	virtual void calculateTimestep(uint step, uint stepForVTK);
-	void loopThroughTimesteps(uint timeSteps);
-};
\ No newline at end of file
diff --git a/src/gpu/Traffic/TrafficMovementFactory.cpp b/src/gpu/Traffic/TrafficMovementFactory.cpp
deleted file mode 100644
index 0e3f116a81d3e35d1d791bd2fe56b269cc26bb63..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/TrafficMovementFactory.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-#include "TrafficMovementFactory.h"
-
-#include <iostream>
-
-#include "GridGenerator/StreetPointFinder/JunctionReader.h"
-#include "GridGenerator/StreetPointFinder/SourceReader.h"
-#include "GridGenerator/StreetPointFinder/SinkReader.h"
-
-#include "RoadNetwork/RoadMaker.h"
-#include "TrafficMovement.h"
-#include "Source/SourceRandom.h"
-#include "Junction/JunctionRandom.h"
-#include "Sink/SinkRandom.h"
-#include "Output/ConcentrationByPosition.h"
-#include "Output/ConcBySpeedAndAcceleration.h"
-#include "Utilities/safe_casting.h"
-#include "Utilities/Logger.h"
-
-
-TrafficMovementFactory::TrafficMovementFactory()
-{
-}
-
-
-void TrafficMovementFactory::initTrafficMovement(std::string path, bool useGPU, real * pConcArray, int* naschVelocity)
-{
-	//Variables
-
-	real vehicleDensity = 0.03f;
-
-	uint vehicleLength = 7;
-	uint maxVelocity = 14;
-	uint maxAcceleration = 2;
-
-	real dawdlePossibility = (real) 0.2; //typical value: 0.2
-	real slowToStartPossibility = (real) 0.3;
-
-	this->useGPU = useGPU;
-	bool useSlowToStart = true;
-	useLogger = false;
-
-	std::string info = "Only Traffic";
-
-
-
-	//Paths
-
-#ifdef _WIN32
-	//Baumbart
-	inputPath = "C:/Users/schoen/Desktop/git/MS2/git/targets/apps/LBM/Basel/resources/";
-	outputPath = path + "results/";
-	//Gamling
-	//inputPath = path + "VirtualFluidsGPU/git/targets/apps/LBM/Basel/resources/";
-	//outputPath = path + "Basel_Ergebnisse/";
-#else
-	//Phoenix
-	inputPath = "/work/marschoe/Basel4GPU/source/git/targets/apps/LBM/Basel/resources/";
-	outputPath = path + "/traffic/";
-#endif
-	outputFilename = "Basel_Traffic_Test";
-	std::string logfile = outputPath + "TrafficLog.txt";
-
-
-
-	//TrafficLogger	
-	if (useLogger) {
-		TrafficLogger::startLogger(logfile);
-		TrafficLogger::writeSimulationStart(info, this->useGPU);
-	}
-
-
-	//StreetPointFinder M:/Basel2019  C:/Users/schoen/Desktop/git/MS2
-	//finder.readStreets("C:/Users/schoen/Desktop/git/MS2/git/targets/apps/LBM/streetTest/resources/ExampleStreets.txt");
-	//finder.writeVTK("M:/Basel2019/results/ExampleStreets.vtk");
-	finder.readStreets(inputPath + "Streets.txt");
-	finder.writeVTK(outputPath + outputFilename + ".vtk");
-	finder.write3DVTK(outputPath + outputFilename + ".vtk");
-
-
-	JunctionReader junctionReader;
-	//junctionReader.readJunctions("C:/Users/schoen/Desktop/git/MS2/git/targets/apps/LBM/Basel/resources/Junctions.txt", finder);
-	junctionReader.readJunctions(inputPath + "Junctions.txt", &finder);
-
-
-	SinkReader sinkReader;
-	//sinkReader.readSinks("C:/Users/schoen/Desktop/git/MS2/git/targets/apps/LBM/Basel/resources/Sinks.txt", finder);
-	sinkReader.readSinks(inputPath + "Sinks.txt", &finder);
-
-
-	SourceReader sourceReader;
-	//sourceReader.readSources("C:/Users/schoen/Desktop/git/MS2/git/targets/apps/LBM/Basel/resources/Sources.txt", finder);
-	sourceReader.readSources(inputPath + "Sources.txt", &finder);
-
-
-	//calculate RoadLength
-	uint roadLength = 0;
-	uint numberOfStreets = castSizeT_Uint(finder.streets.size());
-	for (uint i = 0; i < numberOfStreets; i++) {
-		roadLength += finder.streets[i].numberOfCells;
-	}
-
-
-	//make RoadNetwork
-	auto roadNetwork = std::make_shared<RoadMaker>(roadLength, maxVelocity, vehicleLength, vehicleDensity);
-
-
-	//Sources
-	std::shared_ptr<Source> source;
-	for (uint i = 0; i < sourceReader.sources.size(); i++) {
-		source = std::make_shared <SourceRandom>(sourceReader.sources[i].sourceIndex, sourceReader.sources[i].sourcePossibility, roadNetwork->getMaxVelocity());
-		roadNetwork->addSource(source);
-	}	
-
-
-	//Sinks
-	std::shared_ptr<Sink>  sink;
-		for (uint i = 0; i < sinkReader.sinks.size(); i++) {
-			sink = std::make_shared <SinkRandom>(sinkReader.sinks[i].sinkIndex, sinkReader.sinks[i].sinkBlockedPossibility);
-			roadNetwork->addSink(sink);
-		}
-
-
-	//Junctions
-	std::shared_ptr<Junction> junction;
-	for (uint i = 0; i < junctionReader.junctions.size(); i++) {
-		junction = std::make_shared <JunctionRandom>(junctionReader.junctions[i].inCells, junctionReader.junctions[i].outCells, junctionReader.junctions[i].trafficLightSwitchTime);
-		junction->setCellIndicesForNoUTurn(junctionReader.junctions[i].carCanNotEnterThisOutCell);
-		roadNetwork->addJunction(junction);
-	}
-
-
-	//set neighbors for curves
-	for (uint i = 0; i < junctionReader.specialNeighbors.cells.size(); i++) {
-		roadNetwork->setNeighborForCurve(junctionReader.specialNeighbors.cells[i], junctionReader.specialNeighbors.neighbors[i]);
-	}
-
-
-	//init TrafficMovement
-	this->simulator = std::make_shared<TrafficMovement>(roadNetwork, dawdlePossibility);
-	simulator->setMaxAcceleration(maxAcceleration);
-	if (useSlowToStart) simulator->setSlowToStart(slowToStartPossibility);
-	if (useLogger) simulator->setUseLogger();
-
-
-	//init ConcentrationOutwriter
-	if (!this->useGPU) {
-		simulator->setConcentrationOutwriter(simulator->getRoadLength(), pConcArray);
-	}
-
-
-	//prepare writing to vtk
-	//this->outputPath = "M:/Basel2019/results/";
-	this->cars = &(simulator->getVehiclesForVTK());
-
-
-	//write initial Timestep
-	simulator->visualizeVehicleLengthForVTK();
-	finder.writeVTK(outputPath + outputFilename + "_" + std::to_string(0) + ".vtk", *cars);
-	finder.write3DVTK(outputPath + outputFilename + "_3D_" + std::to_string(0) + ".vtk", *cars);
-
-
-	//GPU
-	if (this->useGPU) simulator->setUseGPU(pConcArray, naschVelocity);
-}
-
-
-void TrafficMovementFactory::calculateTimestep(uint step)
-{
-	simulator->calculateTimestep(step);
-
-	//std::cout << "Number of Cars: " << simulator->getNumberOfCars() << std::endl;
-}
-
-void TrafficMovementFactory::writeTimestep(uint stepForVTK)
-{
-	simulator->visualizeVehicleLengthForVTK();
-	finder.writeVTK  (outputPath + outputFilename + "_" + std::to_string(stepForVTK) + ".vtk", *cars);
-	finder.write3DVTK(outputPath + outputFilename + "_3D_" + std::to_string(stepForVTK) + ".vtk", *cars);
-}
-
-
-void TrafficMovementFactory::writeReducedTimestep(uint stepForVTK)
-{
-	simulator->visualizeVehicleLengthForVTK();
-	finder.writeReducedVTK(outputPath + outputFilename + "_" + std::to_string(stepForVTK) + ".vtk", *cars);
-	finder.write3DVTK     (outputPath + outputFilename + "_3D_" + std::to_string(stepForVTK) + ".vtk", *cars);
-}
-
-
-void TrafficMovementFactory::endSimulation(uint numTimesteps, double duration)
-{
-	if (!useLogger) return;
-	TrafficLogger::writeSimulationEnd(simulator->getRoadLength(), numTimesteps, duration);
-}
-
-
-
diff --git a/src/gpu/Traffic/TrafficMovementFactory.h b/src/gpu/Traffic/TrafficMovementFactory.h
deleted file mode 100644
index c934d276073afe9c6bec69786a48148470e69b61..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/TrafficMovementFactory.h
+++ /dev/null
@@ -1,38 +0,0 @@
-# pragma once
-
-
-#include "Core/DataTypes.h"
-
-#include <vector>
-#include <memory>
-
-#include "Traffic_export.h"
-
-#include "GridGenerator/StreetPointFinder/StreetPointFinder.h"
-
-class TrafficMovement;
-class TrafficLogger;
-
-class TRAFFIC_EXPORT TrafficMovementFactory
-{
-public:
-	TrafficMovementFactory();
-	~TrafficMovementFactory() {};
-	virtual void initTrafficMovement(std::string path, bool useGPU, real * pConcArray = nullptr, int* naschVelocity = nullptr);
-	virtual void calculateTimestep(uint step);
-	virtual void writeTimestep(uint stepForVTK);
-	void writeReducedTimestep(uint stepForVTK);
-	virtual void endSimulation(uint numTimesteps, double duration);
-
-protected:
-	StreetPointFinder finder;
-	std::shared_ptr<TrafficMovement> simulator;
-
-	std::string inputPath;
-	std::string outputPath;
-	std::string outputFilename;
-	const std::vector<int>* cars;
-
-	bool useLogger;
-	bool useGPU;
-};
\ No newline at end of file
diff --git a/src/gpu/Traffic/TrafficMovementFactoryTestRoads.cpp b/src/gpu/Traffic/TrafficMovementFactoryTestRoads.cpp
deleted file mode 100644
index 3439b4c803d7c64822ed4d1cda75ab4fb068e22a..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/TrafficMovementFactoryTestRoads.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#include "TrafficMovementFactory - Kopie.h"
-
-#include <iostream>
-
-#include "GridGenerator/StreetPointFinder/JunctionReader.h"
-#include "GridGenerator/StreetPointFinder/SourceReader.h"
-#include "GridGenerator/StreetPointFinder/SinkReader.h"
-
-#include "RoadNetwork/RoadMaker.h"
-#include "TrafficMovement.h"
-#include "Source/SourceRandom.h"
-#include "Junction/JunctionRandom.h"
-#include "Sink/SinkRandom.h"
-#include "Output/ConcentrationByPosition.h"
-#include "Output/ConcBySpeedAndAcceleration.h"
-#include "Utilities/safe_casting.h"
-
-
-void TrafficMovementFactoryTest::initTrafficMovement(real * pConcArray)
-{
-	//Variables
-
-	uint roadLength = 40;
-
-	real vehicleDensity = 0.1f;
-
-	uint vehicleLength = 2;
-	uint maxVelocity = 5;
-	uint maxAcceleration = 1;
-
-	real dawdlePossibility = (real) 0.2; //typical value: 0.2
-	real slowToStartPossibility = (real) 0.4;
-
-	bool useGPU = true;
-	bool useSlowToStart = true;
-
-
-	//make RoadNetwork
-	std::vector<int> road(40);
-	std::fill(road.begin(), road.end(), -1);
-	road[9] = 5;
-	auto roadNetwork = std::make_unique<RoadMaker>(road, maxVelocity, vehicleLength);
-	//RoadMaker(const uint roadLength, const uint maxVelocity, uint vehicleLength, const real vehicleDensity); //random vehicle Distribution
-	//RoadMaker(const std::vector<int> vehicleDistribution, const uint maxVelocity, uint vehicleLength); //given vehicle distribution
-	//RoadMaker(const uint roadLength, const uint maxVelocity, uint vehicleLength);//empty road
-
-	//Sources
-	std::unique_ptr<Source> source = std::make_unique <SourceRandom>(SourceRandom(0, 0.9f, maxVelocity));
-	std::unique_ptr<Source> source1 = std::make_unique <SourceRandom>(SourceRandom(11, 0.9f, maxVelocity));
-	roadNetwork->addSource(source);
-	roadNetwork->addSource(source1);
-
-	//Sinks
-	std::unique_ptr<Sink> s = std::make_unique <SinkRandom>(SinkRandom(roadLength-1, 0.5f));
-	std::unique_ptr<Sink> s1 = std::make_unique <SinkRandom>(SinkRandom(29, 0.5f));
-	roadNetwork->addSink(move(s));
-	roadNetwork->addSink(move(s1));
-
-	//Junctions
-	std::vector<uint> inCellIndices = { 9,19 };
-	std::vector<uint> outCellIndices = { 21,31 };
-	
-	std::unique_ptr<Junction> j = std::make_unique<JunctionRandom>(JunctionRandom(inCellIndices, outCellIndices,5));
-	roadNetwork->addJunction(std::move(j));
-
-	//init TrafficMovement
-	this->simulator = std::make_shared<TrafficMovement>(std::move(roadNetwork), dawdlePossibility);
-	if (useSlowToStart) simulator->setSlowToStart(slowToStartPossibility);	
-	simulator->setMaxAcceleration(maxAcceleration);
-	if (useGPU) simulator->setUseGPU(pConcArray);
-
-	//init ConcentrationOutwriter
-	if (!useGPU) {
-		std::unique_ptr<ConcentrationOutwriter> writer = std::make_unique<ConcBySpeedAndAcceleration>(ConcBySpeedAndAcceleration(simulator->getRoadLength(), pConcArray));
-		simulator->setConcentrationOutwriter(move(writer));
-	}
-}
-
-
-void TrafficMovementFactoryTest::calculateTimestep(uint step, uint stepForVTK)
-{
-	simulator->calculateTimestep(step);
-	writeTimestep(step);
-}
-
-void TrafficMovementFactoryTest::loopThroughTimesteps(uint timeSteps)
-{
-	simulator->setSaveResultsTrue(timeSteps);
-	simulator->loopTroughTimesteps(timeSteps);
-	//std::cout << "Number of Cars: " << simulator->getNumberOfCars() << std::endl;
-}
diff --git a/src/gpu/Traffic/TrafficMovementFactoryTestRoads.h b/src/gpu/Traffic/TrafficMovementFactoryTestRoads.h
deleted file mode 100644
index faafcf89f7911a0589369bca0dfb60ae431d6e03..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/TrafficMovementFactoryTestRoads.h
+++ /dev/null
@@ -1,23 +0,0 @@
-# pragma once
-
-
-
-#include <vector>
-#include <memory>
-
-#include "TrafficMovementFactory.h"
-#include "Core/DataTypes.h"
-#include "GridGenerator/StreetPointFinder/StreetPointFinder.h"
-
-#include "Traffic_export.h"
-
-
-class TRAFFIC_EXPORT TrafficMovementFactoryTest :
-	public TrafficMovementFactory {
-public:
-	TrafficMovementFactoryTest() {};
-	~TrafficMovementFactoryTest() {};
-	virtual void initTrafficMovement(real * pConcArray = nullptr);
-	virtual void calculateTimestep(uint step, uint stepForVTK);
-	void loopThroughTimesteps(uint timeSteps);
-};
\ No newline at end of file
diff --git a/src/gpu/Traffic/Utilities/ConsoleColor.cpp b/src/gpu/Traffic/Utilities/ConsoleColor.cpp
deleted file mode 100644
index f015fe18fefa656c1495d285b5b928ebcfe26965..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/ConsoleColor.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-#include "ConsoleColor.h"
-
-
-
-
-#include "Core/DataTypes.h"
-
-
-//// Windows //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-#ifdef WIN32
-
-#include <iostream>
-#include <iomanip>	//formatting output streams
-#include <windows.h> //for colourful console output
-
-
-void ConsoleColor::setDefaultWhite()
-{
-	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), 7); // set output default white 7;
-}
-
-
-void ConsoleColor::setDarkGrey()
-{
-	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), 8); //set output dark grey 8, dark blue 1, black 0;
-}
-
-
-void ConsoleColor::setBrightRed()
-{
-	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), 12); //set output bright green 10, bright red 12;
-}
-
-
-void ConsoleColor::setBlack()
-{
-	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), 0); //set output dark grey 8, dark blue 1, black 0;
-}
-
-
-void ConsoleColor::setBrightGreen()
-{
-	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), 10); //set output bright green 10, bright red 12;
-}
-
-
-#endif WIN32
-
-
-
-
-
-//// Linux //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifndef WIN32
-
-void ConsoleColor::setDefaultWhite()
-{}
-
-void ConsoleColor::setDarkGrey()
-{}
-
-void ConsoleColor::setBrightRed()
-{}
-
-void ConsoleColor::setBlack()
-{}
-
-void ConsoleColor::setBrightGreen()
-{}
-
-#endif // !WIN32
-
-
diff --git a/src/gpu/Traffic/Utilities/ConsoleColor.h b/src/gpu/Traffic/Utilities/ConsoleColor.h
deleted file mode 100644
index 8b5234bde9f42ac8d3daf24887c81dab6558639f..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/ConsoleColor.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT ConsoleColor
-{
-public:
-	static void setDefaultWhite();
-	static void setDarkGrey();
-	static void setBrightRed();
-	static void setBlack();
-	static void setBrightGreen();
-
-};
-
diff --git a/src/gpu/Traffic/Utilities/Logger.cpp b/src/gpu/Traffic/Utilities/Logger.cpp
deleted file mode 100644
index af060b90b6a9ff31d0fe3fe2be68b5db7509ece0..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/Logger.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "Logger.h"
-
-#include <stdexcept>
-#include <iostream>
-#include <time.h>
-#include <cmath>
-
-TrafficLogger TrafficLogger::instance;
-
-void TrafficLogger::startLogger(std::string filename)
-{
-	instance.filename = filename;
-
-	instance.file.open(filename.c_str(), std::ios::app);
-	try { if (instance.file.fail()) throw std::runtime_error("couldn't open file for logger: " + filename); }
-	catch (const std::exception& e) {
-		std::cerr << e.what() << std::endl;
-		std::cin.get();
-		exit(EXIT_FAILURE);
-	}
-}
-
-
-void TrafficLogger::writeSimulationStart(std::string info, bool useGPU)
-{
-	time_t now = time(0);
-
-	instance.file << "Simulation started at: " << ctime(&now);
-	instance.file << "Info: " << info << "\t \t" << "simulating on the ";
-	if (useGPU) instance.file << "GPU \t\t";
-	else instance.file << "CPU \t\t";
-	#ifdef NDEBUG
-		instance.file << "Release \n";
-	#else
-		instance.file << "Debug \n";
-	#endif
-}
-
-
-void TrafficLogger::writeError(std::string error, uint currentTimestep)
-{
-	instance.file << "Error: " << error << "\t timestep: " << currentTimestep << "\n";
-}
-
-
-void TrafficLogger::writeSimulationEnd(uint numRoadCells, uint numTimesteps, double duration)
-{
-	uint hours = static_cast<uint>(std::floor(duration / 3600));
-	uint minutes = static_cast<uint>(std::floor((duration - 3600 * hours) / 60));
-	uint seconds = static_cast<uint>(duration - 3600 * hours - 60 * minutes);
-
-	std::string durationString = std::to_string(hours) + " h \t" + std::to_string(minutes) + " m \t" + std::to_string(seconds) + " s";
-
-	instance.file << "Simulation finished: Number of roadcells : \t" << numRoadCells << "\t total number of timesteps: " << numTimesteps;
-	instance.file << "\t duration: " << duration << " s" << "\t\t => " << durationString << "\n\n";
-	instance.file.close();
-}
\ No newline at end of file
diff --git a/src/gpu/Traffic/Utilities/Logger.h b/src/gpu/Traffic/Utilities/Logger.h
deleted file mode 100644
index a10a567ac5f0e986fafab735e36019698feaf395..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/Logger.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#pragma once
-
-
-#include "Core/DataTypes.h"
-
-#include <string>
-#include <fstream>
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT TrafficLogger
-{
-private:
-	std::string filename;	
-	std::ofstream file;
-	static TrafficLogger instance;
-
-public:	
-	TrafficLogger() {};
-	TrafficLogger(const TrafficLogger& logger) {}
-	static void startLogger(std::string filename);
-
-	static void writeSimulationStart(std::string info, bool useGPU);
-	static void writeError(std::string error, uint currentTimestep);
-	static void writeSimulationEnd(uint numRoadCells, uint numTimesteps, double duration);	
-};
-
diff --git a/src/gpu/Traffic/Utilities/RandomHelper.cpp b/src/gpu/Traffic/Utilities/RandomHelper.cpp
deleted file mode 100644
index 264dcd6eb9b5335914119a034f7e879b66126dd3..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/RandomHelper.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "RandomHelper.h"
-
-
-std::mt19937 RandomHelper::make_engine()
-{
-	std::random_device r;
-	std::seed_seq seed{r()};
-	return std::mt19937(seed);
-}
\ No newline at end of file
diff --git a/src/gpu/Traffic/Utilities/RandomHelper.h b/src/gpu/Traffic/Utilities/RandomHelper.h
deleted file mode 100644
index 869e93ed3e3e2b013a1c79090dc02ccaf6e23897..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/RandomHelper.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-
-#include <random>
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT RandomHelper
-{
-public:
-	static std::mt19937 make_engine();
-};
-
diff --git a/src/gpu/Traffic/Utilities/VectorHelper.cpp b/src/gpu/Traffic/Utilities/VectorHelper.cpp
deleted file mode 100644
index a239991aa8f81d17efdb113457fc87cb3589842d..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/VectorHelper.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-#include "VectorHelper.h"
-
-#include <iostream>
-#include <iomanip>	//formatting output streams
-
-#include "ConsoleColor.h"
-
-void VectorHelper::fillVector(std::vector<int> &vector, int insertNumber) {
-	fill(vector.begin(), vector.end(), insertNumber);
-}
-
-void VectorHelper::fillVector(std::vector<std::vector<int> > &vector, int insertNumber) {
-	for (uint i = 0; i < vector.size(); i++) {
-		fill(vector[i].begin(), vector[i].end(), insertNumber);
-	}
-}
-
-void VectorHelper::dispVector(const std::vector<int> &vector)
-{
-	for (int number : vector) {
-		std::cout << std::setw(4) << number;
-	}
-	std::cout << std::endl;
-}
-
-void VectorHelper::dispVector(const std::vector<std::vector<int> > &vector)
-{
-	for (uint i = 0; i < vector.size(); i++) {
-		for (uint j = 0; j < vector[i].size(); j++) {
-			std::cout << std::setw(4) << vector[i][j];
-		}
-		std::cout << std::endl;
-	}
-	std::cout << std::endl;
-}
-
-void VectorHelper::dispVectorColour(const std::vector<int> &vector)
-{
-	for (int number : vector) {
-		makeVectorOutputColourful(number);
-		std::cout << std::setw(4) << number;
-	}
-	std::cout << std::endl;
-	ConsoleColor::setDefaultWhite();
-}
-
-void VectorHelper::dispVectorColour(const std::vector<std::vector<int>>& vector)
-{
-	for (uint i = 0; i < vector.size(); i++) {
-		for (uint j = 0; j < vector[i].size(); j++) {
-			makeVectorOutputColourful(vector[i][j]);
-			std::cout << std::setw(4) << vector[i][j];
-		}
-		std::cout << std::endl;
-	}
-	std::cout << std::endl;
-	ConsoleColor::setDefaultWhite();
-}
-
-void VectorHelper::makeVectorOutputColourful(int outputNumber)
-{
-	switch (outputNumber) {
-	case -1:
-		ConsoleColor::setDarkGrey();
-		break;
-	case 0:
-		ConsoleColor::setBrightRed();
-		break;
-	case -5:
-		ConsoleColor::setBlack();
-		break;
-	default:
-		ConsoleColor::setBrightGreen();
-	}
-
-}
\ No newline at end of file
diff --git a/src/gpu/Traffic/Utilities/VectorHelper.h b/src/gpu/Traffic/Utilities/VectorHelper.h
deleted file mode 100644
index eccc2b385ea72b3804a5d5aacc03e239e571808c..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/VectorHelper.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#pragma once
-
-#include <vector>
-
-
-#include "Core/DataTypes.h"
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT VectorHelper
-{
-public:
-	static void fillVector(std::vector<int>& vector, int insertNumber);
-	static void fillVector(std::vector<std::vector<int> > &vector, int insertNumber);
-
-	static void dispVector(const std::vector<int> &vector);
-	static void dispVector(const std::vector<std::vector<int> >& vector);
-	static void dispVectorColour(const std::vector<int> &vector);
-	static void dispVectorColour(const std::vector<std::vector<int> >& vector);
-	static void makeVectorOutputColourful(const int outputNumber);
-};
-
diff --git a/src/gpu/Traffic/Utilities/invalidInput_error.cpp b/src/gpu/Traffic/Utilities/invalidInput_error.cpp
deleted file mode 100644
index 9d72fd451fd080b176b3cf99ef6ffd251bfda9bb..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/invalidInput_error.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "invalidInput_error.h"
-
-#include <iostream>
-
-
-invalidInput_error::invalidInput_error(char const * const message) throw() : runtime_error(message)
-{
-}
-
-char const * invalidInput_error::what() const throw()
-{
-	return runtime_error::what();
-}
-
diff --git a/src/gpu/Traffic/Utilities/invalidInput_error.h b/src/gpu/Traffic/Utilities/invalidInput_error.h
deleted file mode 100644
index d3ce3654588328d68f66f398f002f74125d1ea25..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/invalidInput_error.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-
-// using standard exceptions
-#include <stdexcept>
-
-#include "Traffic_export.h"
-
-class TRAFFIC_EXPORT invalidInput_error :
-	public std::runtime_error
-{
-public:
-	invalidInput_error(char const* const message) throw();
-	virtual char const* what() const throw();
-};
-
diff --git a/src/gpu/Traffic/Utilities/safe_casting.h b/src/gpu/Traffic/Utilities/safe_casting.h
deleted file mode 100644
index ae8bcc2efba997ca5117ed79b0a7d814bf90f5b2..0000000000000000000000000000000000000000
--- a/src/gpu/Traffic/Utilities/safe_casting.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#pragma once
-
-#include <stdexcept>
-#include "Core/DataTypes.h"
-#include <climits>
-
-
-static uint castSizeT_Uint(size_t number) {
-	if (number > UINT_MAX)
-	{
-		throw std::overflow_error("number is larger than UINT_MAX");
-	}
-	return static_cast<uint>(number);
-}
-
-static int castSizeT_Int(size_t number) {
-	if (number > INT_MAX)
-	{
-		throw std::overflow_error("number is larger than INT_MAX");
-	}
-	return static_cast<uint>(number);
-}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index 759528e5346ba8d9899cb90eb64503b20a44c4fc..ed647cb406bca23ef90667b7d17171c7b3f46283 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -8,7 +8,7 @@ if(MSVC)
     set(additional_libraries ws2_32 Traffic) # ws_32 throws an error on Phoenix
 endif()
 
-vf_add_library(PUBLIC_LINK basics lbmCuda PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX vf_cuda)
+vf_add_library(PUBLIC_LINK basics lbm PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX vf_cuda)
 
 #SET(TPN_WIN32 "/EHsc")
 #https://stackoverflow.com/questions/6832666/lnk2019-when-including-asio-headers-solution-generated-with-cmake
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp
index f8f5c42b835a1a4ba55e378e624230bbb43dc05a..e3f344231dc9d5e19c09f7ce1fde7d31f1770232 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.cpp
@@ -17,11 +17,11 @@ void alloc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 void init2ndMoments(Parameter* para)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//init host arrays
-		for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
 			para->getParH(lev)->kxyFromfcNEQ[pos]    = 0.0;
 			para->getParH(lev)->kyzFromfcNEQ[pos]    = 0.0;
@@ -116,7 +116,7 @@ void init3rdMoments(Parameter* para)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//init host arrays
-		for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
 			para->getParH(lev)->CUMbbb[pos] = 0.0;
 			para->getParH(lev)->CUMabc[pos] = 0.0;
@@ -198,7 +198,7 @@ void calc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//allocation (device-memory + host-memory)
@@ -211,11 +211,11 @@ void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManag
 
 void initHigherOrderMoments(Parameter* para)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//init host arrays
-		for (unsigned int pos=0;pos<para->getParH(lev)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
 			para->getParH(lev)->CUMcbb[pos] = 0.0;
 			para->getParH(lev)->CUMbcb[pos] = 0.0;
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp
index 77db571f7f10e0ea0bff827400270dd074d4e666..80a667f91976b745b619fed5d5763b5429a6559c 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.cpp
@@ -11,16 +11,16 @@
 
 void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
 		cudaMemoryManager->cudaAllocMedianOut(lev);
-		for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
-			para->getParH(lev)->vx_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->vy_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->vz_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->rho_SP_Med_Out[i]   = (real)0.0;
-			para->getParH(lev)->press_SP_Med_Out[i] = (real)0.0;
+			para->getParH(lev)->vx_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->vy_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->vz_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->rho_SP_Med_Out[pos]   = (real)0.0;
+			para->getParH(lev)->press_SP_Med_Out[pos] = (real)0.0;
 		}
 	}
 }
@@ -31,15 +31,15 @@ void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 void calcMedian(Parameter* para, uint tdiff)
 {
-	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
-		for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
-			para->getParH(lev)->vx_SP_Med_Out[i]    = para->getParH(lev)->vx_SP_Med[i]   / (real)tdiff;
-			para->getParH(lev)->vy_SP_Med_Out[i]    = para->getParH(lev)->vy_SP_Med[i]   / (real)tdiff;
-			para->getParH(lev)->vz_SP_Med_Out[i]    = para->getParH(lev)->vz_SP_Med[i]   / (real)tdiff;
-			para->getParH(lev)->rho_SP_Med_Out[i]   = para->getParH(lev)->rho_SP_Med[i]  / (real)tdiff;
-			para->getParH(lev)->press_SP_Med_Out[i] = para->getParH(lev)->press_SP_Med[i]/ (real)tdiff;
+			para->getParH(lev)->vx_SP_Med_Out[pos]    = para->getParH(lev)->vx_SP_Med[pos]   / (real)tdiff;
+			para->getParH(lev)->vy_SP_Med_Out[pos]    = para->getParH(lev)->vy_SP_Med[pos]   / (real)tdiff;
+			para->getParH(lev)->vz_SP_Med_Out[pos]    = para->getParH(lev)->vz_SP_Med[pos]   / (real)tdiff;
+			para->getParH(lev)->rho_SP_Med_Out[pos]   = para->getParH(lev)->rho_SP_Med[pos]  / (real)tdiff;
+			para->getParH(lev)->press_SP_Med_Out[pos] = para->getParH(lev)->press_SP_Med[pos]/ (real)tdiff;
 		}
 	}
 }
@@ -75,14 +75,14 @@ void allocMedianAD(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
         cudaMemoryManager->cudaAllocMedianOutAD(lev);
-		for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
-			para->getParH(lev)->vx_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->vy_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->vz_SP_Med_Out[i]    = (real)0.0;
-			para->getParH(lev)->rho_SP_Med_Out[i]   = (real)0.0;
-			para->getParH(lev)->press_SP_Med_Out[i] = (real)0.0;
-			para->getParH(lev)->Conc_Med_Out[i]     = (real)0.0;
+			para->getParH(lev)->vx_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->vy_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->vz_SP_Med_Out[pos]    = (real)0.0;
+			para->getParH(lev)->rho_SP_Med_Out[pos]   = (real)0.0;
+			para->getParH(lev)->press_SP_Med_Out[pos] = (real)0.0;
+			para->getParH(lev)->Conc_Med_Out[pos]     = (real)0.0;
 		}
 	}
 }
@@ -95,14 +95,14 @@ void calcMedianAD(Parameter* para, uint tdiff)
 {
 	for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 	{
-		for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++)
+		for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++)
 		{
-			para->getParH(lev)->vx_SP_Med_Out[i]    = para->getParH(lev)->vx_SP_Med[i]    / (real)tdiff;
-			para->getParH(lev)->vy_SP_Med_Out[i]    = para->getParH(lev)->vy_SP_Med[i]    / (real)tdiff;
-			para->getParH(lev)->vz_SP_Med_Out[i]    = para->getParH(lev)->vz_SP_Med[i]    / (real)tdiff;
-			para->getParH(lev)->rho_SP_Med_Out[i]   = para->getParH(lev)->rho_SP_Med[i]   / (real)tdiff;
-			para->getParH(lev)->press_SP_Med_Out[i] = para->getParH(lev)->press_SP_Med[i] / (real)tdiff;
-			para->getParH(lev)->Conc_Med_Out[i]     = para->getParH(lev)->Conc_Med[i]     / (real)tdiff;
+			para->getParH(lev)->vx_SP_Med_Out[pos]    = para->getParH(lev)->vx_SP_Med[pos]    / (real)tdiff;
+			para->getParH(lev)->vy_SP_Med_Out[pos]    = para->getParH(lev)->vy_SP_Med[pos]    / (real)tdiff;
+			para->getParH(lev)->vz_SP_Med_Out[pos]    = para->getParH(lev)->vz_SP_Med[pos]    / (real)tdiff;
+			para->getParH(lev)->rho_SP_Med_Out[pos]   = para->getParH(lev)->rho_SP_Med[pos]   / (real)tdiff;
+			para->getParH(lev)->press_SP_Med_Out[pos] = para->getParH(lev)->press_SP_Med[pos] / (real)tdiff;
+			para->getParH(lev)->Conc_Med_Out[pos]     = para->getParH(lev)->Conc_Med[pos]     / (real)tdiff;
 		}
 	}
 }
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
index e91fb6f5c232bd98073a1c930149693f8af4b078..681e4702b5f119c0f0c29273ee5d868a32fca0c7 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
@@ -8,7 +8,7 @@
 #include "Calculation/CalcTurbulenceIntensity.h"
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
-#include <basics/Core/StringUtilities/StringUtil.h>
+#include <basics/StringUtilities/StringUtil.h>
 
 void allocTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager)
 {
@@ -25,32 +25,32 @@ void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaMemoryM
     for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
         cudaMemoryManager->cudaCopyTurbulenceIntensityDH(lev, para->getParH(lev)->numberOfNodes);
 
-        for (uint i = 0; i < para->getParH(lev)->numberOfNodes; i++) {
+        for (size_t pos = 0; pos < para->getParH(lev)->numberOfNodes; pos++) {
             // mean velocity
-            para->getParH(lev)->vx_mean[i] = para->getParH(lev)->vx_mean[i] / (real)tdiff;
-            para->getParH(lev)->vy_mean[i] = para->getParH(lev)->vy_mean[i] / (real)tdiff;
-            para->getParH(lev)->vz_mean[i] = para->getParH(lev)->vz_mean[i] / (real)tdiff;
+            para->getParH(lev)->vx_mean[pos] = para->getParH(lev)->vx_mean[pos] / (real)tdiff;
+            para->getParH(lev)->vy_mean[pos] = para->getParH(lev)->vy_mean[pos] / (real)tdiff;
+            para->getParH(lev)->vz_mean[pos] = para->getParH(lev)->vz_mean[pos] / (real)tdiff;
 
             // fluctuations
-            para->getParH(lev)->vxx[i] = para->getParH(lev)->vxx[i] / (real)tdiff;
-            para->getParH(lev)->vyy[i] = para->getParH(lev)->vyy[i] / (real)tdiff;
-            para->getParH(lev)->vzz[i] = para->getParH(lev)->vzz[i] / (real)tdiff;
-            para->getParH(lev)->vxy[i] = para->getParH(lev)->vxy[i] / (real)tdiff;
-            para->getParH(lev)->vxz[i] = para->getParH(lev)->vxz[i] / (real)tdiff;
-            para->getParH(lev)->vyz[i] = para->getParH(lev)->vyz[i] / (real)tdiff;
-
-            para->getParH(lev)->vxx[i] =
-                para->getParH(lev)->vxx[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vx_mean[i];
-            para->getParH(lev)->vyy[i] =
-                para->getParH(lev)->vyy[i] - para->getParH(lev)->vy_mean[i] * para->getParH(lev)->vy_mean[i];
-            para->getParH(lev)->vzz[i] =
-                para->getParH(lev)->vzz[i] - para->getParH(lev)->vz_mean[i] * para->getParH(lev)->vz_mean[i];
-            para->getParH(lev)->vxy[i] =
-                para->getParH(lev)->vxy[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vy_mean[i];
-            para->getParH(lev)->vxz[i] =
-                para->getParH(lev)->vxz[i] - para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vz_mean[i];
-            para->getParH(lev)->vyz[i] =
-                para->getParH(lev)->vyz[i] - para->getParH(lev)->vy_mean[i] * para->getParH(lev)->vz_mean[i];
+            para->getParH(lev)->vxx[pos] = para->getParH(lev)->vxx[pos] / (real)tdiff;
+            para->getParH(lev)->vyy[pos] = para->getParH(lev)->vyy[pos] / (real)tdiff;
+            para->getParH(lev)->vzz[pos] = para->getParH(lev)->vzz[pos] / (real)tdiff;
+            para->getParH(lev)->vxy[pos] = para->getParH(lev)->vxy[pos] / (real)tdiff;
+            para->getParH(lev)->vxz[pos] = para->getParH(lev)->vxz[pos] / (real)tdiff;
+            para->getParH(lev)->vyz[pos] = para->getParH(lev)->vyz[pos] / (real)tdiff;
+
+            para->getParH(lev)->vxx[pos] =
+                para->getParH(lev)->vxx[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vx_mean[pos];
+            para->getParH(lev)->vyy[pos] =
+                para->getParH(lev)->vyy[pos] - para->getParH(lev)->vy_mean[pos] * para->getParH(lev)->vy_mean[pos];
+            para->getParH(lev)->vzz[pos] =
+                para->getParH(lev)->vzz[pos] - para->getParH(lev)->vz_mean[pos] * para->getParH(lev)->vz_mean[pos];
+            para->getParH(lev)->vxy[pos] =
+                para->getParH(lev)->vxy[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vy_mean[pos];
+            para->getParH(lev)->vxz[pos] =
+                para->getParH(lev)->vxz[pos] - para->getParH(lev)->vx_mean[pos] * para->getParH(lev)->vz_mean[pos];
+            para->getParH(lev)->vyz[pos] =
+                para->getParH(lev)->vyz[pos] - para->getParH(lev)->vy_mean[pos] * para->getParH(lev)->vz_mean[pos];
         }
     }
 }
@@ -146,7 +146,7 @@ void writeAllTiDatafToFile(Parameter *para, uint timestep)
     }
 }
 
-void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::vector<real *> &data,
+void writeTiStuffToFile(Parameter *para, uint timestep, unsigned long long sizeOfTiArray, std::vector<real *> &data,
                         std::vector<std::string> &datanames)
 {
     ////////////////////////////////////////////////////////////////////////
@@ -169,10 +169,10 @@ void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::
     ostr << std::endl;
     ////////////////////////////////////////////////////////////////////////
     // fill file with data
-    for (int i = 0; i < sizeOfTiArray; i++) {
-        ostr << i;
+    for (size_t pos = 0; pos < sizeOfTiArray; pos++) {
+        ostr << pos;
         for (auto dataset : data)
-            ostr << "\t" << dataset[i];
+            ostr << "\t" << dataset[pos];
         ostr << std::endl;
     }
     ////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
index f70973eb5921a17c3229a026623de2a0ef9f3ce4..a76c2d0dde99ad9fb3fd38137b6c72e5c3f5a6c3 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
@@ -18,7 +18,7 @@ void writeVeloFluctuationToFile(Parameter *para, uint timeste);
 void writeVeloMeansToFile(Parameter *para, uint timestep);
 void writeAllTiDatafToFile(Parameter *para, uint timestep);
 
-void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::vector<real *> &data,
+void writeTiStuffToFile(Parameter *para, uint timestep, unsigned long long sizeOfTiArray, std::vector<real *> &data,
                   std::vector<std::string> &datanames);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
index 49543f37df7fb54290f4ab6c09edb8d10c0b67be..334212f903e62608676a3473523182113a7a7723 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
@@ -1,7 +1,7 @@
 #include "CollisionStrategy.h"
 #include "Parameter/CudaStreamManager.h"
 #include "Parameter/Parameter.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 
 std::function<void(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t)>
 getFunctionForCollisionAndExchange(const bool useStreams, const int numberOfMpiProcesses,
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp b/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
index 9ee4cb917cdbf76dddf988b4456d5d611c9a11e0..3956bcbee4478e7c780559ef2dc74b2bd1fc79f7 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
@@ -7,7 +7,7 @@
 #include <fstream>
 #include <sstream>
 
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 #include <cassert>
 
 //#include <math.h>
@@ -230,16 +230,16 @@ void excludeGridInterfaceNodesForMirror(Parameter* para, int lev)
 	//define bool vector for nodes outside the interface
 	for (unsigned int it = 0; it < para->getParH(lev + 1)->numberOfPointsCpTop; it++)
 	{
-		for (unsigned int ifit = 0; ifit < para->getParH((int)lev)->K_CF; ifit++)
+		for (unsigned int ifit = 0; ifit < para->getParH((int)lev)->coarseToFine.numberOfCells; ifit++)
 		{
-			if ((para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH((int)lev)->intCF.ICellCFF[ifit]) ||
-				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborX[para->getParH((int)lev)->intCF.ICellCFF[ifit]]) ||
-				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborY[para->getParH((int)lev)->intCF.ICellCFF[ifit]]) ||
-				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborZ[para->getParH((int)lev)->intCF.ICellCFF[ifit]]) ||
-				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborY[para->getParH(lev + 1)->neighborX[para->getParH((int)lev)->intCF.ICellCFF[ifit]]]) ||
-				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborZ[para->getParH(lev + 1)->neighborX[para->getParH((int)lev)->intCF.ICellCFF[ifit]]]) ||
-				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborZ[para->getParH(lev + 1)->neighborY[para->getParH((int)lev)->intCF.ICellCFF[ifit]]]) ||
-				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborZ[para->getParH(lev + 1)->neighborY[para->getParH(lev + 1)->neighborX[para->getParH((int)lev)->intCF.ICellCFF[ifit]]]]))
+			if ((para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH((int)lev)->coarseToFine.fineCellIndices[ifit]) ||
+				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborX[para->getParH((int)lev)->coarseToFine.fineCellIndices[ifit]]) ||
+				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborY[para->getParH((int)lev)->coarseToFine.fineCellIndices[ifit]]) ||
+				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborZ[para->getParH((int)lev)->coarseToFine.fineCellIndices[ifit]]) ||
+				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborY[para->getParH(lev + 1)->neighborX[para->getParH((int)lev)->coarseToFine.fineCellIndices[ifit]]]) ||
+				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborZ[para->getParH(lev + 1)->neighborX[para->getParH((int)lev)->coarseToFine.fineCellIndices[ifit]]]) ||
+				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborZ[para->getParH(lev + 1)->neighborY[para->getParH((int)lev)->coarseToFine.fineCellIndices[ifit]]]) ||
+				(para->getParH(lev + 1)->cpTopIndex[it] == (int)para->getParH(lev + 1)->neighborZ[para->getParH(lev + 1)->neighborY[para->getParH(lev + 1)->neighborX[para->getParH((int)lev)->coarseToFine.fineCellIndices[ifit]]]]))
 			{
 				para->getParH(lev + 1)->isOutsideInterface.push_back(false);
 				tempBool = false;
@@ -256,9 +256,9 @@ void excludeGridInterfaceNodesForMirror(Parameter* para, int lev)
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	for (unsigned int it = 0; it < para->getParH((int)lev)->numberOfPointsCpTop; it++)
 	{
-		for (unsigned int ifit = 0; ifit < para->getParH((int)lev)->K_FC; ifit++)
+		for (unsigned int ifit = 0; ifit < para->getParH((int)lev)->fineToCoarse.numberOfCells; ifit++)
 		{
-			if (para->getParH((int)lev)->cpTopIndex[it] == (int)para->getParH((int)lev)->intFC.ICellFCC[ifit])
+			if (para->getParH((int)lev)->cpTopIndex[it] == (int)para->getParH((int)lev)->fineToCoarse.coarseCellIndices[ifit])
 			{
 				para->getParH((int)lev)->isOutsideInterface.push_back(false);
 				tempBool = false;
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
index 97d2af28ef7f801e817dd6cae6ad58d244249e02..2c2dc19795959c87a02705778a9768210580be41 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
@@ -7,7 +7,7 @@
 #include <fstream>
 #include <sstream>
 
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 
 //#include <math.h>
 //#include "LB.h"
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
index d62e8fee24dad1cde7ccd2044a5a5f9573f7ff82..f220c9a811486d14977ac9e55527c9e3e60c2478 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
@@ -9,7 +9,7 @@
 #include <sstream>
 #include "GPU/CudaMemoryManager.h"
 
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 //using namespace std;
 //////////////////////////////////////////////////////////////////////////
 
@@ -53,7 +53,7 @@ void ForceCalculations::calcPIDControllerForForce(Parameter* para, CudaMemoryMan
 	 {
 		 //////////////////////////////////////////////////////////////////////
 		 //measure the velocity
-		 int numberOfElements = para->getParH(lev)->numberOfNodes;
+		 unsigned long long numberOfElements = para->getParH(lev)->numberOfNodes;
 		 if (numberOfElements > 0)
 		 {
 			 CalcMacCompSP27(para->getParD(lev)->velocityX,
@@ -74,11 +74,11 @@ void ForceCalculations::calcPIDControllerForForce(Parameter* para, CudaMemoryMan
 			 cudaMemoryManager->cudaCopyPrint(lev);
 //			 para->cudaCopyForceVelo(i,numberOfElements);
 			 //////////////////////////////////////////////////////////////////
-			 for (int j = 0; j < numberOfElements; j++)
+			 for (size_t pos = 0; pos < numberOfElements; pos++)
 			 {
-				 tempVeloX += (double)para->getParH(lev)->velocityX[j];
-				 tempVeloY += (double)para->getParH(lev)->velocityY[j];
-				 tempVeloZ += (double)para->getParH(lev)->velocityZ[j];
+				 tempVeloX += (double)para->getParH(lev)->velocityX[pos];
+				 tempVeloY += (double)para->getParH(lev)->velocityY[pos];
+				 tempVeloZ += (double)para->getParH(lev)->velocityZ[pos];
 			 }
 			 tempVeloX /= (double)numberOfElements;
 			 tempVeloY /= (double)numberOfElements;
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
index 13b6bd662a1b51a9a7a850211751c8b8b5ecf329..6557a2a0730c14cc1b26097c5025827128d3a1ce 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
@@ -9,7 +9,7 @@
 #include <sstream>
 //using namespace std;
 //////////////////////////////////////////////////////////////////////////
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 
 void setSizeOfPlane(Parameter* para, int lev, unsigned int z)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
index b8ca4e9c2020e17cd0192267ac5d931b510afc3a..9428b07987f832ae0101196a8a430495fe27d508 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
@@ -1,7 +1,7 @@
 #include "RefinementStrategy.h"
 #include "Parameter/CudaStreamManager.h"
 #include "Parameter/Parameter.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 
 std::function<void(UpdateGrid27 *updateGrid, Parameter *para, int level)>
     getFunctionForRefinementAndExchange(const bool useStreams, const int numberOfMpiProcesses, const int maxLevel,
@@ -42,7 +42,7 @@ void RefinementAndExchange_streams_exchangeInterface::operator()(UpdateGrid27 *u
     //!
     //! 1. Interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->fineToCoarseBorder, para->getParD(level)->neighborFineToCoarse, CudaStreamIndex::SubDomainBorder);
 
     //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
     //!
@@ -53,8 +53,8 @@ void RefinementAndExchange_streams_exchangeInterface::operator()(UpdateGrid27 *u
     //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine)
     //!
     para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk);
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::SubDomainBorder);
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::SubDomainBorder);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->fineToCoarseBulk, para->getParD(level)->neighborFineToCoarseBulk, CudaStreamIndex::SubDomainBorder);
+    updateGrid->coarseToFine(level, &para->getParD(level)->coarseToFineBulk, para->getParD(level)->neighborCoarseToFineBulk, CudaStreamIndex::SubDomainBorder);
 
     //! 4. exchange information between GPUs (only nodes which are part of the interpolation)
     //!
@@ -62,7 +62,7 @@ void RefinementAndExchange_streams_exchangeInterface::operator()(UpdateGrid27 *u
 
     // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder);
+    updateGrid->coarseToFine(level, &para->getParD(level)->coarseToFineBorder, para->getParD(level)->neighborCoarseToFine, CudaStreamIndex::SubDomainBorder);
 
     cudaDeviceSynchronize();
 }
@@ -73,7 +73,7 @@ void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *up
     //!
     //! 1. interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->fineToCoarseBorder, para->getParD(level)->neighborFineToCoarse, CudaStreamIndex::SubDomainBorder);
 
     //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
     //!
@@ -84,8 +84,8 @@ void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *up
     //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine)
     //!
     para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk);
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::SubDomainBorder);
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::SubDomainBorder);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->fineToCoarseBulk, para->getParD(level)->neighborFineToCoarseBulk, CudaStreamIndex::SubDomainBorder);
+    updateGrid->coarseToFine(level, &para->getParD(level)->coarseToFineBulk, para->getParD(level)->neighborCoarseToFineBulk, CudaStreamIndex::SubDomainBorder);
 
     //! 4. exchange information between GPUs (all nodes)
     //!
@@ -93,7 +93,7 @@ void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *up
 
     // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder);
+    updateGrid->coarseToFine(level, &para->getParD(level)->coarseToFineBorder, para->getParD(level)->neighborCoarseToFine, CudaStreamIndex::SubDomainBorder);
 
     cudaDeviceSynchronize();
 }
@@ -104,14 +104,14 @@ void RefinementAndExchange_noStreams_exchangeInterface::operator()(UpdateGrid27
     //!
     //! 1. interpolation fine to coarse
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->fineToCoarse, para->getParD(level)->neighborFineToCoarse, CudaStreamIndex::Legacy);
 
     //! 2. exchange information between GPUs (only nodes which are part of the interpolation)
     //!
     updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, true);
 
     //! 3. interpolation coarse to fine
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy);
+    updateGrid->coarseToFine(level, &para->getParD(level)->coarseToFine, para->getParD(level)->neighborCoarseToFine, CudaStreamIndex::Legacy);
 }
 
 void RefinementAndExchange_noStreams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
@@ -120,14 +120,14 @@ void RefinementAndExchange_noStreams_exchangeAllNodes::operator()(UpdateGrid27 *
     //!
     //! 1. interpolation fine to coarse
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->fineToCoarse, para->getParD(level)->neighborFineToCoarse, CudaStreamIndex::Legacy);
 
     //! 2. exchange information between GPUs (all nodes)
     //!
     updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false);
 
     //! 3. interpolation coarse to fine
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy);
+    updateGrid->coarseToFine(level, &para->getParD(level)->coarseToFine, para->getParD(level)->neighborCoarseToFine, CudaStreamIndex::Legacy);
 }
 
 void Refinement_noExchange::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
@@ -136,7 +136,7 @@ void Refinement_noExchange::operator()(UpdateGrid27 *updateGrid, Parameter *para
     //!
     //! 1. interpolation fine to coarse
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFC, para->getParD(level)->offFC, CudaStreamIndex::Legacy);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->fineToCoarse, para->getParD(level)->neighborFineToCoarse, CudaStreamIndex::Legacy);
     //! 2. interpolation coarse to fine
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCF, para->getParD(level)->offCF, CudaStreamIndex::Legacy);
+    updateGrid->coarseToFine(level, &para->getParD(level)->coarseToFine, para->getParD(level)->neighborCoarseToFine, CudaStreamIndex::Legacy);
 }
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 4136614dfbfc9e0d2fc1bf7f4b01624f94eabb6f..a2b1039afca4eaa3fcd75e28cae16cb5f68f6c9b 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -328,9 +328,9 @@ void UpdateGrid27::preCollisionBC(int level, unsigned int t)
     //////////////////////////////////////////////////////////////////////////////////
 }
 
-void UpdateGrid27::fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, CudaStreamIndex streamIndex)
+void UpdateGrid27::fineToCoarse(int level, InterpolationCells* fineToCoarse, ICellNeigh &neighborFineToCoarse, CudaStreamIndex streamIndex)
 {
-    gridScalingKernelManager->runFineToCoarseKernelLB(level, icellFC, offFC, streamIndex);
+    gridScalingKernelManager->runFineToCoarseKernelLB(level, fineToCoarse, neighborFineToCoarse, streamIndex);
 
     if (para->getDiffOn()) {
         if (para->getStreamManager()->streamIsRegistered(streamIndex)) {
@@ -341,9 +341,9 @@ void UpdateGrid27::fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &
     }
 }
 
-void UpdateGrid27::coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex)
+void UpdateGrid27::coarseToFine(int level, InterpolationCells* coarseToFine, ICellNeigh &neighborCoarseToFine, CudaStreamIndex streamIndex)
 {
-    this->gridScalingKernelManager->runCoarseToFineKernelLB(level, icellCF, offCF, streamIndex);
+    this->gridScalingKernelManager->runCoarseToFineKernelLB(level, coarseToFine, neighborCoarseToFine, streamIndex);
 
     if (para->getDiffOn())
     {
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index 8ce2cf5bfd72f9f53cdb35bc92502ee9ca0d3ad8..9c6ff48725f4e17121de0a1a8681d0bafcfb58ee 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -38,8 +38,8 @@ private:
     void preCollisionBC(int level, unsigned int t);
     void collisionPorousMedia(int level);
 
-    void fineToCoarse(int level, InterpolationCellFC* icellFC, OffFC &offFC, CudaStreamIndex streamIndex);
-    void coarseToFine(int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex);
+    void fineToCoarse(int level, InterpolationCells* fineToCoarse, ICellNeigh &neighborFineToCoarse, CudaStreamIndex streamIndex);
+    void coarseToFine(int level, InterpolationCells* coarseToFine, ICellNeigh &neighborCoarseToFine, CudaStreamIndex streamIndex);
 
     void prepareExchangeMultiGPU(int level, CudaStreamIndex streamIndex);
     void prepareExchangeMultiGPUAfterFtoC(int level, CudaStreamIndex streamIndex);
diff --git a/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutine.h b/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutine.h
index 39866728b713c9c0c72cb5c62ba02f242e0ca68c..26c017f939b0795457d74008a21cb9e7a4b75bd0 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutine.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutine.h
@@ -1,7 +1,7 @@
 #ifndef INDEX_EXCHANGE
 #define INDEX_EXCHANGE
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
 namespace vf::gpu
 {
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
index 5df66c788b2186b0bdfad1afa4798cee670ba53b..89f3595cf6ed4919548d27d47ae987f89053e1d5 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
@@ -170,7 +170,7 @@ void Communicator::exchngDataGeo(int *sbuf_t, int *rbuf_t, int *sbuf_b, int *rbu
     MPI_Waitall(4, request, status);
 }
 int Communicator::getPID() const { return PID; }
-int Communicator::getNummberOfProcess() const { return numprocs; }
+int Communicator::getNumberOfProcess() const { return numprocs; }
 int Communicator::getNeighbourTop() { return nbrtop; }
 int Communicator::getNeighbourBottom() { return nbrbottom; }
 MPI_Comm Communicator::getCommunicator() { return comm1d; }
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
index aa63e1a09c305ceb5abe2b567b6988753bc48345..716eb6c7c2c091cb79ce502504d634d53ea40937 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
@@ -8,7 +8,7 @@
 #include "VirtualFluids_GPU_export.h"
 
 #include "CommunicationRoutine.h"
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
 //////////////////////////////////
 #ifdef VF_DOUBLE_ACCURACY
@@ -34,7 +34,7 @@ public:
     void waitAll();
     void distributeGeometry(unsigned int *dataRoot, unsigned int *dataNode, int dataSizePerNode);
     int getPID() const override;
-    int getNummberOfProcess() const;
+    int getNumberOfProcess() const;
     int getNeighbourTop();
     int getNeighbourBottom();
     void exchngData(float *sbuf_t, float *rbuf_t, float *sbuf_b, float *rbuf_b, int count);
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 00a7b45668e2050467f3d1122455dc74d0ad4f1c..48a27efa674e5fa85d47cb9439c52d0c558dac44 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -82,7 +82,7 @@ void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeN
     int numNodesInBufferSend = 0;
 
 #pragma omp parallel for
-    for (uint i = 0; i < edgeNodes.size(); i++) {
+    for (int i = 0; i < (int)edgeNodes.size(); i++) {
         indexInSubdomainRecv = edgeNodes[i].indexOfProcessNeighborRecv;
         indexInSubdomainSend = edgeNodes[i].indexOfProcessNeighborSend;
         numNodesInBufferRecv = recvProcessNeighborHost[indexInSubdomainRecv].numberOfNodes;
@@ -777,7 +777,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        GetSendFsPreDev27(para->getParD(level)->distributionsAD27.f[0],
+        GetSendFsPreDev27(para->getParD(level)->distributionsAD.f[0],
                           para->getParD(level)->sendProcessNeighborADX[i].f[0],
                           para->getParD(level)->sendProcessNeighborADX[i].index,
                           para->getParD(level)->sendProcessNeighborADX[i].numberOfNodes,
@@ -838,7 +838,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     {
         cudaMemoryManager->cudaCopyProcessNeighborADXFsHD(level, i);
         //////////////////////////////////////////////////////////////////////////
-        SetRecvFsPreDev27(para->getParD(level)->distributionsAD27.f[0],
+        SetRecvFsPreDev27(para->getParD(level)->distributionsAD.f[0],
                           para->getParD(level)->recvProcessNeighborADX[i].f[0],
                           para->getParD(level)->recvProcessNeighborADX[i].index,
                           para->getParD(level)->recvProcessNeighborADX[i].numberOfNodes,
@@ -858,7 +858,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        GetSendFsPostDev27(para->getParD(level)->distributionsAD27.f[0],
+        GetSendFsPostDev27(para->getParD(level)->distributionsAD.f[0],
                            para->getParD(level)->sendProcessNeighborADX[i].f[0],
                            para->getParD(level)->sendProcessNeighborADX[i].index,
                            para->getParD(level)->sendProcessNeighborADX[i].numberOfNodes,
@@ -919,7 +919,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm,
     {
         cudaMemoryManager->cudaCopyProcessNeighborADXFsHD(level, i);
         //////////////////////////////////////////////////////////////////////////
-        SetRecvFsPostDev27(para->getParD(level)->distributionsAD27.f[0],
+        SetRecvFsPostDev27(para->getParD(level)->distributionsAD.f[0],
                            para->getParD(level)->recvProcessNeighborADX[i].f[0],
                            para->getParD(level)->recvProcessNeighborADX[i].index,
                            para->getParD(level)->recvProcessNeighborADX[i].numberOfNodes,
@@ -946,7 +946,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        GetSendFsPreDev27(para->getParD(level)->distributionsAD27.f[0],
+        GetSendFsPreDev27(para->getParD(level)->distributionsAD.f[0],
                           para->getParD(level)->sendProcessNeighborADY[i].f[0],
                           para->getParD(level)->sendProcessNeighborADY[i].index,
                           para->getParD(level)->sendProcessNeighborADY[i].numberOfNodes,
@@ -1007,7 +1007,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     {
         cudaMemoryManager->cudaCopyProcessNeighborADYFsHD(level, i);
         //////////////////////////////////////////////////////////////////////////
-        SetRecvFsPreDev27(para->getParD(level)->distributionsAD27.f[0],
+        SetRecvFsPreDev27(para->getParD(level)->distributionsAD.f[0],
                           para->getParD(level)->recvProcessNeighborADY[i].f[0],
                           para->getParD(level)->recvProcessNeighborADY[i].index,
                           para->getParD(level)->recvProcessNeighborADY[i].numberOfNodes,
@@ -1027,7 +1027,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        GetSendFsPostDev27(para->getParD(level)->distributionsAD27.f[0],
+        GetSendFsPostDev27(para->getParD(level)->distributionsAD.f[0],
                            para->getParD(level)->sendProcessNeighborADY[i].f[0],
                            para->getParD(level)->sendProcessNeighborADY[i].index,
                            para->getParD(level)->sendProcessNeighborADY[i].numberOfNodes,
@@ -1088,7 +1088,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm,
     {
         cudaMemoryManager->cudaCopyProcessNeighborADYFsHD(level, i);
         //////////////////////////////////////////////////////////////////////////
-        SetRecvFsPostDev27(para->getParD(level)->distributionsAD27.f[0],
+        SetRecvFsPostDev27(para->getParD(level)->distributionsAD.f[0],
                            para->getParD(level)->recvProcessNeighborADY[i].f[0],
                            para->getParD(level)->recvProcessNeighborADY[i].index,
                            para->getParD(level)->recvProcessNeighborADY[i].numberOfNodes,
@@ -1115,7 +1115,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        GetSendFsPreDev27(para->getParD(level)->distributionsAD27.f[0],
+        GetSendFsPreDev27(para->getParD(level)->distributionsAD.f[0],
                           para->getParD(level)->sendProcessNeighborADZ[i].f[0],
                           para->getParD(level)->sendProcessNeighborADZ[i].index,
                           para->getParD(level)->sendProcessNeighborADZ[i].numberOfNodes,
@@ -1176,7 +1176,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     {
         cudaMemoryManager->cudaCopyProcessNeighborADZFsHD(level, i);
         //////////////////////////////////////////////////////////////////////////
-        SetRecvFsPreDev27(para->getParD(level)->distributionsAD27.f[0],
+        SetRecvFsPreDev27(para->getParD(level)->distributionsAD.f[0],
                           para->getParD(level)->recvProcessNeighborADZ[i].f[0],
                           para->getParD(level)->recvProcessNeighborADZ[i].index,
                           para->getParD(level)->recvProcessNeighborADZ[i].numberOfNodes,
@@ -1196,7 +1196,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        GetSendFsPostDev27(para->getParD(level)->distributionsAD27.f[0],
+        GetSendFsPostDev27(para->getParD(level)->distributionsAD.f[0],
                            para->getParD(level)->sendProcessNeighborADZ[i].f[0],
                            para->getParD(level)->sendProcessNeighborADZ[i].index,
                            para->getParD(level)->sendProcessNeighborADZ[i].numberOfNodes,
@@ -1257,7 +1257,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm,
     {
         cudaMemoryManager->cudaCopyProcessNeighborADZFsHD(level, i);
         //////////////////////////////////////////////////////////////////////////
-        SetRecvFsPostDev27(para->getParD(level)->distributionsAD27.f[0],
+        SetRecvFsPostDev27(para->getParD(level)->distributionsAD.f[0],
                            para->getParD(level)->recvProcessNeighborADZ[i].f[0],
                            para->getParD(level)->recvProcessNeighborADZ[i].index,
                            para->getParD(level)->recvProcessNeighborADZ[i].numberOfNodes,
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index c996525ee78e6ba87fe0972df084aedccfefd39a..3b511264e9c7edc80bbe367cac4a9b6d8725674b 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -19,29 +19,29 @@ std::shared_ptr<GridProvider> GridProvider::makeGridReader(FILEFORMAT format, st
     return std::shared_ptr<GridProvider>(new GridReader(format, para, cudaMemoryManager));
 }
 
-void GridProvider::setNumberOfNodes(const int numberOfNodes, const int level) const
+void GridProvider::setNumberOfNodes(uint numberOfNodes, int level) const
 {
-    para->getParH(level)->numberOfNodes = numberOfNodes;
-    para->getParD(level)->numberOfNodes = numberOfNodes;
-    para->getParH(level)->mem_size_real_SP = sizeof(real) * para->getParH(level)->numberOfNodes;
-    para->getParH(level)->mem_size_int_SP = sizeof(uint) * para->getParH(level)->numberOfNodes;
-    para->getParD(level)->mem_size_real_SP = sizeof(real) * para->getParD(level)->numberOfNodes;
-    para->getParD(level)->mem_size_int_SP = sizeof(uint) * para->getParD(level)->numberOfNodes;
+    para->getParH(level)->numberOfNodes          = (unsigned long long)numberOfNodes;
+    para->getParD(level)->numberOfNodes          = (unsigned long long)numberOfNodes;
+    para->getParH(level)->memSizeRealLBnodes     = sizeof(real) * para->getParH(level)->numberOfNodes;
+    para->getParD(level)->memSizeRealLBnodes     = sizeof(real) * para->getParD(level)->numberOfNodes;
+    para->getParH(level)->memSizeLonglongLBnodes = sizeof(unsigned long long) * para->getParH(level)->numberOfNodes;
+    para->getParD(level)->memSizeLonglongLBnodes = sizeof(unsigned long long) * para->getParD(level)->numberOfNodes;
 }
 
-void GridProvider::setNumberOfTaggedFluidNodes(const int numberOfNodes, CollisionTemplate tag, const int level) const
+void GridProvider::setNumberOfTaggedFluidNodes(uint numberOfNodes, CollisionTemplate tag, int level) const
 {
     para->getParH(level)->numberOfTaggedFluidNodes[tag] = numberOfNodes;
     para->getParD(level)->numberOfTaggedFluidNodes[tag] = numberOfNodes;
 }
 
-void GridProvider::setInitalNodeValues(const int numberOfNodes, const int level) const
+void GridProvider::setInitalNodeValues(uint numberOfNodes, int level) const
 {
-    for (int j = 1; j <= numberOfNodes; j++)
+    for (uint pos = 1; pos <= numberOfNodes; pos++)
     {
-        const real coordX = para->getParH(level)->coordinateX[j];
-        const real coordY = para->getParH(level)->coordinateY[j];
-        const real coordZ = para->getParH(level)->coordinateZ[j];
+        const real coordX = para->getParH(level)->coordinateX[pos];
+        const real coordY = para->getParH(level)->coordinateY[pos];
+        const real coordZ = para->getParH(level)->coordinateZ[pos];
 
         real rho, vx, vy, vz;
 
@@ -58,40 +58,40 @@ void GridProvider::setInitalNodeValues(const int numberOfNodes, const int level)
             vz  = real(0.0);
         }
 
-        para->getParH(level)->rho[j] = rho; 
-        para->getParH(level)->velocityX[j]  = vx; 
-        para->getParH(level)->velocityY[j]  = vy;
-        para->getParH(level)->velocityZ[j]  = vz; 
+        para->getParH(level)->rho[pos] = rho; 
+        para->getParH(level)->velocityX[pos]  = vx; 
+        para->getParH(level)->velocityY[pos]  = vy;
+        para->getParH(level)->velocityZ[pos]  = vz; 
 
         //////////////////////////////////////////////////////////////////////////
 
         if (para->getCalcMedian()) {
-            para->getParH(level)->vx_SP_Med[j] = 0.0f;
-            para->getParH(level)->vy_SP_Med[j] = 0.0f;
-            para->getParH(level)->vz_SP_Med[j] = 0.0f;
-            para->getParH(level)->rho_SP_Med[j] = 0.0f;
-            para->getParH(level)->press_SP_Med[j] = 0.0f;
+            para->getParH(level)->vx_SP_Med[pos] = 0.0f;
+            para->getParH(level)->vy_SP_Med[pos] = 0.0f;
+            para->getParH(level)->vz_SP_Med[pos] = 0.0f;
+            para->getParH(level)->rho_SP_Med[pos] = 0.0f;
+            para->getParH(level)->press_SP_Med[pos] = 0.0f;
         }
         if (para->getUseWale()) {
-            para->getParH(level)->turbViscosity[j] = 0.0f;
+            para->getParH(level)->turbViscosity[pos] = 0.0f;
             //Debug
-            para->getParH(level)->gSij[j] = 0.0f;
-            para->getParH(level)->gSDij[j] = 0.0f;
-            para->getParH(level)->gDxvx[j] = 0.0f;
-            para->getParH(level)->gDyvx[j] = 0.0f;
-            para->getParH(level)->gDzvx[j] = 0.0f;
-            para->getParH(level)->gDxvy[j] = 0.0f;
-            para->getParH(level)->gDyvy[j] = 0.0f;
-            para->getParH(level)->gDzvy[j] = 0.0f;
-            para->getParH(level)->gDxvz[j] = 0.0f;
-            para->getParH(level)->gDyvz[j] = 0.0f;
-            para->getParH(level)->gDzvz[j] = 0.0f;
+            para->getParH(level)->gSij[pos] = 0.0f;
+            para->getParH(level)->gSDij[pos] = 0.0f;
+            para->getParH(level)->gDxvx[pos] = 0.0f;
+            para->getParH(level)->gDyvx[pos] = 0.0f;
+            para->getParH(level)->gDzvx[pos] = 0.0f;
+            para->getParH(level)->gDxvy[pos] = 0.0f;
+            para->getParH(level)->gDyvy[pos] = 0.0f;
+            para->getParH(level)->gDzvy[pos] = 0.0f;
+            para->getParH(level)->gDxvz[pos] = 0.0f;
+            para->getParH(level)->gDyvz[pos] = 0.0f;
+            para->getParH(level)->gDzvz[pos] = 0.0f;
         }
 
         if (para->getIsBodyForce()) {
-            para->getParH(level)->forceX_SP[j] = 0.0f;
-            para->getParH(level)->forceY_SP[j] = 0.0f;
-            para->getParH(level)->forceZ_SP[j] = 0.0f;
+            para->getParH(level)->forceX_SP[pos] = 0.0f;
+            para->getParH(level)->forceY_SP[pos] = 0.0f;
+            para->getParH(level)->forceZ_SP[pos] = 0.0f;
         }
     }
 
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
index 4c50d458c60f04db14c247e16ef3dc44833cb8f7..007db1e0d8e27b3810aa38c089bae8069bbe5813 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
@@ -24,35 +24,35 @@ public:
     static std::shared_ptr<GridProvider> makeGridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator);
     static std::shared_ptr<GridProvider> makeGridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager);
 
-	virtual void allocArrays_CoordNeighborGeo() = 0;
-	virtual void allocArrays_BoundaryValues() = 0;
-	virtual void allocArrays_BoundaryQs() = 0;
+    virtual void allocArrays_CoordNeighborGeo() = 0;
+    virtual void allocArrays_BoundaryValues() = 0;
+    virtual void allocArrays_BoundaryQs() = 0;
     virtual void allocArrays_OffsetScale() = 0;
     virtual void allocArrays_taggedFluidNodes() = 0;
 
     virtual void tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) = 0;
     virtual void sortFluidNodeTags() = 0;
 
-	virtual void setDimensions() = 0;
-	virtual void setBoundingBox() = 0;
-	virtual void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) = 0;
+    virtual void setDimensions() = 0;
+    virtual void setBoundingBox() = 0;
+    virtual void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) = 0;
 
     virtual void allocAndCopyForcing();
     virtual void allocAndCopyQuadricLimiters();
     virtual void freeMemoryOnHost();
     virtual void cudaCopyDataToHost(int level);
 
-	virtual ~GridProvider() = default;
+    virtual ~GridProvider() = default;
     virtual void initalGridInformations() = 0;
 
 protected:
-	void setNumberOfNodes(const int numberOfNodes, const int level) const;
-    void setNumberOfTaggedFluidNodes(const int numberOfNodes, CollisionTemplate tag, const int level) const;
-    virtual void setInitalNodeValues(const int numberOfNodes, const int level) const;
+    void setNumberOfNodes(uint numberOfNodes, int level) const;
+    void setNumberOfTaggedFluidNodes(uint numberOfNodes, CollisionTemplate tag, int level) const;
+    virtual void setInitalNodeValues(uint numberOfNodes, int level) const;
 
-	void setPressSizePerLevel(int level, int sizePerLevel) const;
-	void setVelocitySizePerLevel(int level, int sizePerLevel) const;
-	void setOutflowSizePerLevel(int level, int sizePerLevel) const;
+    void setPressSizePerLevel(int level, int sizePerLevel) const;
+    void setVelocitySizePerLevel(int level, int sizePerLevel) const;
+    void setOutflowSizePerLevel(int level, int sizePerLevel) const;
 
     std::shared_ptr<Parameter> para;
     std::shared_ptr<CudaMemoryManager> cudaMemoryManager;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/BoundaryValues.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/BoundaryValues.cpp
index e987e74be46c881da98fea35cfad6606395f0aca..9fbf2105b85424996ac67c8edbc42915b369a04e 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/BoundaryValues.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/BoundaryValues.cpp
@@ -93,8 +93,6 @@ int BoundaryValues::getNumberOfColumns()
 		return 0;
 	if (boundaryCondition == "concentration")
 		return 0;
-	if (boundaryCondition == "streetVector")
-		return 1;
 	else
 		return -1;
 }
@@ -261,13 +259,3 @@ void BoundaryValues::setOutflowValues(real *RhoBC, int* kN, int level) const
 	}
 }
 
-void BoundaryValues::setStreetVelocityFractions(real *vxf, real *vyf, int level) const
-{
-	for (std::size_t column = 0; column < values[level].size(); column++) {
-		for (std::size_t index = 0; index < values[level][column].size(); index++) {
-			if (column == 0) vxf[index] = values[level][column][index];
-			if (column == 1) vyf[index] = values[level][column][index];
-		}
-	}
-}
-
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/BoundaryValues.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/BoundaryValues.h
index 9adf0b08e308ea6e080fe7ce0d5410a35e2cdeb5..da3693d5c7e7fcf3c8879ec14464050359a44e28 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/BoundaryValues.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/BoundaryValues.h
@@ -42,7 +42,6 @@ public:
 	void setPressValues(real *RhoBC, int* kN, int level) const;
 	void setVelocityValues(real *vx, real *vy, real *vz, int level) const;
 	void setOutflowValues(real *RhoBC, int* kN, int level) const;
-	void setStreetVelocityFractions(real *vxf, real *vyf, int level) const;
 
 private:
 	void init();
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
index 66af0d1c4603353148c6201de15d13d6243b0612..c126b2f79e02272a0bd86bfe0f76fe5efe09a5a7 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
@@ -35,11 +35,6 @@ GridReader::GridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, std::
 	channelDirections[5] = "bottom";
 }
 
-GridReader::~GridReader()
-{
-
-}
-
 bool GridReader::getBinaer()
 {
 	return binaer;
@@ -47,7 +42,7 @@ bool GridReader::getBinaer()
 
 void rearrangeGeometry(Parameter* para, int lev)
 {
-    for (uint index = 0; index < para->getParH(lev)->numberOfNodes; index++)
+    for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++)
     {
         if (para->getParH(lev)->typeOfGridNode[index] == GEO_FLUID_OLD)
         {
@@ -58,7 +53,7 @@ void rearrangeGeometry(Parameter* para, int lev)
 
 void GridReader::allocArrays_CoordNeighborGeo()
 {
-	std::cout << "-----Config Arrays Coord, Neighbor, Geo------" << std::endl;
+    VF_LOG_TRACE("-----Config Arrays Coord, Neighbor, Geo------");
 
 	CoordNeighborGeoV coordX(para->getcoordX(), binaer, true);
 	CoordNeighborGeoV coordY(para->getcoordY(), binaer, true);
@@ -70,15 +65,15 @@ void GridReader::allocArrays_CoordNeighborGeo()
     CoordNeighborGeoV geoV(para->getgeoVec(), binaer, false);
 
 	uint maxLevel = coordX.getLevel();
-	std::cout << "Number of Level: " << maxLevel + 1 << std::endl;
+    VF_LOG_INFO("Number of Level: {}", maxLevel + 1);
 	uint numberOfNodesGlobal = 0;
-	std::cout << "Number of Nodes: " << std::endl;
+    VF_LOG_INFO("Number of Nodes: ");
 
     for (uint level = 0; level <= maxLevel; level++)
     {
-        int numberOfNodesPerLevel = coordX.getSize(level) + 1;
+        const uint numberOfNodesPerLevel = coordX.getSize(level) + 1;
         numberOfNodesGlobal += numberOfNodesPerLevel;
-        std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
+        VF_LOG_INFO("Level {} = {} Nodes", level, numberOfNodesPerLevel);
 
 		setNumberOfNodes(numberOfNodesPerLevel, level);
 
@@ -105,13 +100,14 @@ void GridReader::allocArrays_CoordNeighborGeo()
         cudaMemoryManager->cudaCopySP(level);
         cudaMemoryManager->cudaCopyCoord(level);
 	}
-	std::cout << "Number of Nodes: " << numberOfNodesGlobal << std::endl;
-	std::cout << "-----finish Coord, Neighbor, Geo------" <<std::endl;
+    VF_LOG_INFO("Number of Nodes: {}", numberOfNodesGlobal);
+    VF_LOG_TRACE("-----finish Config Arrays Coord, Neighbor, Geo------");
 }
 
 void GridReader::allocArrays_BoundaryValues()
 {
-	std::cout << "------read BoundaryValues------" <<std::endl;
+    VF_LOG_TRACE("------read BoundaryValues-------");
+
 
 	this->makeReader(para);
 	this->setChannelBoundaryCondition();
@@ -142,7 +138,7 @@ void GridReader::allocArrays_BoundaryValues()
 
 void GridReader::allocArrays_OffsetScale()
 {
-    std::cout << "-----Config Arrays OffsetScale------" << std::endl;
+    VF_LOG_TRACE("------Config Arrays OffsetScale-------");
     OffsetScale *obj_offCF = new OffsetScale(para->getscaleOffsetCF(), true);
     OffsetScale *obj_offFC = new OffsetScale(para->getscaleOffsetFC(), true);
     OffsetScale *obj_scaleCFC = new OffsetScale(para->getscaleCFC(), false);
@@ -157,33 +153,21 @@ void GridReader::allocArrays_OffsetScale()
 
     for (int i = 0; i<level; i++) {
         unsigned int tempCF = obj_offCF->getSize(i);
-        std::cout << "Groesse der Daten CF vom Level " << i << " : " << tempCF << std::endl;
+        VF_LOG_INFO("Size Data CF from Level {}: {}", i, tempCF);
         unsigned int tempFC = obj_offFC->getSize(i);
-        std::cout << "Groesse der Daten FC vom Level " << i << " : " << tempFC << std::endl;
+        VF_LOG_INFO("Size Data FC from Level {}: {}", i, tempFC);
 
         AnzahlKnotenGesCF += tempCF;
         AnzahlKnotenGesFC += tempFC;
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        //size + memsize CF
-        para->getParH(i)->K_CF = tempCF;
-        para->getParD(i)->K_CF = para->getParH(i)->K_CF;
-        para->getParH(i)->intCF.kCF = para->getParH(i)->K_CF;
-        para->getParD(i)->intCF.kCF = para->getParH(i)->K_CF;
-        para->getParH(i)->mem_size_kCF = sizeof(unsigned int)* para->getParH(i)->K_CF;
-        para->getParD(i)->mem_size_kCF = sizeof(unsigned int)* para->getParD(i)->K_CF;
-        para->getParH(i)->mem_size_kCF_off = sizeof(real)* para->getParH(i)->K_CF;
-        para->getParD(i)->mem_size_kCF_off = sizeof(real)* para->getParD(i)->K_CF;
+        //size CF
+        para->getParH(i)->coarseToFine.numberOfCells = tempCF;
+        para->getParD(i)->coarseToFine.numberOfCells = para->getParH(i)->coarseToFine.numberOfCells;
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        //size + memsize FC
-        para->getParH(i)->K_FC = tempFC;
-        para->getParD(i)->K_FC = para->getParH(i)->K_FC;
-        para->getParH(i)->intFC.kFC = para->getParH(i)->K_FC;
-        para->getParD(i)->intFC.kFC = para->getParH(i)->K_FC;
-        para->getParH(i)->mem_size_kFC = sizeof(unsigned int)* para->getParH(i)->K_FC;
-        para->getParD(i)->mem_size_kFC = sizeof(unsigned int)* para->getParD(i)->K_FC;
-        para->getParH(i)->mem_size_kFC_off = sizeof(real)* para->getParH(i)->K_FC;
-        para->getParD(i)->mem_size_kFC_off = sizeof(real)* para->getParD(i)->K_FC;
+        //size FC
+        para->getParH(i)->fineToCoarse.numberOfCells = tempFC;
+        para->getParD(i)->fineToCoarse.numberOfCells = para->getParH(i)->fineToCoarse.numberOfCells;
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //alloc
 		cudaMemoryManager->cudaAllocInterfaceCF(i);
@@ -192,12 +176,12 @@ void GridReader::allocArrays_OffsetScale()
 		cudaMemoryManager->cudaAllocInterfaceOffFC(i);
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //init
-        obj_offCF->initArrayOffset(para->getParH(i)->offCF.xOffCF, para->getParH(i)->offCF.yOffCF, para->getParH(i)->offCF.zOffCF, i);
-        obj_offFC->initArrayOffset(para->getParH(i)->offFC.xOffFC, para->getParH(i)->offFC.yOffFC, para->getParH(i)->offFC.zOffFC, i);
-        obj_scaleCFC->initScale(para->getParH(i)->intCF.ICellCFC, i);
-        obj_scaleCFF->initScale(para->getParH(i)->intCF.ICellCFF, i);
-        obj_scaleFCC->initScale(para->getParH(i)->intFC.ICellFCC, i);
-        obj_scaleFCF->initScale(para->getParH(i)->intFC.ICellFCF, i);
+        obj_offCF->initArrayOffset(para->getParH(i)->neighborCoarseToFine.x, para->getParH(i)->neighborCoarseToFine.y, para->getParH(i)->neighborCoarseToFine.z, i);
+        obj_offFC->initArrayOffset(para->getParH(i)->neighborFineToCoarse.x, para->getParH(i)->neighborFineToCoarse.y, para->getParH(i)->neighborFineToCoarse.z, i);
+        obj_scaleCFC->initScale(para->getParH(i)->coarseToFine.coarseCellIndices, i);
+        obj_scaleCFF->initScale(para->getParH(i)->coarseToFine.fineCellIndices, i);
+        obj_scaleFCC->initScale(para->getParH(i)->fineToCoarse.coarseCellIndices, i);
+        obj_scaleFCF->initScale(para->getParH(i)->fineToCoarse.fineCellIndices, i);
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //copy
 		cudaMemoryManager->cudaCopyInterfaceCF(i);
@@ -206,8 +190,8 @@ void GridReader::allocArrays_OffsetScale()
 		cudaMemoryManager->cudaCopyInterfaceOffFC(i);
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     }
-    std::cout << "Gesamtanzahl Knoten CF = " << AnzahlKnotenGesCF << std::endl;
-    std::cout << "Gesamtanzahl Knoten FC = " << AnzahlKnotenGesFC << std::endl;
+    VF_LOG_INFO("Total number of Nodes CF = {}", AnzahlKnotenGesCF);
+    VF_LOG_INFO("Total number of Nodes FC = {}", AnzahlKnotenGesFC);
 
     delete obj_offCF;
     delete obj_offFC;
@@ -215,21 +199,21 @@ void GridReader::allocArrays_OffsetScale()
     delete obj_scaleCFF;
     delete obj_scaleFCC;
     delete obj_scaleFCF;
-    std::cout << "-----Ende OffsetScale------" << std::endl;
+    VF_LOG_TRACE("Finish OffsetScale");
 }
 
 void GridReader::allocArrays_taggedFluidNodes() {
-    std::cout << "GridReader::allocArrays_fluidNodeIndices not implemented" << std::endl;
+    VF_LOG_WARNING("GridReader::allocArrays_taggedFluidNodes not implemented");
 	// TODO
 }
 
 void GridReader::tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level){
-    std::cout << "GridReader::tagFluidNodeIndices not implemented" << std::endl;
+    VF_LOG_WARNING("GridReader::tagFluidNodeIndices not implemented");
     // TODO
 }
 
 void GridReader::sortFluidNodeTags(){
-    std::cout << "GridReader::sortFluidNodeTags not implemented" << std::endl;
+    VF_LOG_WARNING("GridReader::sortFluidNodeTags not implemented");
     // TODO
 }
 
@@ -242,7 +226,7 @@ void GridReader::setPressureValues(int channelSide) const
 
 		if (sizePerLevel > 0)
 		{
-			std::cout << "size pressure level " << level << " : " << sizePerLevel << std::endl;
+            VF_LOG_INFO("size pressure level {}: {}", level, sizePerLevel);
 
             cudaMemoryManager->cudaAllocPress(level);
 
@@ -273,7 +257,7 @@ void GridReader::fillVelocityVectors(int channelSide)
             real *veloY_ValuesPerSide = new real[sizePerLevel];
             real *veloZ_ValuesPerSide = new real[sizePerLevel];
 
-            std::cout << "size velocity level " << level << " : " << sizePerLevel << std::endl;
+            VF_LOG_INFO("size velocity level {}: {}", level, sizePerLevel);
             BC_Values[channelSide]->setVelocityValues(veloX_ValuesPerSide, veloY_ValuesPerSide, veloZ_ValuesPerSide, level);
 
             for (int i = 0; i < sizePerLevel; i++) {
@@ -295,7 +279,7 @@ void GridReader::setVelocityValues() {
     for (int level = 0; level < (int)(velocityX_BCvalues.size()); level++) {
 
         int sizePerLevel = (int) velocityX_BCvalues[level].size();
-        std::cout << "complete size velocity level " << level << " : " << sizePerLevel << std::endl;
+        VF_LOG_INFO("Complete size velocity level {}: {}", level, sizePerLevel);
         setVelocitySizePerLevel(level, sizePerLevel);
 
         if (sizePerLevel > 1) {
@@ -326,7 +310,7 @@ void GridReader::setOutflowValues(int channelSide) const
 
 		if (sizePerLevel > 1)
 		{
-			std::cout << "size outflow level " << level << " : " << sizePerLevel << std::endl;
+            VF_LOG_INFO("size outflow level {}: {}", level, sizePerLevel);
 
             cudaMemoryManager->cudaAllocOutflowBC(level);
 
@@ -402,7 +386,7 @@ void GridReader::initalValuesDomainDecompostion(int level)
 				{
 					////////////////////////////////////////////////////////////////////////////////////////
 					//send
-					*logging::out << logging::Logger::INFO_INTERMEDIATE << "size of Data for X send buffer, Level " << i << " : " << tempSend << "\n";
+                    VF_LOG_INFO("size of Data for X send buffer, Level {} : {}", i, tempSend);
 					////////////////////////////////////////////////////////////////////////////////////////
 					para->setNumberOfProcessNeighborsX((unsigned int)procNeighborsSendX.size(), i, "send");
 					para->getParH(i)->sendProcessNeighborX[j].rankNeighbor = neighborRankX[j];
@@ -560,7 +544,7 @@ void GridReader::initalValuesDomainDecompostion(int level)
 
 void GridReader::allocArrays_BoundaryQs()
 {
-	std::cout << "------read BoundaryQs-------" <<std::endl;
+    VF_LOG_TRACE("------read BoundaryQs-------");
 
 	std::vector<std::shared_ptr<BoundaryQs> > BC_Qs(channelDirections.size());
 	this->makeReader(BC_Qs, para);
@@ -584,7 +568,7 @@ void GridReader::allocArrays_BoundaryQs()
 	if (para->getIsGeo())
 		setGeoQs(obj_geomQ);
 
-	std::cout << "-----finish BoundaryQs------" <<std::endl;
+	VF_LOG_TRACE("------finish BoundaryQs-------");
 }
 
 
@@ -924,6 +908,6 @@ void GridReader::setChannelBoundaryCondition()
     for (std::size_t i = 0; i < channelDirections.size(); i++)
     {
         this->channelBoundaryConditions[i] = BC_Values[i]->getBoundaryCondition();
-        std::cout << this->channelDirections[i] << " Boundary: " << channelBoundaryConditions[i] << std::endl;
+        VF_LOG_INFO("{} Boundary: {}", this->channelDirections[i], channelBoundaryConditions[i]);
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
index 041d2c3ce94592f792c5a850eebd14c07f4db1b4..d244e76569fe213978086ab412725e4450da66e4 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
@@ -35,7 +35,7 @@ private:
 public:
     GridReader(FILEFORMAT format, std::shared_ptr<Parameter> para,
                std::shared_ptr<CudaMemoryManager> cudaMemoryManager);
-    ~GridReader() override;
+    ~GridReader() override = default;
     void allocArrays_CoordNeighborGeo() override;
     void allocArrays_BoundaryValues() override;
     void allocArrays_OffsetScale() override;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index d14973be77ab6766dc04ec95213efe663db3b873..15bb8a43375bda7a90ef61548c1c68bafa9d1af5 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -31,7 +31,7 @@ GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_p
         std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder);
 }
 
-GridGenerator::~GridGenerator() = default;
+GridGenerator::~GridGenerator() {}
 
 void GridGenerator::setIndexRearrangementForStreams(std::unique_ptr<IndexRearrangementForStreams> &&indexRearrangement)
 {
@@ -58,15 +58,15 @@ void GridGenerator::initalGridInformations()
 void GridGenerator::allocArrays_CoordNeighborGeo()
 {
     const uint numberOfLevels = builder->getNumberOfGridLevels();
-    std::cout << "Number of Level: " << numberOfLevels << std::endl;
+    VF_LOG_INFO("Number of Level: {}", numberOfLevels);
     int numberOfNodesGlobal = 0;
-    std::cout << "Number of Nodes: " << std::endl;
+    VF_LOG_INFO("Number of Nodes: ");
 
     for (uint level = 0; level < numberOfLevels; level++)
     {
-        const int numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1;
+        const uint numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1;
         numberOfNodesGlobal += numberOfNodesPerLevel;
-        std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
+        VF_LOG_INFO("Level {} = {} Nodes", level, numberOfNodesPerLevel);
 
         setNumberOfNodes(numberOfNodesPerLevel, level);
 
@@ -99,11 +99,9 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
         cudaMemoryManager->cudaCopyCoord(level);
         if(para->getIsBodyForce())
             cudaMemoryManager->cudaCopyBodyForce(level);
-
-        //std::cout << verifyNeighborIndices(level);
     }
-    std::cout << "Number of Nodes: " << numberOfNodesGlobal << std::endl;
-    std::cout << "-----finish Coord, Neighbor, Geo------" << std::endl;
+    VF_LOG_INFO("Number of Nodes: {}", numberOfNodesGlobal);
+    VF_LOG_TRACE("-----finish Coord, Neighbor, Geo------");
 }
 
 void GridGenerator::allocArrays_taggedFluidNodes() {
@@ -202,20 +200,18 @@ void GridGenerator::sortFluidNodeTags() {
 
 void GridGenerator::allocArrays_BoundaryValues()
 {
-    std::cout << "------read BoundaryValues------" << std::endl;
-    int blocks;
+    VF_LOG_TRACE("-----alloc BoundaryValues------");
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfPressureValues = int(builder->getPressureSize(level));
-        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size pressure level " << level << " : " << numberOfPressureValues << "\n";
+        VF_LOG_INFO("size pressure level {}: {}", level, numberOfPressureValues);
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->pressureBC.numberOfBCnodes = 0;
         para->getParD(level)->outflowPressureCorrectionFactor = para->getOutflowPressureCorrectionFactor();
         if (numberOfPressureValues > 1)
         {
-            blocks = (numberOfPressureValues / para->getParH(level)->numberofthreads) + 1;
-            para->getParH(level)->pressureBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
+            para->getParH(level)->pressureBC.numberOfBCnodes = numberOfPressureValues;
             cudaMemoryManager->cudaAllocPress(level);
             builder->getPressureValues(para->getParH(level)->pressureBC.RhoBC, para->getParH(level)->pressureBC.k, para->getParH(level)->pressureBC.kN, level);
             cudaMemoryManager->cudaCopyPress(level);
@@ -227,13 +223,13 @@ void GridGenerator::allocArrays_BoundaryValues()
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfSlipValues = int(builder->getSlipSize(level));
-        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size slip level " << level << " : " << numberOfSlipValues << "\n";
+        VF_LOG_INFO("size slip level {}: {}", level, numberOfSlipValues);
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->slipBC.numberOfBCnodes = 0;
-        if (numberOfSlipValues > 1) {
-            blocks = (numberOfSlipValues / para->getParH(level)->numberofthreads) + 1;
-            para->getParH(level)->slipBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
+        if (numberOfSlipValues > 1)
+        {
+            para->getParH(level)->slipBC.numberOfBCnodes = numberOfSlipValues;
             cudaMemoryManager->cudaAllocSlipBC(level);
             builder->getSlipValues(para->getParH(level)->slipBC.normalX, para->getParH(level)->slipBC.normalY,
                                    para->getParH(level)->slipBC.normalZ, para->getParH(level)->slipBC.k, level);
@@ -246,14 +242,13 @@ void GridGenerator::allocArrays_BoundaryValues()
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfStressValues = int(builder->getStressSize(level));
-        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size stress level " << level << " : " << numberOfStressValues << "\n";
+        VF_LOG_INFO("size stress level {}: {}", level, numberOfStressValues);
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->stressBC.numberOfBCnodes = 0;
         if (numberOfStressValues > 1)
         {
-            blocks = (numberOfStressValues / para->getParH(level)->numberofthreads) + 1;
-            para->getParH(level)->stressBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
+            para->getParH(level)->stressBC.numberOfBCnodes = numberOfStressValues;
             cudaMemoryManager->cudaAllocStressBC(level);
             cudaMemoryManager->cudaAllocWallModel(level, para->getHasWallModelMonitor());
             builder->getStressValues(   para->getParH(level)->stressBC.normalX,  para->getParH(level)->stressBC.normalY,  para->getParH(level)->stressBC.normalZ,
@@ -274,15 +269,14 @@ void GridGenerator::allocArrays_BoundaryValues()
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfVelocityValues = int(builder->getVelocitySize(level));
-        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size velocity level " << level << " : " << numberOfVelocityValues << "\n";
+        VF_LOG_INFO("size velocity level {}: {}", level, numberOfVelocityValues);
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
         para->getParH(level)->velocityBC.numberOfBCnodes = 0;
 
         if (numberOfVelocityValues > 1)
         {
-            blocks = (numberOfVelocityValues / para->getParH(level)->numberofthreads) + 1;
-            para->getParH(level)->velocityBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
+            para->getParH(level)->velocityBC.numberOfBCnodes = numberOfVelocityValues;
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaAllocVeloBC(level);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -300,8 +294,8 @@ void GridGenerator::allocArrays_BoundaryValues()
                 //////////////////////////////////////////////////////////////////////////
                 para->getParH(level)->TempVel.kTemp = para->getParH(level)->velocityBC.numberOfBCnodes;
                 //cout << "Groesse kTemp = " << para->getParH(i)->TempPress.kTemp << endl;
-                std::cout << "getTemperatureInit = " << para->getTemperatureInit() << std::endl;
-                std::cout << "getTemperatureBC = " << para->getTemperatureBC() << std::endl;
+                VF_LOG_INFO("getTemperatureInit = {}", para->getTemperatureInit());
+                VF_LOG_INFO("getTemperatureBC = {}", para->getTemperatureBC());
                 //////////////////////////////////////////////////////////////////////////
                 cudaMemoryManager->cudaAllocTempVeloBC(level);
                 //cout << "nach alloc " << endl;
@@ -328,9 +322,9 @@ void GridGenerator::allocArrays_BoundaryValues()
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfPrecursorValues = int(builder->getPrecursorSize(level));
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "size precursor level " << level << " : " << numberOfPrecursorValues << "\n";
+        VF_LOG_INFO("size precursor level {}: {}", level, numberOfPrecursorValues);
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        blocks = (numberOfPrecursorValues / para->getParH(level)->numberofthreads) + 1;
+        auto blocks = (numberOfPrecursorValues / para->getParH(level)->numberofthreads) + 1;
         para->getParH(level)->precursorBC.sizeQ = blocks * para->getParH(level)->numberofthreads;
         para->getParD(level)->precursorBC.sizeQ = para->getParH(level)->precursorBC.sizeQ;
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -426,14 +420,13 @@ void GridGenerator::allocArrays_BoundaryValues()
         para->setUseGeometryValues(true);
         for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
             int numberOfGeometryValues = builder->getGeometrySize(level);
-            *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size geometry values, Level " << level << " : " << numberOfGeometryValues << "\n";
+            VF_LOG_INFO("size geometry values, Level {} : {}", level, numberOfGeometryValues);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
             para->getParH(level)->geometryBC.numberOfBCnodes = 0;
             if (numberOfGeometryValues > 0)
-            {
-                blocks = (numberOfGeometryValues / para->getParH(level)->numberofthreads) + 1;
-                para->getParH(level)->geometryBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
+            {
+                para->getParH(level)->geometryBC.numberOfBCnodes = numberOfGeometryValues;
                 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                 cudaMemoryManager->cudaAllocGeomValuesBC(level);
                 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -521,8 +514,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         para->getParH(level)->sendProcessNeighborX.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
                         ////////////////////////////////////////////////////////////////////////////////////////
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE << "size of Data for X send buffer, \t\tLevel " << level << " : " << tempSend
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for X send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->sendProcessNeighborX.back().numberOfNodes = tempSend;
                         para->getParD(level)->sendProcessNeighborX.back().numberOfNodes = tempSend;
@@ -536,8 +528,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         para->getParD(level)->sendProcessNeighborX.back().memsizeFs = sizeof(real) * tempSend;
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // recv
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE << "size of Data for X receive buffer, \tLevel " << level << " : " << tempRecv
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for X receive buffer, \tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->recvProcessNeighborX.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -587,8 +578,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         }
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // send
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for Y send buffer, \t\tLevel " << level << " : " << tempSend
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for Y send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->sendProcessNeighborY.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -605,8 +595,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         para->getParD(level)->sendProcessNeighborY.back().memsizeFs = sizeof(real) * tempSend;
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // recv
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for Y receive buffer, \tLevel " << level << " : " << tempRecv
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for Y receive buffer, \tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->recvProcessNeighborY.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -656,8 +645,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         }
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // send
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for Z send buffer, \t\tLevel " << level << " : " << tempSend
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for Z send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->sendProcessNeighborZ.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -674,8 +662,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         para->getParD(level)->sendProcessNeighborZ.back().memsizeFs = sizeof(real) * tempSend;
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // recv
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for Z receive buffer, \tLevel " << level << " : " << tempRecv
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for Z receive buffer, \tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->recvProcessNeighborZ.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -729,8 +716,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                     if (tempSend > 0) {
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // send
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for X send buffer, \t\tLevel " << level << " : " << tempSend
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for X send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->sendProcessNeighborF3X.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -749,8 +735,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                             sizeof(real) * para->getParH(level)->sendProcessNeighborF3X.back().numberOfGs;
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // recv
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for X receive buffer, \tLevel " << level << " : " << tempRecv
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for X recv buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->recvProcessNeighborF3X.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -795,8 +780,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                     if (tempSend > 0) {
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // send
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for Y send buffer, \t\tLevel " << level << " : " << tempSend
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for Y send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->sendProcessNeighborF3Y.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -815,8 +799,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                             sizeof(real) * para->getParH(level)->sendProcessNeighborF3Y.back().numberOfGs;
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // recv
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for Y receive buffer, \tLevel " << level << " : " << tempRecv
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for Y recv buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->recvProcessNeighborF3Y.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -861,8 +844,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                     if (tempSend > 0) {
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // send
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for Z send buffer, \t\tLevel " << level << " : " << tempSend
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for Z send buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempSend, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->sendProcessNeighborF3Z.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -881,8 +863,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                             sizeof(real) * para->getParH(level)->sendProcessNeighborF3Z.back().numberOfGs;
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // recv
-                        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of Data for Z receive buffer, \tLevel " << level << " : " << tempRecv
-                                  << " \t(neighbor rank: " << builder->getCommunicationProcess(direction) << ")\n";
+                        VF_LOG_INFO("size of Data for Z recv buffer, \t\tLevel {}: {} \t(neighbor rank: {})", level, tempRecv, builder->getCommunicationProcess(direction));
                         ////////////////////////////////////////////////////////////////////////////////////////
                         para->getParH(level)->recvProcessNeighborF3Z.back().rankNeighbor =
                             builder->getCommunicationProcess(direction);
@@ -920,37 +901,34 @@ void GridGenerator::initalValuesDomainDecompostion()
 
 void GridGenerator::allocArrays_BoundaryQs()
 {
-    std::cout << "------read BoundaryQs-------" << std::endl;
+    VF_LOG_TRACE("allocArrays_BoundaryQs()");
 
 
     for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) {
         const auto numberOfPressureValues = (int)builder->getPressureSize(i);
         if (numberOfPressureValues > 0)
         {
-            *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size Pressure:  " << i << " : " << numberOfPressureValues << "\n";
-            //cout << "Groesse Pressure:  " << i << " : " << temp1 << "MyID: " << para->getMyID() << endl;
+            VF_LOG_INFO("size Pressure: {}: {}", i, numberOfPressureValues);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             //preprocessing
             real* QQ = para->getParH(i)->pressureBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->pressureBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->pressureBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
 
             builder->getPressureQs(Q.q27, i);
 
 
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            // advection - diffusion stuff
-            //cout << "vor advec diff" << endl;
-            if (para->getDiffOn() == true) {
+            // advection - diffusion
+            if (para->getDiffOn()) {
                 //////////////////////////////////////////////////////////////////////////
                 //cout << "vor setzen von kTemp" << endl;
                 para->getParH(i)->TempPress.kTemp = numberOfPressureValues;
                 para->getParD(i)->TempPress.kTemp = numberOfPressureValues;
-                std::cout << "Groesse TempPress.kTemp = " << para->getParH(i)->TempPress.kTemp << std::endl;
+                VF_LOG_INFO("size TempPress.kTemp: {}: {}", i, para->getParH(i)->TempPress.kTemp);
                 //////////////////////////////////////////////////////////////////////////
                 cudaMemoryManager->cudaAllocTempPressBC(i);
-                //cout << "nach alloc" << endl;
                 //////////////////////////////////////////////////////////////////////////
                 for (int m = 0; m < numberOfPressureValues; m++)
                 {
@@ -959,9 +937,7 @@ void GridGenerator::allocArrays_BoundaryQs()
                     para->getParH(i)->TempPress.k[m] = para->getParH(i)->pressureBC.k[m];
                 }
                 //////////////////////////////////////////////////////////////////////////
-                //cout << "vor copy" << endl;
                 cudaMemoryManager->cudaCopyTempPressBCHD(i);
-                //cout << "nach copy" << endl;
                 //////////////////////////////////////////////////////////////////////////
             }
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -974,13 +950,12 @@ void GridGenerator::allocArrays_BoundaryQs()
         int numberOfSlipValues = (int)builder->getSlipSize(i);
         if (numberOfSlipValues > 0)
         {
-            *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size Slip:  " << i << " : " << numberOfSlipValues << "\n";
-            //cout << "Groesse Pressure:  " << i << " : " << temp1 << "MyID: " << para->getMyID() << endl;
+            VF_LOG_INFO("size Slip:  {}: {}", i, numberOfSlipValues);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             //preprocessing
             real* QQ = para->getParH(i)->slipBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->slipBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->slipBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
 
             builder->getSlipQs(Q.q27, i);
@@ -994,15 +969,14 @@ void GridGenerator::allocArrays_BoundaryQs()
         int numberOfStressValues = (int)builder->getStressSize(i);
         if (numberOfStressValues > 0)
         {
-            *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size Stress:  " << i << " : " << numberOfStressValues << "\n";
-            //cout << "Groesse Pressure:  " << i << " : " << temp1 << "MyID: " << para->getMyID() << endl;
+            VF_LOG_INFO("size Stress:  {}: {}", i, numberOfStressValues);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             //preprocessing
             real* QQ = para->getParH(i)->stressBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->stressBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->stressBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
-
+            
             builder->getStressQs(Q.q27, i);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaCopyStressBC(i);
@@ -1014,13 +988,12 @@ void GridGenerator::allocArrays_BoundaryQs()
         const auto numberOfVelocityNodes = int(builder->getVelocitySize(i));
         if (numberOfVelocityNodes > 0)
         {
-            *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size velocity level " << i << " : " << numberOfVelocityNodes << "\n";
-            //cout << "Groesse velocity level:  " << i << " : " << temp3 << "MyID: " << para->getMyID() << "\n";
+            VF_LOG_INFO("size velocity level {}: {}", i, numberOfVelocityNodes);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             //preprocessing
             real* QQ = para->getParH(i)->velocityBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->velocityBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->velocityBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
             builder->getVelocityQs(Q.q27, i);
 
@@ -1028,12 +1001,11 @@ void GridGenerator::allocArrays_BoundaryQs()
                 //////////////////////////////////////////////////////////////////////////
                 para->getParH(i)->TempVel.kTemp = numberOfVelocityNodes;
                 para->getParD(i)->TempVel.kTemp = numberOfVelocityNodes;
-                std::cout << "Groesse TempVel.kTemp = " << para->getParH(i)->TempPress.kTemp << std::endl;
-                std::cout << "getTemperatureInit = " << para->getTemperatureInit() << std::endl;
-                std::cout << "getTemperatureBC = " << para->getTemperatureBC() << std::endl;
+                VF_LOG_INFO("size TempVel.kTemp: {}: {}", i, para->getParH(i)->TempVel.kTemp);
+                VF_LOG_INFO("getTemperatureInit: {}",  para->getTemperatureInit());
+                VF_LOG_INFO("getTemperatureBC: {}",  para->getTemperatureBC());
                 //////////////////////////////////////////////////////////////////////////
                 cudaMemoryManager->cudaAllocTempVeloBC(i);
-                //cout << "nach alloc " << "\n";
                 //////////////////////////////////////////////////////////////////////////
                 for (int m = 0; m < numberOfVelocityNodes; m++)
                 {
@@ -1042,11 +1014,7 @@ void GridGenerator::allocArrays_BoundaryQs()
                     para->getParH(i)->TempVel.velo[m] = para->getVelocity();
                     para->getParH(i)->TempVel.k[m] = para->getParH(i)->velocityBC.k[m];
                 }
-                //////////////////////////////////////////////////////////////////////////
-                //cout << "vor copy " << "\n";
                 cudaMemoryManager->cudaCopyTempVeloBCHD(i);
-                //cout << "nach copy " << "\n";
-                //////////////////////////////////////////////////////////////////////////
             }
             cudaMemoryManager->cudaCopyVeloBC(i);
         }
@@ -1056,7 +1024,7 @@ void GridGenerator::allocArrays_BoundaryQs()
         const auto numberOfPrecursorNodes = int(builder->getPrecursorSize(i));
         if (numberOfPrecursorNodes > 0)
         {
-            std::cout << "size velocity level " << i << " : " << numberOfPrecursorNodes << std::endl;
+            VF_LOG_INFO("size precursor level {}: {}", i, numberOfPrecursorNodes);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             //preprocessing
             real* QQ = para->getParH(i)->precursorBC.q27[0];
@@ -1099,7 +1067,7 @@ void GridGenerator::allocArrays_BoundaryQs()
 
     for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) {
         const int numberOfGeometryNodes = builder->getGeometrySize(i);
-        *logging::out << logging::Logger::INFO_INTERMEDIATE  << "size of GeomBoundaryQs, Level " << i << " : " << numberOfGeometryNodes << "\n";
+        VF_LOG_INFO("size of GeomBoundaryQs, Level {}: {}", i, numberOfGeometryNodes);
 
         para->getParH(i)->geometryBC.numberOfBCnodes = numberOfGeometryNodes;
         para->getParD(i)->geometryBC.numberOfBCnodes = para->getParH(i)->geometryBC.numberOfBCnodes;
@@ -1120,7 +1088,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             //preprocessing
             real* QQ = para->getParH(i)->geometryBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->geometryBC.numberOfBCnodes;
-            QforBoundaryConditions Q;
+            QforBoundaryConditions &Q = para->getParH(i)->geometryBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
             //////////////////////////////////////////////////////////////////
 
@@ -1164,8 +1132,7 @@ void GridGenerator::allocArrays_BoundaryQs()
         }
     }
 
-
-    std::cout << "-----finish BoundaryQs------" << std::endl;
+    VF_LOG_TRACE("-----finish BoundaryQs------");
 }
 
 void GridGenerator::allocArrays_OffsetScale()
@@ -1175,29 +1142,17 @@ void GridGenerator::allocArrays_OffsetScale()
         const uint numberOfNodesPerLevelCF = builder->getNumberOfNodesCF(level);
         const uint numberOfNodesPerLevelFC = builder->getNumberOfNodesFC(level);
 
-        std::cout << "number of nodes CF Level " << level << " : " << numberOfNodesPerLevelCF << std::endl;
-        std::cout << "number of nodes FC level " << level << " : " << numberOfNodesPerLevelFC << std::endl;
+        VF_LOG_INFO("number of nodes CF Level {}: {}", level, numberOfNodesPerLevelCF);
+        VF_LOG_INFO("number of nodes FC Level {}: {}", level, numberOfNodesPerLevelFC);
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        //size + memsize CF
-        para->getParH(level)->K_CF = numberOfNodesPerLevelCF;
-        para->getParD(level)->K_CF = para->getParH(level)->K_CF;
-        para->getParH(level)->intCF.kCF = para->getParH(level)->K_CF;
-        para->getParD(level)->intCF.kCF = para->getParH(level)->K_CF;
-        para->getParH(level)->mem_size_kCF = sizeof(uint)* para->getParH(level)->K_CF;
-        para->getParD(level)->mem_size_kCF = sizeof(uint)* para->getParD(level)->K_CF;
-        para->getParH(level)->mem_size_kCF_off = sizeof(real)* para->getParH(level)->K_CF;
-        para->getParD(level)->mem_size_kCF_off = sizeof(real)* para->getParD(level)->K_CF;
+        //size CF
+        para->getParH(level)->coarseToFine.numberOfCells = numberOfNodesPerLevelCF;
+        para->getParD(level)->coarseToFine.numberOfCells = para->getParH(level)->coarseToFine.numberOfCells;
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        //size + memsize FC
-        para->getParH(level)->K_FC = numberOfNodesPerLevelFC;
-        para->getParD(level)->K_FC = para->getParH(level)->K_FC;
-        para->getParH(level)->intFC.kFC = para->getParH(level)->K_FC;
-        para->getParD(level)->intFC.kFC = para->getParH(level)->K_FC;
-        para->getParH(level)->mem_size_kFC = sizeof(uint)* para->getParH(level)->K_FC;
-        para->getParD(level)->mem_size_kFC = sizeof(uint)* para->getParD(level)->K_FC;
-        para->getParH(level)->mem_size_kFC_off = sizeof(real)* para->getParH(level)->K_FC;
-        para->getParD(level)->mem_size_kFC_off = sizeof(real)* para->getParD(level)->K_FC;
+        //size FC
+        para->getParH(level)->fineToCoarse.numberOfCells = numberOfNodesPerLevelFC;
+        para->getParD(level)->fineToCoarse.numberOfCells = para->getParH(level)->fineToCoarse.numberOfCells;
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //alloc
         cudaMemoryManager->cudaAllocInterfaceCF(level);
@@ -1206,9 +1161,9 @@ void GridGenerator::allocArrays_OffsetScale()
         cudaMemoryManager->cudaAllocInterfaceOffFC(level);
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //init
-        builder->getOffsetCF(para->getParH(level)->offCF.xOffCF, para->getParH(level)->offCF.yOffCF, para->getParH(level)->offCF.zOffCF, level);
-        builder->getOffsetFC(para->getParH(level)->offFC.xOffFC, para->getParH(level)->offFC.yOffFC, para->getParH(level)->offFC.zOffFC, level);
-        builder->getGridInterfaceIndices(para->getParH(level)->intCF.ICellCFC, para->getParH(level)->intCF.ICellCFF, para->getParH(level)->intFC.ICellFCC, para->getParH(level)->intFC.ICellFCF, level);
+        builder->getOffsetCF(para->getParH(level)->neighborCoarseToFine.x, para->getParH(level)->neighborCoarseToFine.y, para->getParH(level)->neighborCoarseToFine.z, level);
+        builder->getOffsetFC(para->getParH(level)->neighborFineToCoarse.x, para->getParH(level)->neighborFineToCoarse.y, para->getParH(level)->neighborFineToCoarse.z, level);
+        builder->getGridInterfaceIndices(para->getParH(level)->coarseToFine.coarseCellIndices, para->getParH(level)->coarseToFine.fineCellIndices, para->getParH(level)->fineToCoarse.coarseCellIndices, para->getParH(level)->fineToCoarse.fineCellIndices, level);
 
         if (para->getUseStreams() || para->getNumprocs() > 1) {
             // split fine-to-coarse indices into border and bulk
@@ -1282,8 +1237,8 @@ std::string GridGenerator::verifyNeighborIndices(int level) const
     int wrongNeighbors = 0;
     int stopperNodes = 0;
 
-    for (uint index = 0; index < para->getParH(level)->numberOfNodes; index++)
-        oss << verifyNeighborIndex(level, index, invalidNodes, stopperNodes, wrongNeighbors);
+    for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
+        oss << verifyNeighborIndex(level, (int)index, invalidNodes, stopperNodes, wrongNeighbors);
 
 
     oss << "invalid nodes found: " << invalidNodes << "\n";
@@ -1362,31 +1317,31 @@ std::string GridGenerator::checkNeighbor(int level, real x, real y, real z, int
 }
 
 void GridGenerator::getPointersToBoundaryConditions(QforBoundaryConditions& boundaryConditionStruct, real* subgridDistances, const unsigned int numberOfBCnodes){
-    boundaryConditionStruct.q27[DIR_P00] =    &subgridDistances[DIR_P00   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_M00] =    &subgridDistances[DIR_M00   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0P0] =    &subgridDistances[DIR_0P0   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0M0] =    &subgridDistances[DIR_0M0   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_00P] =    &subgridDistances[DIR_00P   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_00M] =    &subgridDistances[DIR_00M   * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PP0] =   &subgridDistances[DIR_PP0  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MM0] =   &subgridDistances[DIR_MM0  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PM0] =   &subgridDistances[DIR_PM0  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MP0] =   &subgridDistances[DIR_MP0  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_P0P] =   &subgridDistances[DIR_P0P  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_M0M] =   &subgridDistances[DIR_M0M  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_P0M] =   &subgridDistances[DIR_P0M  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_M0P] =   &subgridDistances[DIR_M0P  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0PP] =   &subgridDistances[DIR_0PP  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0MM] =   &subgridDistances[DIR_0MM  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0PM] =   &subgridDistances[DIR_0PM  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_0MP] =   &subgridDistances[DIR_0MP  * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_000] = &subgridDistances[DIR_000* numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PPP] =  &subgridDistances[DIR_PPP * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MMP] =  &subgridDistances[DIR_MMP * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PMP] =  &subgridDistances[DIR_PMP * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MPP] =  &subgridDistances[DIR_MPP * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PPM] =  &subgridDistances[DIR_PPM * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MMM] =  &subgridDistances[DIR_MMM * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_PMM] =  &subgridDistances[DIR_PMM * numberOfBCnodes];
-    boundaryConditionStruct.q27[DIR_MPM] =  &subgridDistances[DIR_MPM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_P00] = &subgridDistances[DIR_P00 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M00] = &subgridDistances[DIR_M00 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0P0] = &subgridDistances[DIR_0P0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0M0] = &subgridDistances[DIR_0M0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_00P] = &subgridDistances[DIR_00P * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_00M] = &subgridDistances[DIR_00M * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PP0] = &subgridDistances[DIR_PP0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MM0] = &subgridDistances[DIR_MM0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PM0] = &subgridDistances[DIR_PM0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MP0] = &subgridDistances[DIR_MP0 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_P0P] = &subgridDistances[DIR_P0P * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M0M] = &subgridDistances[DIR_M0M * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_P0M] = &subgridDistances[DIR_P0M * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M0P] = &subgridDistances[DIR_M0P * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0PP] = &subgridDistances[DIR_0PP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0MM] = &subgridDistances[DIR_0MM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0PM] = &subgridDistances[DIR_0PM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0MP] = &subgridDistances[DIR_0MP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_000] = &subgridDistances[DIR_000 * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PPP] = &subgridDistances[DIR_PPP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MMP] = &subgridDistances[DIR_MMP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PMP] = &subgridDistances[DIR_PMP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MPP] = &subgridDistances[DIR_MPP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PPM] = &subgridDistances[DIR_PPM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MMM] = &subgridDistances[DIR_MMM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PMM] = &subgridDistances[DIR_PMM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MPM] = &subgridDistances[DIR_MPM * numberOfBCnodes];
 }
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
index c97ed02a64da1d5fafa18150c75d149f96484d44..157eb5c37660f4de5f5d547b7bac2bbc5c749fc8 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
@@ -64,8 +64,7 @@ private:
 
 public:
     VIRTUALFLUIDS_GPU_EXPORT GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator);
-    VIRTUALFLUIDS_GPU_EXPORT ~GridGenerator() override;
-
+    ~GridGenerator() override;
     //! \brief overwrites the default IndexRearrangementForStreams
     void setIndexRearrangementForStreams(std::unique_ptr<IndexRearrangementForStreams>&& indexRearrangement);
 
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGeneratorTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGeneratorTest.cpp
index 3009bc4ae449b917f494cdf39145a2e94df2ddb8..23d858f5bb5d8abcfda34a9ccfb5b3ff91ff313c 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGeneratorTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGeneratorTest.cpp
@@ -104,9 +104,6 @@ protected:
 private:
     void SetUp() override
     {
-        logging::Logger::addStream(&std::cout);
-        logging::Logger::setDebugLevel(logging::Logger::WARNING);
-
         para = std::make_shared<Parameter>();
         para->setMaxLevel(level + 1); // setMaxLevel resizes parH and parD
         for (uint i = 0; i <= level; i++) {
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
index 3dc3893f5df441b143aa86d19f3446e7e4adca43..d59fa8d017069b665bd6b91f6cf1e685782fab24 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
@@ -333,7 +333,7 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(
 {
     VF_LOG_INFO("Reorder send indices for communication after fine to coarse: level: {} direction: {}", level,
                 direction);
-    if (para->getParH(level)->intCF.kCF == 0 || para->getParH(level)->intFC.kFC == 0)
+    if (para->getParH(level)->coarseToFine.numberOfCells == 0 || para->getParH(level)->fineToCoarse.numberOfCells == 0)
         VF_LOG_CRITICAL("reorderSendIndicesForCommAfterFtoC(): para->getParH(level)->intCF needs to be initialized "
                         "before calling this function");
 
@@ -342,19 +342,19 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(
     std::vector<int> sendIndicesOther;
     uint numberOfSendIndices = builder->getNumberOfSendIndices(direction, level);
 
-    // iCellFCC
+    // coarse cells of interpolation fine to coarse (iCellFCC)
     for (uint posInSendIndices = 0; posInSendIndices < numberOfSendIndices; posInSendIndices++) {
         sparseIndexSend = sendIndices[posInSendIndices];
-        if (isSparseIndexInICellFCC(para->getParH(level)->intFC.kFC, sparseIndexSend, level)) {
+        if (isSparseIndexInCoarseIndexForFtoC(para->getParH(level)->fineToCoarse.numberOfCells, sparseIndexSend, level)) {
             addUniqueIndexToCommunicationVectors(sendIndicesAfterFtoC, sparseIndexSend,
                                                  sendIndicesForCommAfterFtoCPositions, posInSendIndices);
         }
     }
 
-    // iCellCFC
-    std::vector<uint> nodesCFC;
-    aggregateNodesInICellCFC(level, nodesCFC);
-    for (auto sparseIndex : nodesCFC)
+    // coarse cells of interpolation coarse to fine (iCellCFC)
+    std::vector<uint> coarseCellsForCtoF;
+    aggregateCoarseNodesForCtoF(level, coarseCellsForCtoF);
+    for (auto sparseIndex : coarseCellsForCtoF)
         findIfSparseIndexIsInSendIndicesAndAddToCommVectors(sparseIndex, sendIndices, numberOfSendIndices,
                                                             sendIndicesAfterFtoC, sendIndicesForCommAfterFtoCPositions);
 
@@ -378,27 +378,27 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(
     }
 }
 
-bool IndexRearrangementForStreams::isSparseIndexInICellFCC(uint sizeOfICellFCC, int sparseIndex, int level) const
+bool IndexRearrangementForStreams::isSparseIndexInCoarseIndexForFtoC(uint numberOfCoarseNodesForFtoC, int sparseIndex, int level) const
 {
-    for (uint j = 0; j < sizeOfICellFCC; j++) {
+    for (uint j = 0; j < numberOfCoarseNodesForFtoC; j++) {
         if (sparseIndex < 0)
             return false;
-        if (para->getParH(level)->intFC.ICellFCC[j] == (uint)sparseIndex) {
+        if (para->getParH(level)->fineToCoarse.coarseCellIndices[j] == (uint)sparseIndex) {
             return true;
         }
     }
     return false;
 }
 
-void IndexRearrangementForStreams::aggregateNodesInICellCFC(int level, std::vector<uint> &nodesCFC) const
+void IndexRearrangementForStreams::aggregateCoarseNodesForCtoF(int level, std::vector<uint> &nodesCFC) const
 {
     uint sparseIndex;
     uint *neighborX = para->getParH(level)->neighborX;
     uint *neighborY = para->getParH(level)->neighborY;
     uint *neighborZ = para->getParH(level)->neighborZ;
 
-    for (uint x = 0; x < para->getParH(level)->intCF.kCF; x++) {
-        sparseIndex = para->getParH(level)->intCF.ICellCFC[x];
+    for (uint x = 0; x < para->getParH(level)->coarseToFine.numberOfCells; x++) {
+        sparseIndex = para->getParH(level)->coarseToFine.coarseCellIndices[x];
         nodesCFC.push_back(sparseIndex);
         nodesCFC.push_back(neighborX[sparseIndex]);
         nodesCFC.push_back(neighborY[sparseIndex]);
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
index ded8a287717ded03929bed2422b8f1a30f851270..0b0401d3424f7a953bf2fa92d0382fd9256a635a 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
@@ -9,7 +9,7 @@
 #include <memory>
 #include <vector>
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
 class Parameter;
 class GridBuilder;
@@ -90,16 +90,15 @@ protected:
     //! \param sendIndices is the pointer to the vector with the send indices, which will be reordered in this function
     //! \param numberOfSendNodesAfterFtoC will be set in this method
     //! \param sendIndicesForCommAfterFtoCPositions stores each sendIndex's positions before reordering
-    void reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNodesAfterFtoC, int direction, int level,
-                                            std::vector<uint> &sendIndicesForCommAfterFtoCPositions) const;
-    //! \brief Check if a sparse index occurs in the ICellFCC
-    bool isSparseIndexInICellFCC(uint sizeOfICellFCC, int sparseIndexSend, int level) const;
+    void reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNodesAfterFtoC, int direction,
+                                            int level, std::vector<uint> &sendIndicesForCommAfterFtoCPositions) const;
+    //! \brief Check if a sparse index occurs in the coarse nodes for the interpolation from fine to coarse
+    bool isSparseIndexInCoarseIndexForFtoC(uint numberOfCoarseNodesForFtoC, int sparseIndexSend, int level) const;
     //! \brief Aggregate all nodes in the coarse cells for the interpolation in coarse to fine
     //! \details For the coarse cells in the interpolation from coarse to fine only one node is stored. This methods
     //! looks for the other nodes of each cell and puts them into vector. Duplicate nodes are only stored once.
-    void aggregateNodesInICellCFC(int level, std::vector<uint> &nodesCFC) const;
-    //! \brief Add index to sendIndicesAfterFtoC and sendIndicesForCommAfterFtoCPositions, but omit indices which are
-    //! already in sendIndicesAfterFtoC
+    void aggregateCoarseNodesForCtoF(int level, std::vector<uint> &nodesCFC) const;
+    //! \brief Add index to sendIndicesAfterFtoC and sendIndicesForCommAfterFtoCPositions, but omit indices which are already in sendIndicesAfterFtoC
     void addUniqueIndexToCommunicationVectors(std::vector<int> &sendIndicesAfterFtoC, int &sparseIndexSend,
                                               std::vector<unsigned int> &sendIndicesForCommAfterFtoCPositions,
                                               uint &posInSendIndices) const;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
index 04221280d8a9f723db241ba2686cc88711ca9a16..216031ac558f17491b1f2dc52e1cb2a2d7624e62 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
@@ -68,16 +68,16 @@ private:
     std::vector<uint> fluidNodeIndicesBorder;
 
 public:
-    GridImpDouble(Object *object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta,
+    GridImpDouble(SPtr<Object> object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta,
                   Distribution d, uint level)
         : GridImp(object, startX, startY, startZ, endX, endY, endZ, delta, d, level)
     {
     }
 
-    static SPtr<GridImpDouble> makeShared(Object *object, real startX, real startY, real startZ, real endX, real endY,
+    static SPtr<GridImpDouble> makeShared(SPtr<Object> object, real startX, real startY, real startZ, real endX, real endY,
                                           real endZ, real delta, Distribution d, uint level)
     {
-        SPtr<GridImpDouble> grid(new GridImpDouble(object, startX, startY, startZ, endX, endY, endZ, delta, d, level));
+        SPtr<GridImpDouble> grid(std::make_shared<GridImpDouble>(object, startX, startY, startZ, endX, endY, endZ, delta, d, level));
         return grid;
     }
 };
@@ -97,10 +97,10 @@ struct SendIndicesForCommAfterFtoCX {
     const int numberOfProcessNeighbors = 1;
     const int indexOfProcessNeighbor = 0;
 
-    std::vector<uint> iCellCFC = { 8, 10, 12 };
-    std::vector<uint> iCellFCC = { 14, 16, 18 };
-    const uint kCF = (uint)iCellCFC.size();
-    const uint kFC = (uint)iCellFCC.size();
+    std::vector<uint> interpolationCellCoarseToFineCoarse = { 8, 10, 12 };
+    std::vector<uint> interpolationCellFineToCoarseCoarse = { 14, 16, 18 };
+    const uint numNodesCtoF = (uint)interpolationCellCoarseToFineCoarse.size();
+    const uint numNodesFtoC = (uint)interpolationCellFineToCoarseCoarse.size();
     uint neighborX[18] = { 0u };
     uint neighborY[18] = { 0u };
     uint neighborZ[18] = { 0u };
@@ -117,39 +117,37 @@ struct SendIndicesForCommAfterFtoCX {
 class IndexRearrangementForStreamsTest_reorderSendIndices : public testing::Test
 {
 protected:
-    SendIndicesForCommAfterFtoCX si;
+    SendIndicesForCommAfterFtoCX sendIndices;
     SPtr<Parameter> para;
     std::unique_ptr<IndexRearrangementForStreams> testSubject;
 
     void act()
     {
-        testSubject->reorderSendIndicesForCommAfterFtoCX(si.direction, si.level, si.indexOfProcessNeighbor,
-                                                         si.sendIndicesForCommAfterFtoCPositions);
+        testSubject->reorderSendIndicesForCommAfterFtoCX(sendIndices.direction, sendIndices.level, sendIndices.indexOfProcessNeighbor,
+                                                         sendIndices.sendIndicesForCommAfterFtoCPositions);
     };
 
 private:
     void SetUp() override
     {
-        logging::Logger::addStream(&std::cout);
-
         SPtr<GridImpDouble> grid =
             GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, Distribution(), 1);
         std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid);
-        builder->setNumberOfSendIndices((uint)si.sendIndices.size());
+        builder->setNumberOfSendIndices((uint)sendIndices.sendIndices.size());
 
-        para = testingVF::createParameterForLevel(si.level);
+        para = testingVF::createParameterForLevel(sendIndices.level);
 
-        para->getParH(si.level)->intFC.kFC = si.kFC;
-        para->getParH(si.level)->intFC.ICellFCC = &(si.iCellFCC.front());
-        para->getParH(si.level)->intCF.ICellCFC = &(si.iCellCFC.front());
-        para->getParH(si.level)->intCF.kCF = si.kCF;
-        para->getParH(si.level)->neighborX = si.neighborX;
-        para->getParH(si.level)->neighborY = si.neighborY;
-        para->getParH(si.level)->neighborZ = si.neighborZ;
+        para->getParH(sendIndices.level)->fineToCoarse.numberOfCells = sendIndices.numNodesFtoC;
+        para->getParH(sendIndices.level)->fineToCoarse.coarseCellIndices = &(sendIndices.interpolationCellFineToCoarseCoarse.front());
+        para->getParH(sendIndices.level)->coarseToFine.coarseCellIndices = &(sendIndices.interpolationCellCoarseToFineCoarse.front());
+        para->getParH(sendIndices.level)->coarseToFine.numberOfCells = sendIndices.numNodesCtoF;
+        para->getParH(sendIndices.level)->neighborX = sendIndices.neighborX;
+        para->getParH(sendIndices.level)->neighborY = sendIndices.neighborY;
+        para->getParH(sendIndices.level)->neighborZ = sendIndices.neighborZ;
 
-        para->setNumberOfProcessNeighborsX(si.numberOfProcessNeighbors, si.level, "send");
-        para->getParH(si.level)->sendProcessNeighborX[si.indexOfProcessNeighbor].index = si.sendIndices.data();
-        para->initProcessNeighborsAfterFtoCX(si.level);
+        para->setNumberOfProcessNeighborsX(sendIndices.numberOfProcessNeighbors, sendIndices.level, "send");
+        para->getParH(sendIndices.level)->sendProcessNeighborX[sendIndices.indexOfProcessNeighbor].index = sendIndices.sendIndices.data();
+        para->initProcessNeighborsAfterFtoCX(sendIndices.level);
 
         testSubject = std::make_unique<IndexRearrangementForStreams>(
             IndexRearrangementForStreams(para, builder, vf::gpu::Communicator::getInstance()));
@@ -160,14 +158,14 @@ TEST_F(IndexRearrangementForStreamsTest_reorderSendIndices, reorderSendIndicesFo
 {
     act();
 
-    EXPECT_THAT(si.sendIndicesForCommAfterFtoCPositions.size(),
-                testing::Eq(si.sendIndicesForCommAfterFtoCPositions_expected.size()));
-    EXPECT_THAT(si.sendIndicesForCommAfterFtoCPositions, testing::Eq(si.sendIndicesForCommAfterFtoCPositions_expected));
+    EXPECT_THAT(sendIndices.sendIndicesForCommAfterFtoCPositions.size(),
+                testing::Eq(sendIndices.sendIndicesForCommAfterFtoCPositions_expected.size()));
+    EXPECT_THAT(sendIndices.sendIndicesForCommAfterFtoCPositions, testing::Eq(sendIndices.sendIndicesForCommAfterFtoCPositions_expected));
 
-    EXPECT_THAT(para->getParH(si.level)->sendProcessNeighborsAfterFtoCX[si.indexOfProcessNeighbor].numberOfNodes,
-                testing::Eq(si.numberOfSendNodesAfterFtoC_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(si.level)->sendProcessNeighborX[si.indexOfProcessNeighbor].index,
-                                si.sendProcessNeighborX_expected))
+    EXPECT_THAT(para->getParH(sendIndices.level)->sendProcessNeighborsAfterFtoCX[sendIndices.indexOfProcessNeighbor].numberOfNodes,
+                testing::Eq(sendIndices.numberOfSendNodesAfterFtoC_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(sendIndices.level)->sendProcessNeighborX[sendIndices.indexOfProcessNeighbor].index,
+                                sendIndices.sendProcessNeighborX_expected))
         << "sendProcessNeighborX[].index does not match the expected vector";
 }
 
@@ -227,8 +225,6 @@ protected:
 private:
     void SetUp() override
     {
-        logging::Logger::addStream(&std::cout);
-
         para = testingVF::createParameterForLevel(level);
 
         para->setNumberOfProcessNeighborsX(numberOfProcessNeighbors, level, "send");
@@ -348,8 +344,6 @@ protected:
 private:
     void SetUp() override
     {
-        logging::Logger::addStream(&std::cout);
-
         para = testingVF::createParameterForLevel(level);
 
         para->setNumberOfProcessNeighborsY(numberOfProcessNeighbors, level, "send");
@@ -469,8 +463,6 @@ protected:
 private:
     void SetUp() override
     {
-        logging::Logger::addStream(&std::cout);
-
         para = testingVF::createParameterForLevel(level);
 
         para->setNumberOfProcessNeighborsZ(numberOfProcessNeighbors, level, "send");
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouper.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouper.cpp
index 347ab362d1f6b28a6c2b46f2e885085f955fb34e..f3717b58fca0e81e23be100eb8d15371703f8010 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouper.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouper.cpp
@@ -14,77 +14,77 @@ void InterpolationCellGrouper::splitFineToCoarseIntoBorderAndBulk(uint level) co
 {
     this->reorderFineToCoarseIntoBorderAndBulk(level);
 
-    parDs[level]->intFCBorder.kFC = parHs[level]->intFCBorder.kFC;
-    parDs[level]->intFCBulk.kFC = parHs[level]->intFCBulk.kFC;
-    parDs[level]->intFCBorder.ICellFCC = parDs[level]->intFC.ICellFCC;
-    parDs[level]->intFCBulk.ICellFCC = parDs[level]->intFCBorder.ICellFCC + parDs[level]->intFCBorder.kFC;
-    parDs[level]->intFCBorder.ICellFCF = parDs[level]->intFC.ICellFCF;
-    parDs[level]->intFCBulk.ICellFCF = parDs[level]->intFCBorder.ICellFCF + parDs[level]->intFCBorder.kFC;
-    parDs[level]->offFCBulk.xOffFC = parDs[level]->offFC.xOffFC + parDs[level]->intFCBorder.kFC;
-    parDs[level]->offFCBulk.yOffFC = parDs[level]->offFC.yOffFC + parDs[level]->intFCBorder.kFC;
-    parDs[level]->offFCBulk.zOffFC = parDs[level]->offFC.zOffFC + parDs[level]->intFCBorder.kFC;
+    parDs[level]->fineToCoarseBorder.numberOfCells = parHs[level]->fineToCoarseBorder.numberOfCells;
+    parDs[level]->fineToCoarseBulk.numberOfCells = parHs[level]->fineToCoarseBulk.numberOfCells;
+    parDs[level]->fineToCoarseBorder.coarseCellIndices = parDs[level]->fineToCoarse.coarseCellIndices;
+    parDs[level]->fineToCoarseBulk.coarseCellIndices = parDs[level]->fineToCoarseBorder.coarseCellIndices + parDs[level]->fineToCoarseBorder.numberOfCells;
+    parDs[level]->fineToCoarseBorder.fineCellIndices = parDs[level]->fineToCoarse.fineCellIndices;
+    parDs[level]->fineToCoarseBulk.fineCellIndices = parDs[level]->fineToCoarseBorder.fineCellIndices + parDs[level]->fineToCoarseBorder.numberOfCells;
+    parDs[level]->neighborFineToCoarseBulk.x = parDs[level]->neighborFineToCoarse.x + parDs[level]->fineToCoarseBorder.numberOfCells;
+    parDs[level]->neighborFineToCoarseBulk.y = parDs[level]->neighborFineToCoarse.y + parDs[level]->fineToCoarseBorder.numberOfCells;
+    parDs[level]->neighborFineToCoarseBulk.z = parDs[level]->neighborFineToCoarse.z + parDs[level]->fineToCoarseBorder.numberOfCells;
 }
 
 void InterpolationCellGrouper::reorderFineToCoarseIntoBorderAndBulk(uint level) const
 {
     // create some local variables for better readability
-    uint *iCellFccAll = parHs[level]->intFC.ICellFCC;
-    uint *iCellFcfAll = parHs[level]->intFC.ICellFCF;
+    uint *fineToCoarseCoarseAll = parHs[level]->fineToCoarse.coarseCellIndices;
+    uint *fineToCoarseFineAll = parHs[level]->fineToCoarse.fineCellIndices;
     auto grid = this->builder->getGrid(level);
 
-    std::vector<uint> iCellFccBorderVector;
-    std::vector<uint> iCellFccBulkVector;
-    std::vector<uint> iCellFcfBorderVector;
-    std::vector<uint> iCellFcfBulkVector;
-    std::vector<real> xOffFCBorderVector;
-    std::vector<real> yOffFCBorderVector;
-    std::vector<real> zOffFCBorderVector;
-    std::vector<real> xOffFCBulkVector;
-    std::vector<real> yOffFCBulkVector;
-    std::vector<real> zOffFCBulkVector;
-
-    // fill border and bulk vectors with iCellFCs
-    for (uint i = 0; i < parHs[level]->intFC.kFC; i++)
-        if (grid->isSparseIndexInFluidNodeIndicesBorder(iCellFccAll[i])) {
-            iCellFccBorderVector.push_back(iCellFccAll[i]);
-            iCellFcfBorderVector.push_back(iCellFcfAll[i]);
-            xOffFCBorderVector.push_back(parHs[level]->offFC.xOffFC[i]);
-            yOffFCBorderVector.push_back(parHs[level]->offFC.yOffFC[i]);
-            zOffFCBorderVector.push_back(parHs[level]->offFC.zOffFC[i]);
+    std::vector<uint> fineToCoarseCoarseBorderVector;
+    std::vector<uint> fineToCoarseCoarseBulkVector;
+    std::vector<uint> fineToCoarseFineBorderVector;
+    std::vector<uint> fineToCoarseFineBulkVector;
+    std::vector<real> neighborXBorder;
+    std::vector<real> neighborYBorder;
+    std::vector<real> neighborZBorder;
+    std::vector<real> neighborXBulk;
+    std::vector<real> neighborYBulk;
+    std::vector<real> neighborZBulk;
+
+    // fill border and bulk vectors with interpolation cells fine to coarse
+    for (uint i = 0; i < parHs[level]->fineToCoarse.numberOfCells; i++)
+        if (grid->isSparseIndexInFluidNodeIndicesBorder(fineToCoarseCoarseAll[i])) {
+            fineToCoarseCoarseBorderVector.push_back(fineToCoarseCoarseAll[i]);
+            fineToCoarseFineBorderVector.push_back(fineToCoarseFineAll[i]);
+            neighborXBorder.push_back(parHs[level]->neighborFineToCoarse.x[i]);
+            neighborYBorder.push_back(parHs[level]->neighborFineToCoarse.y[i]);
+            neighborZBorder.push_back(parHs[level]->neighborFineToCoarse.z[i]);
         } else {
-            iCellFccBulkVector.push_back(iCellFccAll[i]);
-            iCellFcfBulkVector.push_back(iCellFcfAll[i]);
-            xOffFCBulkVector.push_back(parHs[level]->offFC.xOffFC[i]);
-            yOffFCBulkVector.push_back(parHs[level]->offFC.yOffFC[i]);
-            zOffFCBulkVector.push_back(parHs[level]->offFC.zOffFC[i]);
+            fineToCoarseCoarseBulkVector.push_back(fineToCoarseCoarseAll[i]);
+            fineToCoarseFineBulkVector.push_back(fineToCoarseFineAll[i]);
+            neighborXBulk.push_back(parHs[level]->neighborFineToCoarse.x[i]);
+            neighborYBulk.push_back(parHs[level]->neighborFineToCoarse.y[i]);
+            neighborZBulk.push_back(parHs[level]->neighborFineToCoarse.z[i]);
         }
 
     // set new sizes and pointers
-    parHs[level]->intFCBorder.ICellFCC = iCellFccAll;
-    parHs[level]->intFCBorder.ICellFCF = iCellFcfAll;
-    parHs[level]->intFCBorder.kFC = (uint)iCellFccBorderVector.size();
-    parHs[level]->intFCBulk.kFC = (uint)iCellFccBulkVector.size();
-    parHs[level]->intFCBulk.ICellFCC = iCellFccAll + parHs[level]->intFCBorder.kFC;
-    parHs[level]->intFCBulk.ICellFCF = iCellFcfAll + parHs[level]->intFCBorder.kFC;
-    parHs[level]->offFCBulk.xOffFC = parHs[level]->offFC.xOffFC + parHs[level]->intFCBorder.kFC;
-    parHs[level]->offFCBulk.yOffFC = parHs[level]->offFC.yOffFC + parHs[level]->intFCBorder.kFC;
-    parHs[level]->offFCBulk.zOffFC = parHs[level]->offFC.zOffFC + parHs[level]->intFCBorder.kFC;
+    parHs[level]->fineToCoarseBorder.coarseCellIndices = fineToCoarseCoarseAll;
+    parHs[level]->fineToCoarseBorder.fineCellIndices = fineToCoarseFineAll;
+    parHs[level]->fineToCoarseBorder.numberOfCells = (uint)fineToCoarseCoarseBorderVector.size();
+    parHs[level]->fineToCoarseBulk.numberOfCells = (uint)fineToCoarseCoarseBulkVector.size();
+    parHs[level]->fineToCoarseBulk.coarseCellIndices = fineToCoarseCoarseAll + parHs[level]->fineToCoarseBorder.numberOfCells;
+    parHs[level]->fineToCoarseBulk.fineCellIndices = fineToCoarseFineAll + parHs[level]->fineToCoarseBorder.numberOfCells;
+    parHs[level]->neighborFineToCoarseBulk.x = parHs[level]->neighborFineToCoarse.x + parHs[level]->fineToCoarseBorder.numberOfCells;
+    parHs[level]->neighborFineToCoarseBulk.y = parHs[level]->neighborFineToCoarse.y + parHs[level]->fineToCoarseBorder.numberOfCells;
+    parHs[level]->neighborFineToCoarseBulk.z = parHs[level]->neighborFineToCoarse.z + parHs[level]->fineToCoarseBorder.numberOfCells;
 
     // copy the created vectors to the memory addresses of the old arrays
     // this is inefficient :(
-    for (uint i = 0; i < (uint)iCellFccBorderVector.size(); i++) {
-        iCellFccAll[i] = iCellFccBorderVector[i];
-        iCellFcfAll[i] = iCellFcfBorderVector[i];
-        parHs[level]->offFC.xOffFC[i] = xOffFCBorderVector[i];
-        parHs[level]->offFC.yOffFC[i] = yOffFCBorderVector[i];
-        parHs[level]->offFC.zOffFC[i] = zOffFCBorderVector[i];
+    for (uint i = 0; i < (uint)fineToCoarseCoarseBorderVector.size(); i++) {
+        fineToCoarseCoarseAll[i] = fineToCoarseCoarseBorderVector[i];
+        fineToCoarseFineAll[i] = fineToCoarseFineBorderVector[i];
+        parHs[level]->neighborFineToCoarse.x[i] = neighborXBorder[i];
+        parHs[level]->neighborFineToCoarse.y[i] = neighborYBorder[i];
+        parHs[level]->neighborFineToCoarse.z[i] = neighborZBorder[i];
     }
-    for (uint i = 0; i < (uint)iCellFccBulkVector.size(); i++) {
-        parHs[level]->intFCBulk.ICellFCC[i] = iCellFccBulkVector[i];
-        parHs[level]->intFCBulk.ICellFCF[i] = iCellFcfBulkVector[i];
-        parHs[level]->offFCBulk.xOffFC[i] = xOffFCBulkVector[i];
-        parHs[level]->offFCBulk.yOffFC[i] = yOffFCBulkVector[i];
-        parHs[level]->offFCBulk.zOffFC[i] = zOffFCBulkVector[i];
+    for (uint i = 0; i < (uint)fineToCoarseCoarseBulkVector.size(); i++) {
+        parHs[level]->fineToCoarseBulk.coarseCellIndices[i] = fineToCoarseCoarseBulkVector[i];
+        parHs[level]->fineToCoarseBulk.fineCellIndices[i] = fineToCoarseFineBulkVector[i];
+        parHs[level]->neighborFineToCoarseBulk.x[i] = neighborXBulk[i];
+        parHs[level]->neighborFineToCoarseBulk.y[i] = neighborYBulk[i];
+        parHs[level]->neighborFineToCoarseBulk.z[i] = neighborZBulk[i];
     }
 }
 
@@ -92,42 +92,42 @@ void InterpolationCellGrouper::splitCoarseToFineIntoBorderAndBulk(uint level) co
 {
     this->reorderCoarseToFineIntoBorderAndBulk(level);
 
-    parDs[level]->intCFBorder.kCF = parHs[level]->intCFBorder.kCF;
-    parDs[level]->intCFBulk.kCF = parHs[level]->intCFBulk.kCF;
-    parDs[level]->intCFBorder.ICellCFC = parDs[level]->intCF.ICellCFC;
-    parDs[level]->intCFBulk.ICellCFC = parDs[level]->intCFBorder.ICellCFC + parDs[level]->intCFBorder.kCF;
-    parDs[level]->intCFBorder.ICellCFF = parDs[level]->intCF.ICellCFF;
-    parDs[level]->intCFBulk.ICellCFF = parDs[level]->intCFBorder.ICellCFF + parDs[level]->intCFBorder.kCF;
-    parDs[level]->offCFBulk.xOffCF = parDs[level]->offCF.xOffCF + parDs[level]->intCFBorder.kCF;
-    parDs[level]->offCFBulk.yOffCF = parDs[level]->offCF.yOffCF + parDs[level]->intCFBorder.kCF;
-    parDs[level]->offCFBulk.zOffCF = parDs[level]->offCF.zOffCF + parDs[level]->intCFBorder.kCF;
+    parDs[level]->coarseToFineBorder.numberOfCells = parHs[level]->coarseToFineBorder.numberOfCells;
+    parDs[level]->coarseToFineBulk.numberOfCells = parHs[level]->coarseToFineBulk.numberOfCells;
+    parDs[level]->coarseToFineBorder.coarseCellIndices = parDs[level]->coarseToFine.coarseCellIndices;
+    parDs[level]->coarseToFineBulk.coarseCellIndices = parDs[level]->coarseToFineBorder.coarseCellIndices + parDs[level]->coarseToFineBorder.numberOfCells;
+    parDs[level]->coarseToFineBorder.fineCellIndices = parDs[level]->coarseToFine.fineCellIndices;
+    parDs[level]->coarseToFineBulk.fineCellIndices = parDs[level]->coarseToFineBorder.fineCellIndices + parDs[level]->coarseToFineBorder.numberOfCells;
+    parDs[level]->neighborCoarseToFineBulk.x = parDs[level]->neighborCoarseToFine.x + parDs[level]->coarseToFineBorder.numberOfCells;
+    parDs[level]->neighborCoarseToFineBulk.y = parDs[level]->neighborCoarseToFine.y + parDs[level]->coarseToFineBorder.numberOfCells;
+    parDs[level]->neighborCoarseToFineBulk.z = parDs[level]->neighborCoarseToFine.z + parDs[level]->coarseToFineBorder.numberOfCells;
 }
 
 void InterpolationCellGrouper::reorderCoarseToFineIntoBorderAndBulk(uint level) const
 {
     // create some local variables for better readability
-    uint *iCellCfcAll = parHs[level]->intCF.ICellCFC;
-    uint *iCellCffAll = parHs[level]->intCF.ICellCFF;
+    uint *coarseToFineCoarseAll = parHs[level]->coarseToFine.coarseCellIndices;
+    uint *coarseToFineFineAll = parHs[level]->coarseToFine.fineCellIndices;
     uint *neighborX = this->parHs[level]->neighborX;
     uint *neighborY = this->parHs[level]->neighborY;
     uint *neighborZ = this->parHs[level]->neighborZ;
     auto grid = this->builder->getGrid(level);
 
-    std::vector<uint> iCellCfcBorderVector;
-    std::vector<uint> iCellCfcBulkVector;
-    std::vector<uint> iCellCffBorderVector;
-    std::vector<uint> iCellCffBulkVector;
-    std::vector<real> xOffCFBorderVector;
-    std::vector<real> yOffCFBorderVector;
-    std::vector<real> zOffCFBorderVector;
-    std::vector<real> xOffCFBulkVector;
-    std::vector<real> yOffCFBulkVector;
-    std::vector<real> zOffCFBulkVector;
+    std::vector<uint> coarseToFineCoarseBorderVector;
+    std::vector<uint> coarseToFineCoarseBulkVector;
+    std::vector<uint> coarseToFineFineBorderVector;
+    std::vector<uint> coarseToFineFineBulkVector;
+    std::vector<real> neighborXBorder;
+    std::vector<real> neighborYBorder;
+    std::vector<real> neighborZBorder;
+    std::vector<real> neighborXBulk;
+    std::vector<real> neighborYBulk;
+    std::vector<real> neighborZBulk;
     uint sparseIndexOfICellBSW;
 
-    // fill border and bulk vectors with iCellCFs
-    for (uint i = 0; i < parHs[level]->intCF.kCF; i++) {
-        sparseIndexOfICellBSW = iCellCfcAll[i];
+    // fill border and bulk vectors with interpolation cells coarse to fine
+    for (uint i = 0; i < parHs[level]->coarseToFine.numberOfCells; i++) {
+        sparseIndexOfICellBSW = coarseToFineCoarseAll[i];
 
         if (grid->isSparseIndexInFluidNodeIndicesBorder(sparseIndexOfICellBSW) ||
             grid->isSparseIndexInFluidNodeIndicesBorder(neighborX[sparseIndexOfICellBSW]) ||
@@ -138,45 +138,45 @@ void InterpolationCellGrouper::reorderCoarseToFineIntoBorderAndBulk(uint level)
             grid->isSparseIndexInFluidNodeIndicesBorder(neighborZ[neighborY[sparseIndexOfICellBSW]]) ||
             grid->isSparseIndexInFluidNodeIndicesBorder(neighborZ[neighborY[neighborX[sparseIndexOfICellBSW]]])) {
 
-            iCellCfcBorderVector.push_back(iCellCfcAll[i]);
-            iCellCffBorderVector.push_back(iCellCffAll[i]);
-            xOffCFBorderVector.push_back(parHs[level]->offCF.xOffCF[i]);
-            yOffCFBorderVector.push_back(parHs[level]->offCF.yOffCF[i]);
-            zOffCFBorderVector.push_back(parHs[level]->offCF.zOffCF[i]);
+            coarseToFineCoarseBorderVector.push_back(coarseToFineCoarseAll[i]);
+            coarseToFineFineBorderVector.push_back(coarseToFineFineAll[i]);
+            neighborXBorder.push_back(parHs[level]->neighborCoarseToFine.x[i]);
+            neighborYBorder.push_back(parHs[level]->neighborCoarseToFine.y[i]);
+            neighborZBorder.push_back(parHs[level]->neighborCoarseToFine.z[i]);
         } else {
-            iCellCfcBulkVector.push_back(iCellCfcAll[i]);
-            iCellCffBulkVector.push_back(iCellCffAll[i]);
-            xOffCFBulkVector.push_back(parHs[level]->offCF.xOffCF[i]);
-            yOffCFBulkVector.push_back(parHs[level]->offCF.yOffCF[i]);
-            zOffCFBulkVector.push_back(parHs[level]->offCF.zOffCF[i]);
+            coarseToFineCoarseBulkVector.push_back(coarseToFineCoarseAll[i]);
+            coarseToFineFineBulkVector.push_back(coarseToFineFineAll[i]);
+            neighborXBulk.push_back(parHs[level]->neighborCoarseToFine.x[i]);
+            neighborYBulk.push_back(parHs[level]->neighborCoarseToFine.y[i]);
+            neighborZBulk.push_back(parHs[level]->neighborCoarseToFine.z[i]);
         }
     }
 
     // set new sizes and pointers
-    parHs[level]->intCFBorder.ICellCFC = parHs[level]->intCF.ICellCFC;
-    parHs[level]->intCFBorder.ICellCFF = parHs[level]->intCF.ICellCFF;
-    parHs[level]->intCFBorder.kCF = (uint)iCellCfcBorderVector.size();
-    parHs[level]->intCFBulk.kCF = (uint)iCellCfcBulkVector.size();
-    parHs[level]->intCFBulk.ICellCFC = parHs[level]->intCF.ICellCFC + parHs[level]->intCFBorder.kCF;
-    parHs[level]->intCFBulk.ICellCFF = parHs[level]->intCF.ICellCFF + parHs[level]->intCFBorder.kCF;
-    parHs[level]->offCFBulk.xOffCF = parHs[level]->offCF.xOffCF + parHs[level]->intCFBorder.kCF;
-    parHs[level]->offCFBulk.yOffCF = parHs[level]->offCF.yOffCF + parHs[level]->intCFBorder.kCF;
-    parHs[level]->offCFBulk.zOffCF = parHs[level]->offCF.zOffCF + parHs[level]->intCFBorder.kCF;
+    parHs[level]->coarseToFineBorder.coarseCellIndices = parHs[level]->coarseToFine.coarseCellIndices;
+    parHs[level]->coarseToFineBorder.fineCellIndices = parHs[level]->coarseToFine.fineCellIndices;
+    parHs[level]->coarseToFineBorder.numberOfCells = (uint)coarseToFineCoarseBorderVector.size();
+    parHs[level]->coarseToFineBulk.numberOfCells = (uint)coarseToFineCoarseBulkVector.size();
+    parHs[level]->coarseToFineBulk.coarseCellIndices = parHs[level]->coarseToFine.coarseCellIndices + parHs[level]->coarseToFineBorder.numberOfCells;
+    parHs[level]->coarseToFineBulk.fineCellIndices = parHs[level]->coarseToFine.fineCellIndices + parHs[level]->coarseToFineBorder.numberOfCells;
+    parHs[level]->neighborCoarseToFineBulk.x = parHs[level]->neighborCoarseToFine.x + parHs[level]->coarseToFineBorder.numberOfCells;
+    parHs[level]->neighborCoarseToFineBulk.y = parHs[level]->neighborCoarseToFine.y + parHs[level]->coarseToFineBorder.numberOfCells;
+    parHs[level]->neighborCoarseToFineBulk.z = parHs[level]->neighborCoarseToFine.z + parHs[level]->coarseToFineBorder.numberOfCells;
 
     // copy the created vectors to the memory addresses of the old arrays
     // this is inefficient :(
-    for (uint i = 0; i < (uint)iCellCfcBorderVector.size(); i++) {
-        parHs[level]->intCFBorder.ICellCFC[i] = iCellCfcBorderVector[i];
-        parHs[level]->intCFBorder.ICellCFF[i] = iCellCffBorderVector[i];
-        parHs[level]->offCF.xOffCF[i] = xOffCFBorderVector[i];
-        parHs[level]->offCF.yOffCF[i] = yOffCFBorderVector[i];
-        parHs[level]->offCF.zOffCF[i] = zOffCFBorderVector[i];
+    for (uint i = 0; i < (uint)coarseToFineCoarseBorderVector.size(); i++) {
+        parHs[level]->coarseToFineBorder.coarseCellIndices[i] = coarseToFineCoarseBorderVector[i];
+        parHs[level]->coarseToFineBorder.fineCellIndices[i] = coarseToFineFineBorderVector[i];
+        parHs[level]->neighborCoarseToFine.x[i] = neighborXBorder[i];
+        parHs[level]->neighborCoarseToFine.y[i] = neighborYBorder[i];
+        parHs[level]->neighborCoarseToFine.z[i] = neighborZBorder[i];
     }
-    for (uint i = 0; i < (uint)iCellCfcBulkVector.size(); i++) {
-        parHs[level]->intCFBulk.ICellCFC[i] = iCellCfcBulkVector[i];
-        parHs[level]->intCFBulk.ICellCFF[i] = iCellCffBulkVector[i];
-        parHs[level]->offCFBulk.xOffCF[i] = xOffCFBulkVector[i];
-        parHs[level]->offCFBulk.yOffCF[i] = yOffCFBulkVector[i];
-        parHs[level]->offCFBulk.zOffCF[i] = zOffCFBulkVector[i];
+    for (uint i = 0; i < (uint)coarseToFineCoarseBulkVector.size(); i++) {
+        parHs[level]->coarseToFineBulk.coarseCellIndices[i] = coarseToFineCoarseBulkVector[i];
+        parHs[level]->coarseToFineBulk.fineCellIndices[i] = coarseToFineFineBulkVector[i];
+        parHs[level]->neighborCoarseToFineBulk.x[i] = neighborXBulk[i];
+        parHs[level]->neighborCoarseToFineBulk.y[i] = neighborYBulk[i];
+        parHs[level]->neighborCoarseToFineBulk.z[i] = neighborZBulk[i];
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouper.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouper.h
index 5471194f643e044a7c2cdca1db45017b9d3a1022..f619d785e1f6c30cee0c698cbc9d8dd8249f414f 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouper.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouper.h
@@ -6,7 +6,7 @@
 #ifndef InterpolationCellGrouper_H
 #define InterpolationCellGrouper_H
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 #include <basics/PointerDefinitions.h>
 #include <memory>
 #include <vector>
@@ -54,14 +54,14 @@ protected:
     //////////////////////////////////////////////////////////////////////////
 
     //! \brief This function reorders the arrays of CFC/CFF indices and sets the pointers and sizes of the new
-    //! subarrays: \details The coarse cells for interpolation from coarse to fine (iCellCFC) are divided into two
-    //! subgroups: border and bulk. The fine cells (iCellCFF) are reordered accordingly. The offset cells (xOffCF,
+    //! subarrays: \details The coarse cells for interpolation from coarse to fine (coarseToFineCoarse) are divided into two
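+    //! subgroups: border and bulk. The fine cells (coarseToFineFine) are reordered accordingly. The offset cells
-    //! yOffCF, zOffCF) must be reordered in the same way.
+    //! (neighborCoarseToFine.x, neighborCoarseToFine.y, neighborCoarseToFine.z) must be reordered in the same way.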
     void reorderCoarseToFineIntoBorderAndBulk(uint level) const;
 
     //! \brief This function reorders the arrays of FCC/FCF indices and return pointers and sizes of the new subarrays:
-    //! \details The coarse cells for interpolation from fine to coarse (iCellFCC) are divided into two subgroups:
-    //! border and bulk. The fine cells (iCellFCF) are reordered accordingly. The offset cells (xOffFC,
+    //! \details The coarse cells for interpolation from fine to coarse (fineToCoarseCoarse) are divided into two subgroups:
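+    //! border and bulk. The fine cells (fineToCoarseFine) are reordered accordingly. The offset cells
-    //! yOffFC, zOffFC) must be reordered in the same way.
+    //! (neighborFineToCoarse.x, neighborFineToCoarse.y, neighborFineToCoarse.z) must be reordered in the same way.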
     void reorderFineToCoarseIntoBorderAndBulk(uint level) const;
 
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouperTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouperTest.cpp
index 664552b2f47542cc6ae3d4940c2b74ede1beff91..4f4e8525f17ef004d65ee2eaa3d4d533d9d40734 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouperTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/InterpolationCellGrouperTest.cpp
@@ -38,7 +38,7 @@ private:
     std::vector<uint> fluidNodeIndicesBorder;
 
 public:
-    GridImpDouble(Object *object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta,
+    GridImpDouble(SPtr<Object> object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta,
                   Distribution d, uint level)
         : GridImp(object, startX, startY, startZ, endX, endY, endZ, delta, d, level)
     {
@@ -62,37 +62,37 @@ public:
     }
 };
 
-struct CFBorderBulk {
+struct CoarseToFineBorderBulk {
     // data to work on
     std::vector<uint> fluidNodeIndicesBorder = { 10, 11, 12, 13, 14, 15, 16 };
-    std::vector<uint> iCellCFC = { 1, 11, 3, 13, 5, 15, 7 };
-    std::vector<uint> iCellCFF = { 2, 12, 4, 14, 6, 16, 8 };
-    const uint sizeOfICellCf = (uint)iCellCFC.size();
+    std::vector<uint> intCtoFcoarse = { 1, 11, 3, 13, 5, 15, 7 };
+    std::vector<uint> fineCellIndices = { 2, 12, 4, 14, 6, 16, 8 };
+    const uint sizeOfInterpolationCoarseToFine = (uint)intCtoFcoarse.size();
     uint neighborX[17] = { 0u };
     uint neighborY[17] = { 0u };
     uint neighborZ[17] = { 0u };
     const int level = 0;
-    std::vector<real> offsetCFx = { 1, 11, 3, 13, 5, 15, 7 };
-    std::vector<real> offsetCFy = { 101, 111, 103, 113, 105, 115, 107 };
-    std::vector<real> offsetCFz = { 1001, 1011, 1003, 1013, 1005, 1015, 1007 };
+    std::vector<real> neighborCFx = { 1, 11, 3, 13, 5, 15, 7 };
+    std::vector<real> neighborCFy = { 101, 111, 103, 113, 105, 115, 107 };
+    std::vector<real> neighborCFz = { 1001, 1011, 1003, 1013, 1005, 1015, 1007 };
 
     // expected data
-    std::vector<uint> iCellCfcBorder_expected = { 11, 13, 15 };
-    std::vector<uint> iCellCfcBulk_expected = { 1, 3, 5, 7 };
-    std::vector<uint> iCellCffBorder_expected = { 12, 14, 16 };
-    std::vector<uint> iCellCffBulk_expected = { 2, 4, 6, 8 };
-    std::vector<real> offsetCFx_Border_expected = { 11, 13, 15 };
-    std::vector<real> offsetCFx_Bulk_expected = { 1, 3, 5, 7 };
-    std::vector<real> offsetCFy_Border_expected = { 111, 113, 115 };
-    std::vector<real> offsetCFy_Bulk_expected = { 101, 103, 105, 107 };
-    std::vector<real> offsetCFz_Border_expected = { 1011, 1013, 1015 };
-    std::vector<real> offsetCFz_Bulk_expected = { 1001, 1003, 1005, 1007 };
+    std::vector<uint> intCtoFcoarseBorder_expected = { 11, 13, 15 };
+    std::vector<uint> intCtoFcoarseBulk_expected = { 1, 3, 5, 7 };
+    std::vector<uint> fineCellIndicesBorder_expected = { 12, 14, 16 };
+    std::vector<uint> fineCellIndicesBulk_expected = { 2, 4, 6, 8 };
+    std::vector<real> neighborCFx_Border_expected = { 11, 13, 15 };
+    std::vector<real> neighborCFx_Bulk_expected = { 1, 3, 5, 7 };
+    std::vector<real> neighborCFy_Border_expected = { 111, 113, 115 };
+    std::vector<real> neighborCFy_Bulk_expected = { 101, 103, 105, 107 };
+    std::vector<real> neighborCFz_Border_expected = { 1011, 1013, 1015 };
+    std::vector<real> neighborCFz_Bulk_expected = { 1001, 1003, 1005, 1007 };
 };
 
 class InterpolationCellGrouperTest_IndicesCFBorderBulkTest : public testing::Test
 {
 protected:
-    CFBorderBulk cf;
+    CoarseToFineBorderBulk cf;
     SPtr<Parameter> para;
     std::unique_ptr<InterpolationCellGrouper> testSubject;
 
@@ -105,15 +105,15 @@ private:
         std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid);
 
         para = testingVF::createParameterForLevel(cf.level);
-        para->getParH(cf.level)->intCF.ICellCFC = &(cf.iCellCFC.front());
-        para->getParH(cf.level)->intCF.ICellCFF = &(cf.iCellCFF.front());
+        para->getParH(cf.level)->coarseToFine.coarseCellIndices = &(cf.intCtoFcoarse.front());
+        para->getParH(cf.level)->coarseToFine.fineCellIndices = &(cf.fineCellIndices.front());
         para->getParH(cf.level)->neighborX = cf.neighborX;
         para->getParH(cf.level)->neighborY = cf.neighborY;
         para->getParH(cf.level)->neighborZ = cf.neighborZ;
-        para->getParH(cf.level)->intCF.kCF = cf.sizeOfICellCf;
-        para->getParH(cf.level)->offCF.xOffCF = &(cf.offsetCFx.front());
-        para->getParH(cf.level)->offCF.yOffCF = &(cf.offsetCFy.front());
-        para->getParH(cf.level)->offCF.zOffCF = &(cf.offsetCFz.front());
+        para->getParH(cf.level)->coarseToFine.numberOfCells = cf.sizeOfInterpolationCoarseToFine;
+        para->getParH(cf.level)->neighborCoarseToFine.x = &(cf.neighborCFx.front());
+        para->getParH(cf.level)->neighborCoarseToFine.y = &(cf.neighborCFy.front());
+        para->getParH(cf.level)->neighborCoarseToFine.z = &(cf.neighborCFz.front());
 
         return std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder);
     };
@@ -128,65 +128,65 @@ TEST_F(InterpolationCellGrouperTest_IndicesCFBorderBulkTest, splitCoarseToFineIn
 {
     testSubject->splitCoarseToFineIntoBorderAndBulk(cf.level);
 
-    EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF + para->getParH(cf.level)->intCFBulk.kCF,
-                testing::Eq(cf.sizeOfICellCf))
+    EXPECT_THAT(para->getParH(cf.level)->coarseToFineBorder.numberOfCells + para->getParH(cf.level)->coarseToFineBulk.numberOfCells,
+                testing::Eq(cf.sizeOfInterpolationCoarseToFine))
         << "The number of interpolation cells from coarse to fine changed during reordering.";
 
     // check coarse to fine border (coarse nodes)
-    EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF, testing::Eq((uint)cf.iCellCfcBorder_expected.size()));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBorder.ICellCFC, cf.iCellCfcBorder_expected))
-        << "intCFBorder.ICellCFC does not match the expected border vector";
+    EXPECT_THAT(para->getParH(cf.level)->coarseToFineBorder.numberOfCells, testing::Eq((uint)cf.intCtoFcoarseBorder_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->coarseToFineBorder.coarseCellIndices, cf.intCtoFcoarseBorder_expected))
+        << "coarseToFineBorder.coarseCellIndices does not match the expected border vector";
     // check coarse to fine border (fine nodes)
-    EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF, testing::Eq((uint)cf.iCellCffBorder_expected.size()));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBorder.ICellCFF, cf.iCellCffBorder_expected))
-        << "intCFBorder.ICellCFF does not match the expected border vector";
+    EXPECT_THAT(para->getParH(cf.level)->coarseToFineBorder.numberOfCells, testing::Eq((uint)cf.fineCellIndicesBorder_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->coarseToFineBorder.fineCellIndices, cf.fineCellIndicesBorder_expected))
+        << "coarseToFineBorder.fineCellIndices does not match the expected border vector";
 
     // check coarse to fine bulk (coarse nodes)
-    EXPECT_THAT(para->getParH(cf.level)->intCFBulk.kCF, testing::Eq((uint)cf.iCellCfcBulk_expected.size()));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBulk.ICellCFC, cf.iCellCfcBulk_expected))
-        << "intCFBulk.ICellCFC does not match the expected bulk vector";
+    EXPECT_THAT(para->getParH(cf.level)->coarseToFineBulk.numberOfCells, testing::Eq((uint)cf.intCtoFcoarseBulk_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->coarseToFineBulk.coarseCellIndices, cf.intCtoFcoarseBulk_expected))
+        << "coarseToFineBulk.coarseCellIndices does not match the expected bulk vector";
     // check coarse to fine bulk (fine nodes)
-    EXPECT_THAT(para->getParH(cf.level)->intCFBulk.kCF, testing::Eq((uint)cf.iCellCffBulk_expected.size()));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBulk.ICellCFF, cf.iCellCffBulk_expected))
-        << "intCFBulk.ICellCFF does not match the expected bulk vector";
-
-    // check offset cells
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.xOffCF, cf.offsetCFx_Border_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.xOffCF, cf.offsetCFx_Bulk_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.yOffCF, cf.offsetCFy_Border_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.yOffCF, cf.offsetCFy_Bulk_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.zOffCF, cf.offsetCFz_Border_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.zOffCF, cf.offsetCFz_Bulk_expected));
+    EXPECT_THAT(para->getParH(cf.level)->coarseToFineBulk.numberOfCells, testing::Eq((uint)cf.fineCellIndicesBulk_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->coarseToFineBulk.fineCellIndices, cf.fineCellIndicesBulk_expected))
+        << "coarseToFineBulk.fineCellIndices does not match the expected bulk vector";
+
+    // check neighbor cells
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->neighborCoarseToFine.x, cf.neighborCFx_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->neighborCoarseToFineBulk.x, cf.neighborCFx_Bulk_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->neighborCoarseToFine.y, cf.neighborCFy_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->neighborCoarseToFineBulk.y, cf.neighborCFy_Bulk_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->neighborCoarseToFine.z, cf.neighborCFz_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->neighborCoarseToFineBulk.z, cf.neighborCFz_Bulk_expected));
 }
 
-struct FCBorderBulk {
+struct FineToCoarseBorderBulk {
     // data to work on
     std::vector<uint> fluidNodeIndicesBorder = { 110, 111, 112, 113, 114, 115, 116 };
-    std::vector<uint> iCellFCC = { 11, 111, 13, 113, 15, 115, 17 };
-    std::vector<uint> iCellFCF = { 12, 112, 14, 114, 16, 116, 18 };
-    const uint sizeOfICellFC = (uint)iCellFCC.size();
+    std::vector<uint> coarseCellIndices = { 11, 111, 13, 113, 15, 115, 17 };
+    std::vector<uint> fineCellIndices = { 12, 112, 14, 114, 16, 116, 18 };
+    const uint sizeOfIntFineToCoarse = (uint)coarseCellIndices.size();
     const int level = 1;
-    std::vector<real> offsetFCx = { 11, 111, 13, 113, 15, 115, 17 };
-    std::vector<real> offsetFCy = { 1101, 1111, 1103, 1113, 1105, 1115, 1107 };
-    std::vector<real> offsetFCz = { 11001, 11011, 11003, 11013, 11005, 11015, 11007 };
+    std::vector<real> neighborx = { 11, 111, 13, 113, 15, 115, 17 };
+    std::vector<real> neighbory = { 1101, 1111, 1103, 1113, 1105, 1115, 1107 };
+    std::vector<real> neighborz = { 11001, 11011, 11003, 11013, 11005, 11015, 11007 };
 
     // expected data
-    std::vector<uint> iCellFccBorder_expected = { 111, 113, 115 };
-    std::vector<uint> iCellFccBulk_expected = { 11, 13, 15, 17 };
-    std::vector<uint> iCellFcfBorder_expected = { 112, 114, 116 };
-    std::vector<uint> iCellFcfBulk_expected = { 12, 14, 16, 18 };
-    std::vector<real> offsetFCx_Border_expected = { 111, 113, 115 };
-    std::vector<real> offsetFCx_Bulk_expected = { 11, 13, 15, 17 };
-    std::vector<real> offsetFCy_Border_expected = { 1111, 1113, 1115 };
-    std::vector<real> offsetFCy_Bulk_expected = { 1101, 1103, 1105, 1107 };
-    std::vector<real> offsetFCz_Border_expected = { 11011, 11013, 11015 };
-    std::vector<real> offsetFCz_Bulk_expected = { 11001, 11003, 11005, 11007 };
+    std::vector<uint> coarseCellIndicesBorder_expected = { 111, 113, 115 };
+    std::vector<uint> coarseCellIndicesBulk_expected = { 11, 13, 15, 17 };
+    std::vector<uint> fineCellIndicesBorder_expected = { 112, 114, 116 };
+    std::vector<uint> fineCellIndicesBulk_expected = { 12, 14, 16, 18 };
+    std::vector<real> neighborx_Border_expected = { 111, 113, 115 };
+    std::vector<real> neighborx_Bulk_expected = { 11, 13, 15, 17 };
+    std::vector<real> neighbory_Border_expected = { 1111, 1113, 1115 };
+    std::vector<real> neighbory_Bulk_expected = { 1101, 1103, 1105, 1107 };
+    std::vector<real> neighborz_Border_expected = { 11011, 11013, 11015 };
+    std::vector<real> neighborz_Bulk_expected = { 11001, 11003, 11005, 11007 };
 };
 
 class InterpolationCellGrouperTest_IndicesFCBorderBulkTest : public testing::Test
 {
 protected:
-    FCBorderBulk fc;
+    FineToCoarseBorderBulk fc;
     SPtr<Parameter> para;
     std::unique_ptr<InterpolationCellGrouper> testSubject;
 
@@ -199,12 +199,12 @@ private:
         std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid);
 
         para = testingVF::createParameterForLevel(fc.level);
-        para->getParH(fc.level)->intFC.ICellFCC = &(fc.iCellFCC.front());
-        para->getParH(fc.level)->intFC.ICellFCF = &(fc.iCellFCF.front());
-        para->getParH(fc.level)->intFC.kFC = fc.sizeOfICellFC;
-        para->getParH(fc.level)->offFC.xOffFC = &(fc.offsetFCx.front());
-        para->getParH(fc.level)->offFC.yOffFC = &(fc.offsetFCy.front());
-        para->getParH(fc.level)->offFC.zOffFC = &(fc.offsetFCz.front());
+        para->getParH(fc.level)->fineToCoarse.coarseCellIndices = &(fc.coarseCellIndices.front());
+        para->getParH(fc.level)->fineToCoarse.fineCellIndices = &(fc.fineCellIndices.front());
+        para->getParH(fc.level)->fineToCoarse.numberOfCells = fc.sizeOfIntFineToCoarse;
+        para->getParH(fc.level)->neighborFineToCoarse.x = &(fc.neighborx.front());
+        para->getParH(fc.level)->neighborFineToCoarse.y = &(fc.neighbory.front());
+        para->getParH(fc.level)->neighborFineToCoarse.z = &(fc.neighborz.front());
 
         return std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder);
     };
@@ -219,33 +219,33 @@ TEST_F(InterpolationCellGrouperTest_IndicesFCBorderBulkTest, splitFineToCoarseIn
 {
     testSubject->splitFineToCoarseIntoBorderAndBulk(fc.level);
 
-    EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC + para->getParH(fc.level)->intFCBulk.kFC,
-                testing::Eq(fc.sizeOfICellFC))
+    EXPECT_THAT(para->getParH(fc.level)->fineToCoarseBorder.numberOfCells + para->getParH(fc.level)->fineToCoarseBulk.numberOfCells,
+                testing::Eq(fc.sizeOfIntFineToCoarse))
         << "The number of interpolation cells from coarse to fine changed during reordering.";
 
     // check coarse to fine border (coarse nodes)
-    EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC, testing::Eq((uint)fc.iCellFccBorder_expected.size()));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBorder.ICellFCC, fc.iCellFccBorder_expected))
-        << "intFCBorder.ICellFCC does not match the expected border vector";
+    EXPECT_THAT(para->getParH(fc.level)->fineToCoarseBorder.numberOfCells, testing::Eq((uint)fc.coarseCellIndicesBorder_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->fineToCoarseBorder.coarseCellIndices, fc.coarseCellIndicesBorder_expected))
+        << "fineToCoarseBorder.coarseCellIndices does not match the expected border vector";
     // check coarse to fine border (fine nodes)
-    EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC, testing::Eq((uint)fc.iCellFcfBorder_expected.size()));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBorder.ICellFCF, fc.iCellFcfBorder_expected))
-        << "intFCBorder.ICellFCF does not match the expected border vector";
+    EXPECT_THAT(para->getParH(fc.level)->fineToCoarseBorder.numberOfCells, testing::Eq((uint)fc.fineCellIndicesBorder_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->fineToCoarseBorder.fineCellIndices, fc.fineCellIndicesBorder_expected))
+        << "fineToCoarseBorder.fineCellIndices does not match the expected border vector";
 
     // check coarse to fine bulk (coarse nodes)
-    EXPECT_THAT(para->getParH(fc.level)->intFCBulk.kFC, testing::Eq((uint)fc.iCellFccBulk_expected.size()));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBulk.ICellFCC, fc.iCellFccBulk_expected))
-        << "intFCBulk.ICellFCC does not match the expected bulk vector";
+    EXPECT_THAT(para->getParH(fc.level)->fineToCoarseBulk.numberOfCells, testing::Eq((uint)fc.coarseCellIndicesBulk_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->fineToCoarseBulk.coarseCellIndices, fc.coarseCellIndicesBulk_expected))
+        << "fineToCoarseBulk.coarseCellIndices does not match the expected bulk vector";
     // check coarse to fine bulk (fine nodes)
-    EXPECT_THAT(para->getParH(fc.level)->intFCBulk.kFC, testing::Eq((uint)fc.iCellFcfBulk_expected.size()));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBulk.ICellFCF, fc.iCellFcfBulk_expected))
-        << "intFCBulk.ICellFCF does not match the expected bulk vector";
-
-    // check offset cells
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->offFC.xOffFC, fc.offsetFCx_Border_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->offFCBulk.xOffFC, fc.offsetFCx_Bulk_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->offFC.yOffFC, fc.offsetFCy_Border_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->offFCBulk.yOffFC, fc.offsetFCy_Bulk_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->offFC.zOffFC, fc.offsetFCz_Border_expected));
-    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->offFCBulk.zOffFC, fc.offsetFCz_Bulk_expected));
+    EXPECT_THAT(para->getParH(fc.level)->fineToCoarseBulk.numberOfCells, testing::Eq((uint)fc.fineCellIndicesBulk_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->fineToCoarseBulk.fineCellIndices, fc.fineCellIndicesBulk_expected))
+        << "fineToCoarseBulk.fineCellIndices does not match the expected bulk vector";
+
+    // check neighbor cells
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->neighborFineToCoarse.x, fc.neighborx_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->neighborFineToCoarseBulk.x, fc.neighborx_Bulk_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->neighborFineToCoarse.y, fc.neighbory_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->neighborFineToCoarseBulk.y, fc.neighbory_Bulk_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->neighborFineToCoarse.z, fc.neighborz_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->neighborFineToCoarseBulk.z, fc.neighborz_Bulk_expected));
 }
diff --git a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp
index 00a4c79574ce9d8ae372bfe9f7e546c05175bb10..297e2ac9c05fd225811f40439a26e0b3b4ce4811 100644
--- a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp
+++ b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp
@@ -3,34 +3,38 @@
 
 void GridScalingFactory::setScalingFactory(const GridScalingFactory::GridScaling gridScalingType)
 {
-    this->gridScaling = gridScalingType;
+    this->gridScalingType = gridScalingType;
 }
 
-gridScalingFC GridScalingFactory::getGridScalingFC() const
+gridScaling GridScalingFactory::getGridScalingFC(bool hasTurbulentViscosity) const
 {
     // for descriptions of the scaling types refer to the header
-    switch (gridScaling) {
+    switch (gridScalingType) {
         case GridScaling::ScaleRhoSq:
             return ScaleFC_RhoSq_comp_27;
             break;
         case GridScaling::ScaleCompressible:
-            return ScaleFC_compressible;
+            if(hasTurbulentViscosity)   return ScaleFC_compressible<true>;
+            else                        return ScaleFC_compressible<false>;
             break;
         default:
             return nullptr;
     }
 }
 
-gridScalingCF GridScalingFactory::getGridScalingCF() const
+gridScaling GridScalingFactory::getGridScalingCF(bool hasTurbulentViscosity) const
 {
     // for descriptions of the scaling types refer to the header
-    switch (gridScaling) {
+    switch (gridScalingType) {
         case GridScaling::ScaleRhoSq:
             return ScaleCF_RhoSq_comp_27;
             break;
         case GridScaling::ScaleCompressible:
-            return ScaleCF_compressible;
-            break;
+            {
+                if(hasTurbulentViscosity)   return ScaleCF_compressible<true>;
+                else                        return ScaleCF_compressible<false>;
+                break;
+            }
         default:
             return nullptr;
     }
diff --git a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h
index 7d7c20c63a01e2dba6a5578c6520c0ab06894b3c..f8729b0ed9df784eb8d409b6ea97a0ab0f9cbb5a 100644
--- a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h
+++ b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h
@@ -42,8 +42,7 @@ struct LBMSimulationParameter;
 class Parameter;
 struct CUstream_st;
 
-using gridScalingFC = std::function<void(LBMSimulationParameter *, LBMSimulationParameter *, ICellFC *, OffFC&, CUstream_st *stream)>;
-using gridScalingCF = std::function<void(LBMSimulationParameter *, LBMSimulationParameter *, ICellCF *, OffCF&, CUstream_st *stream)>;
+using gridScaling = std::function<void(LBMSimulationParameter *, LBMSimulationParameter *, ICells *, ICellNeigh&, CUstream_st *stream)>;
 
 class GridScalingFactory
 {
@@ -59,11 +58,11 @@ public:
 
     void setScalingFactory(const GridScalingFactory::GridScaling gridScalingType);
 
-    [[nodiscard]] gridScalingFC getGridScalingFC() const;
-    [[nodiscard]] gridScalingCF getGridScalingCF() const;
+    [[nodiscard]] gridScaling getGridScalingFC(bool hasTurbulentViscosity) const; // fine-to-coarse scaling; the flag picks ScaleFC_compressible<true> or <false> and is ignored for ScaleRhoSq; returns nullptr for any other scaling type
+    [[nodiscard]] gridScaling getGridScalingCF(bool hasTurbulentViscosity) const; // coarse-to-fine scaling; the flag picks ScaleCF_compressible<true> or <false> and is ignored for ScaleRhoSq; returns nullptr for any other scaling type
 
 private:
-    GridScaling gridScaling = GridScaling::NotSpecified;
+    GridScaling gridScalingType = GridScaling::NotSpecified;
 };
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.cpp b/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.cpp
index ec435f0647ba973dbb405aefad069b285e09d6b5..9a0eae2282ca0abf11c77e6e8503c2a5b2bcc0b7 100644
--- a/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.cpp
+++ b/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.cpp
@@ -1,11 +1,11 @@
 #include "FindInterface/FindInterface.h"
 
-void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC, 
+void interpolation(InterpolationCells &intCF, InterpolationCells &intFC, 
                    unsigned int LxCoarse, unsigned int LyCoarse, unsigned int LzCoarse, 
                    unsigned int LxFine, unsigned int LyFine, unsigned int LzFine, 
                    unsigned int dNx, unsigned int dNy, unsigned int dNz, 
                    unsigned int *kCoarse, unsigned int *kFine, bool* needInterface,
-                   OffsetCF &offCF, OffsetFC &offFC)
+                   InterpolationCellNeighbor &offCF, InterpolationCellNeighbor &offFC)
 {
    unsigned int iC,iF,jC,jF,hC,hF;
    unsigned int posCSWB, posFSWB;
@@ -13,8 +13,8 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
    real xOff = (real)0.0f;
    real yOff = (real)0.0f; 
    real zOff = (real)0.0f;
-   intCF.kCF    = 0;
-   intFC.kFC    = 0;
+   intCF.numberOfCells    = 0;
+   intFC.numberOfCells    = 0;
 
    ///////////////////////////////////////////////////////////////////////////
    //Defines
@@ -125,12 +125,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {
             posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-            intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-            intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-            offCF.xOffCF[intCF.kCF]   = xOff;
-            offCF.yOffCF[intCF.kCF]   = yOff;
-            offCF.zOffCF[intCF.kCF]   = zOff;
-            intCF.kCF++;
+            intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+            intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+            offCF.x[intCF.numberOfCells]   = xOff;
+            offCF.y[intCF.numberOfCells]   = yOff;
+            offCF.z[intCF.numberOfCells]   = zOff;
+            intCF.numberOfCells++;
          }
       }
       //////////////////////////   fine->coarse   ////////////////////////////
@@ -142,12 +142,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -166,12 +166,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {
             posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-            intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-            offCF.xOffCF[intCF.kCF]   = xOff;
-            offCF.yOffCF[intCF.kCF]   = yOff;
-            offCF.zOffCF[intCF.kCF]   = zOff;
-            intCF.kCF++;
+            intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+            intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+            offCF.x[intCF.numberOfCells]   = xOff;
+            offCF.y[intCF.numberOfCells]   = yOff;
+            offCF.z[intCF.numberOfCells]   = zOff;
+            intCF.numberOfCells++;
          }
       }
       //////////////////////////   fine->coarse   ////////////////////////////
@@ -183,12 +183,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -207,12 +207,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {
             posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-            intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-            offCF.xOffCF[intCF.kCF]   = xOff;
-            offCF.yOffCF[intCF.kCF]   = yOff;
-            offCF.zOffCF[intCF.kCF]   = zOff;
-            intCF.kCF++;
+            intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+            intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+            offCF.x[intCF.numberOfCells]   = xOff;
+            offCF.y[intCF.numberOfCells]   = yOff;
+            offCF.z[intCF.numberOfCells]   = zOff;
+            intCF.numberOfCells++;
          }
       }
       //////////////////////////   fine->coarse   ////////////////////////////
@@ -224,12 +224,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -248,12 +248,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {
             posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-            intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-            offCF.xOffCF[intCF.kCF]   = xOff;
-            offCF.yOffCF[intCF.kCF]   = yOff;
-            offCF.zOffCF[intCF.kCF]   = zOff;
-            intCF.kCF++;
+            intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+            intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+            offCF.x[intCF.numberOfCells]   = xOff;
+            offCF.y[intCF.numberOfCells]   = yOff;
+            offCF.z[intCF.numberOfCells]   = zOff;
+            intCF.numberOfCells++;
          }
       }
       //////////////////////////   fine->coarse   ////////////////////////////
@@ -265,12 +265,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -289,12 +289,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {
             posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-            intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-            offCF.xOffCF[intCF.kCF]   = xOff;
-            offCF.yOffCF[intCF.kCF]   = yOff;
-            offCF.zOffCF[intCF.kCF]   = zOff;
-            intCF.kCF++;
+            intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+            intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+            offCF.x[intCF.numberOfCells]   = xOff;
+            offCF.y[intCF.numberOfCells]   = yOff;
+            offCF.z[intCF.numberOfCells]   = zOff;
+            intCF.numberOfCells++;
          }
       }
       //////////////////////////   fine->coarse   ////////////////////////////
@@ -306,12 +306,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   =xOff;
-            offFC.yOffFC[intFC.kFC]   =yOff;
-            offFC.zOffFC[intFC.kFC]   =zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   =xOff;
+            offFC.y[intFC.numberOfCells]   =yOff;
+            offFC.z[intFC.numberOfCells]   =zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -330,12 +330,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {
             posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-            intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-            intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-            offCF.xOffCF[intCF.kCF]   = xOff;
-            offCF.yOffCF[intCF.kCF]   = yOff;
-            offCF.zOffCF[intCF.kCF]   = zOff;
-            intCF.kCF++;
+            intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+            intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+            offCF.x[intCF.numberOfCells]   = xOff;
+            offCF.y[intCF.numberOfCells]   = yOff;
+            offCF.z[intCF.numberOfCells]   = zOff;
+            intCF.numberOfCells++;
          }
       }
       //////////////////////////   fine->coarse   ////////////////////////////
@@ -347,12 +347,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -401,12 +401,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -427,12 +427,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_E]==false)
       {
@@ -443,12 +443,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
       else if (needInterface[INTERFACE_N]==false)
@@ -460,12 +460,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -506,12 +506,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -532,12 +532,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_E]==false)
       {
@@ -548,12 +548,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
       else if (needInterface[INTERFACE_S]==false)
@@ -565,12 +565,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -611,12 +611,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -637,12 +637,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_E]==false)
       {
@@ -653,12 +653,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       } 
       else if (needInterface[INTERFACE_T]==false)
@@ -670,12 +670,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -716,12 +716,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -742,12 +742,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_E]==false)
       {
@@ -758,12 +758,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       } 
       else if (needInterface[INTERFACE_B]==false)
@@ -775,12 +775,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -821,12 +821,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -847,12 +847,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_W]==false)
       {
@@ -863,12 +863,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
       else if (needInterface[INTERFACE_N]==false)
@@ -880,12 +880,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -926,12 +926,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -952,12 +952,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_W]==false)
       {
@@ -968,12 +968,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
       else if (needInterface[INTERFACE_S]==false)
@@ -985,12 +985,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -1031,12 +1031,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -1057,12 +1057,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_W]==false)
       {
@@ -1073,12 +1073,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       } 
       else if (needInterface[INTERFACE_T]==false)
@@ -1090,12 +1090,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -1136,12 +1136,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -1162,12 +1162,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_W]==false)
       {
@@ -1178,12 +1178,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       } 
       else if (needInterface[INTERFACE_B]==false)
@@ -1195,12 +1195,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -1241,12 +1241,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -1267,12 +1267,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[ INTERFACE_N]==false)
       {
@@ -1283,12 +1283,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       } 
       else if (needInterface[ INTERFACE_T]==false)
@@ -1300,12 +1300,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -1346,12 +1346,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -1372,12 +1372,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[ INTERFACE_N]==false)
       {
@@ -1388,12 +1388,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       } 
       else if (needInterface[ INTERFACE_B]==false)
@@ -1405,12 +1405,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -1451,12 +1451,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -1477,12 +1477,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[ INTERFACE_S]==false)
       {
@@ -1493,12 +1493,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       } 
       else if (needInterface[ INTERFACE_T]==false)
@@ -1510,12 +1510,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -1556,12 +1556,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {
          posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-         intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-         offCF.xOffCF[intCF.kCF]   = xOff;
-         offCF.yOffCF[intCF.kCF]   = yOff;
-         offCF.zOffCF[intCF.kCF]   = zOff;
-         intCF.kCF++;
+         intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+         intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+         offCF.x[intCF.numberOfCells]   = xOff;
+         offCF.y[intCF.numberOfCells]   = yOff;
+         offCF.z[intCF.numberOfCells]   = zOff;
+         intCF.numberOfCells++;
       }
 
       //////////////////////////////////////////////////////////////////////////
@@ -1582,12 +1582,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       {			
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[ INTERFACE_S]==false)
       {
@@ -1598,12 +1598,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       } 
       else if (needInterface[ INTERFACE_B]==false)
@@ -1615,12 +1615,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
          {			
             posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
             posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-            intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-            intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-            offFC.xOffFC[intFC.kFC]   = xOff;
-            offFC.yOffFC[intFC.kFC]   = yOff;
-            offFC.zOffFC[intFC.kFC]   = zOff;
-            intFC.kFC++;
+            intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+            intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+            offFC.x[intFC.numberOfCells]   = xOff;
+            offFC.y[intFC.numberOfCells]   = yOff;
+            offFC.z[intFC.numberOfCells]   = zOff;
+            intFC.numberOfCells++;
          }
       }
    }
@@ -1665,12 +1665,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       //////////////////////////////////////////////////////////////////////////
       posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-      intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-      intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-      offCF.xOffCF[intCF.kCF]   = xOff;
-      offCF.yOffCF[intCF.kCF]   = yOff;
-      offCF.zOffCF[intCF.kCF]   = zOff;
-      intCF.kCF++;
+      intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+      intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+      offCF.x[intCF.numberOfCells]   = xOff;
+      offCF.y[intCF.numberOfCells]   = yOff;
+      offCF.z[intCF.numberOfCells]   = zOff;
+      intCF.numberOfCells++;
       //////////////////////////////////////////////////////////////////////////
 
 
@@ -1691,12 +1691,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
       posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-      intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-      intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-      offFC.xOffFC[intFC.kFC]   = xOff;
-      offFC.yOffFC[intFC.kFC]   = yOff;
-      offFC.zOffFC[intFC.kFC]   = zOff;
-      intFC.kFC++;
+      intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+      intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+      offFC.x[intFC.numberOfCells]   = xOff;
+      offFC.y[intFC.numberOfCells]   = yOff;
+      offFC.z[intFC.numberOfCells]   = zOff;
+      intFC.numberOfCells++;
 
       if (needInterface[INTERFACE_E]==false)
       {
@@ -1706,12 +1706,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_N]==false)
       {
@@ -1721,12 +1721,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_T]==false)
       {
@@ -1736,12 +1736,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_E]==false) && (needInterface[INTERFACE_N]==false))
       {
@@ -1751,12 +1751,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_E]==false) && (needInterface[INTERFACE_T]==false))
       {
@@ -1766,12 +1766,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_N]==false) && (needInterface[INTERFACE_T]==false))
       {
@@ -1781,12 +1781,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
    }
 
@@ -1822,12 +1822,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       //////////////////////////////////////////////////////////////////////////
       posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-      intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-      intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-      offCF.xOffCF[intCF.kCF]   = xOff;
-      offCF.yOffCF[intCF.kCF]   = yOff;
-      offCF.zOffCF[intCF.kCF]   = zOff;
-      intCF.kCF++;
+      intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+      intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+      offCF.x[intCF.numberOfCells]   = xOff;
+      offCF.y[intCF.numberOfCells]   = yOff;
+      offCF.z[intCF.numberOfCells]   = zOff;
+      intCF.numberOfCells++;
       //////////////////////////////////////////////////////////////////////////
 
 
@@ -1848,12 +1848,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
       posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-      intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-      intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-      offFC.xOffFC[intFC.kFC]   = xOff;
-      offFC.yOffFC[intFC.kFC]   = yOff;
-      offFC.zOffFC[intFC.kFC]   = zOff;
-      intFC.kFC++;
+      intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+      intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+      offFC.x[intFC.numberOfCells]   = xOff;
+      offFC.y[intFC.numberOfCells]   = yOff;
+      offFC.z[intFC.numberOfCells]   = zOff;
+      intFC.numberOfCells++;
 
       if (needInterface[INTERFACE_E]==false)
       {
@@ -1863,12 +1863,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_N]==false)
       {
@@ -1878,12 +1878,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_B]==false)
       {
@@ -1893,12 +1893,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_E]==false) && (needInterface[INTERFACE_N]==false))
       {
@@ -1908,12 +1908,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_E]==false) && (needInterface[INTERFACE_B]==false))
       {
@@ -1923,12 +1923,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_N]==false) && (needInterface[INTERFACE_B]==false))
       {
@@ -1938,12 +1938,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
    }
 
@@ -1979,12 +1979,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       //////////////////////////////////////////////////////////////////////////
       posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-      intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-      intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-      offCF.xOffCF[intCF.kCF]   = xOff;
-      offCF.yOffCF[intCF.kCF]   = yOff;
-      offCF.zOffCF[intCF.kCF]   = zOff;
-      intCF.kCF++;
+      intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+      intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+      offCF.x[intCF.numberOfCells]   = xOff;
+      offCF.y[intCF.numberOfCells]   = yOff;
+      offCF.z[intCF.numberOfCells]   = zOff;
+      intCF.numberOfCells++;
       //////////////////////////////////////////////////////////////////////////
 
 
@@ -2005,12 +2005,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
       posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-      intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-      intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-      offFC.xOffFC[intFC.kFC]   = xOff;
-      offFC.yOffFC[intFC.kFC]   = yOff;
-      offFC.zOffFC[intFC.kFC]   = zOff;
-      intFC.kFC++;
+      intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+      intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+      offFC.x[intFC.numberOfCells]   = xOff;
+      offFC.y[intFC.numberOfCells]   = yOff;
+      offFC.z[intFC.numberOfCells]   = zOff;
+      intFC.numberOfCells++;
 
       if (needInterface[INTERFACE_E]==false)
       {
@@ -2020,12 +2020,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_S]==false)
       {
@@ -2035,12 +2035,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_T]==false)
       {
@@ -2050,12 +2050,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_E]==false) && (needInterface[INTERFACE_S]==false))
       {
@@ -2065,12 +2065,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_E]==false) && (needInterface[INTERFACE_T]==false))
       {
@@ -2080,12 +2080,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_S]==false) && (needInterface[INTERFACE_T]==false))
       {
@@ -2095,12 +2095,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
    }
 
@@ -2136,12 +2136,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       //////////////////////////////////////////////////////////////////////////
       posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-      intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-      intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-      offCF.xOffCF[intCF.kCF]   = xOff;
-      offCF.yOffCF[intCF.kCF]   = yOff;
-      offCF.zOffCF[intCF.kCF]   = zOff;
-      intCF.kCF++;
+      intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+      intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+      offCF.x[intCF.numberOfCells]   = xOff;
+      offCF.y[intCF.numberOfCells]   = yOff;
+      offCF.z[intCF.numberOfCells]   = zOff;
+      intCF.numberOfCells++;
       //////////////////////////////////////////////////////////////////////////
 
 
@@ -2162,12 +2162,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
       posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-      intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-      intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-      offFC.xOffFC[intFC.kFC]   = xOff;
-      offFC.yOffFC[intFC.kFC]   = yOff;
-      offFC.zOffFC[intFC.kFC]   = zOff;
-      intFC.kFC++;
+      intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+      intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+      offFC.x[intFC.numberOfCells]   = xOff;
+      offFC.y[intFC.numberOfCells]   = yOff;
+      offFC.z[intFC.numberOfCells]   = zOff;
+      intFC.numberOfCells++;
 
       if (needInterface[INTERFACE_E]==false)
       {
@@ -2177,12 +2177,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_S]==false)
       {
@@ -2192,12 +2192,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_B]==false)
       {
@@ -2207,12 +2207,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_E]==false) && (needInterface[INTERFACE_S]==false))
       {
@@ -2222,12 +2222,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_E]==false) && (needInterface[INTERFACE_B]==false))
       {
@@ -2237,12 +2237,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_S]==false) && (needInterface[INTERFACE_B]==false))
       {
@@ -2252,12 +2252,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
    }
 
@@ -2293,12 +2293,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       //////////////////////////////////////////////////////////////////////////
       posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-      intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-      intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-      offCF.xOffCF[intCF.kCF]   = xOff;
-      offCF.yOffCF[intCF.kCF]   = yOff;
-      offCF.zOffCF[intCF.kCF]   = zOff;
-      intCF.kCF++;
+      intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+      intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+      offCF.x[intCF.numberOfCells]   = xOff;
+      offCF.y[intCF.numberOfCells]   = yOff;
+      offCF.z[intCF.numberOfCells]   = zOff;
+      intCF.numberOfCells++;
       //////////////////////////////////////////////////////////////////////////
 
 
@@ -2319,12 +2319,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
       posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-      intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-      intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-      offFC.xOffFC[intFC.kFC]   = xOff;
-      offFC.yOffFC[intFC.kFC]   = yOff;
-      offFC.zOffFC[intFC.kFC]   = zOff;
-      intFC.kFC++;
+      intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+      intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+      offFC.x[intFC.numberOfCells]   = xOff;
+      offFC.y[intFC.numberOfCells]   = yOff;
+      offFC.z[intFC.numberOfCells]   = zOff;
+      intFC.numberOfCells++;
 
       if (needInterface[INTERFACE_W]==false)
       {
@@ -2334,12 +2334,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_N]==false)
       {
@@ -2349,12 +2349,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_T]==false)
       {
@@ -2364,12 +2364,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_W]==false) && (needInterface[INTERFACE_N]==false))
       {
@@ -2379,12 +2379,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_W]==false) && (needInterface[INTERFACE_T]==false))
       {
@@ -2394,12 +2394,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_N]==false) && (needInterface[INTERFACE_T]==false))
       {
@@ -2409,12 +2409,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
    }
 
@@ -2450,12 +2450,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       //////////////////////////////////////////////////////////////////////////
       posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-      intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-      intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-      offCF.xOffCF[intCF.kCF]   = xOff;
-      offCF.yOffCF[intCF.kCF]   = yOff;
-      offCF.zOffCF[intCF.kCF]   = zOff;
-      intCF.kCF++;
+      intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+      intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+      offCF.x[intCF.numberOfCells]   = xOff;
+      offCF.y[intCF.numberOfCells]   = yOff;
+      offCF.z[intCF.numberOfCells]   = zOff;
+      intCF.numberOfCells++;
       //////////////////////////////////////////////////////////////////////////
 
 
@@ -2476,12 +2476,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
       posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-      intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-      intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-      offFC.xOffFC[intFC.kFC]   = xOff;
-      offFC.yOffFC[intFC.kFC]   = yOff;
-      offFC.zOffFC[intFC.kFC]   = zOff;
-      intFC.kFC++;
+      intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+      intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+      offFC.x[intFC.numberOfCells]   = xOff;
+      offFC.y[intFC.numberOfCells]   = yOff;
+      offFC.z[intFC.numberOfCells]   = zOff;
+      intFC.numberOfCells++;
 
       if (needInterface[INTERFACE_W]==false)
       {
@@ -2491,12 +2491,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_N]==false)
       {
@@ -2506,12 +2506,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_B]==false)
       {
@@ -2521,12 +2521,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_W]==false) && (needInterface[INTERFACE_N]==false))
       {
@@ -2536,12 +2536,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_W]==false) && (needInterface[INTERFACE_B]==false))
       {
@@ -2551,12 +2551,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_N]==false) && (needInterface[INTERFACE_B]==false))
       {
@@ -2566,12 +2566,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
    }
 
@@ -2607,12 +2607,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       //////////////////////////////////////////////////////////////////////////
       posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-      intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-      intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-      offCF.xOffCF[intCF.kCF]   = xOff;
-      offCF.yOffCF[intCF.kCF]   = yOff;
-      offCF.zOffCF[intCF.kCF]   = zOff;
-      intCF.kCF++;
+      intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+      intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+      offCF.x[intCF.numberOfCells]   = xOff;
+      offCF.y[intCF.numberOfCells]   = yOff;
+      offCF.z[intCF.numberOfCells]   = zOff;
+      intCF.numberOfCells++;
       //////////////////////////////////////////////////////////////////////////
 
 
@@ -2633,12 +2633,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
       posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-      intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-      intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-      offFC.xOffFC[intFC.kFC]   = xOff;
-      offFC.yOffFC[intFC.kFC]   = yOff;
-      offFC.zOffFC[intFC.kFC]   = zOff;
-      intFC.kFC++;
+      intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+      intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+      offFC.x[intFC.numberOfCells]   = xOff;
+      offFC.y[intFC.numberOfCells]   = yOff;
+      offFC.z[intFC.numberOfCells]   = zOff;
+      intFC.numberOfCells++;
 
       if (needInterface[INTERFACE_W]==false)
       {
@@ -2648,12 +2648,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_S]==false)
       {
@@ -2663,12 +2663,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_T]==false)
       {
@@ -2678,12 +2678,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_W]==false) && (needInterface[INTERFACE_S]==false))
       {
@@ -2693,12 +2693,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_W]==false) && (needInterface[INTERFACE_T]==false))
       {
@@ -2708,12 +2708,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_S]==false) && (needInterface[INTERFACE_T]==false))
       {
@@ -2723,12 +2723,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
    }
 
@@ -2764,12 +2764,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
       //////////////////////////////////////////////////////////////////////////
       posCSWB=vectorPosition(iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine  , LyFine);
-      intCF.ICellCFC[intCF.kCF] = kCoarse[posCSWB];
-      intCF.ICellCFF[intCF.kCF] = kFine[posFSWB];
-      offCF.xOffCF[intCF.kCF]   = xOff;
-      offCF.yOffCF[intCF.kCF]   = yOff;
-      offCF.zOffCF[intCF.kCF]   = zOff;
-      intCF.kCF++;
+      intCF.coarseCellIndices[intCF.numberOfCells] = kCoarse[posCSWB];
+      intCF.fineCellIndices[intCF.numberOfCells] = kFine[posFSWB];
+      offCF.x[intCF.numberOfCells]   = xOff;
+      offCF.y[intCF.numberOfCells]   = yOff;
+      offCF.z[intCF.numberOfCells]   = zOff;
+      intCF.numberOfCells++;
       //////////////////////////////////////////////////////////////////////////
 
 
@@ -2790,12 +2790,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
       posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
       posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-      intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-      intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-      offFC.xOffFC[intFC.kFC]   = xOff;
-      offFC.yOffFC[intFC.kFC]   = yOff;
-      offFC.zOffFC[intFC.kFC]   = zOff;
-      intFC.kFC++;
+      intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+      intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+      offFC.x[intFC.numberOfCells]   = xOff;
+      offFC.y[intFC.numberOfCells]   = yOff;
+      offFC.z[intFC.numberOfCells]   = zOff;
+      intFC.numberOfCells++;
 
       if (needInterface[INTERFACE_W]==false)
       {
@@ -2805,12 +2805,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_S]==false)
       {
@@ -2820,12 +2820,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if (needInterface[INTERFACE_B]==false)
       {
@@ -2835,12 +2835,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_W]==false) && (needInterface[INTERFACE_S]==false))
       {
@@ -2850,12 +2850,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_W]==false) && (needInterface[INTERFACE_B]==false))
       {
@@ -2865,12 +2865,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
       if ((needInterface[INTERFACE_S]==false) && (needInterface[INTERFACE_B]==false))
       {
@@ -2880,12 +2880,12 @@ void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC,
 
          posC=vectorPosition(   iC, jC, hC, LxCoarse, LyCoarse);
          posFSWB=vectorPosition(iF, jF, hF, LxFine,   LyFine);
-         intFC.ICellFCC[intFC.kFC] = kCoarse[posC];
-         intFC.ICellFCF[intFC.kFC] = kFine[posFSWB];
-         offFC.xOffFC[intFC.kFC]   = xOff;
-         offFC.yOffFC[intFC.kFC]   = yOff;
-         offFC.zOffFC[intFC.kFC]   = zOff;
-         intFC.kFC++;
+         intFC.coarseCellIndices[intFC.numberOfCells] = kCoarse[posC];
+         intFC.fineCellIndices[intFC.numberOfCells] = kFine[posFSWB];
+         offFC.x[intFC.numberOfCells]   = xOff;
+         offFC.y[intFC.numberOfCells]   = yOff;
+         offFC.z[intFC.numberOfCells]   = zOff;
+         intFC.numberOfCells++;
       }
    }
 
diff --git a/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h b/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
index 3be49570b33d99f9517796b33934dee1e2f31221..17e63824f930161656291bf2d7ecc05e23af9161 100644
--- a/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
+++ b/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
@@ -5,11 +5,11 @@
 #include "lbm/constants/D3Q27.h"
 
 
-void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC, 
+void interpolation(InterpolationCells &intCF, InterpolationCells &intFC, 
                                unsigned int LxCoarse, unsigned int LyCoarse, unsigned int LzCoarse, 
                                unsigned int LxFine, unsigned int LyFine, unsigned int LzFine, 
                                unsigned int dNx, unsigned int dNy, unsigned int dNz, 
                                unsigned int *kCoarse, unsigned int *kFine, bool* needInterface,
-                               OffsetCF &offCF, OffsetFC &offFC);
+                               InterpolationCellNeighbor &offCF, InterpolationCellNeighbor &offFC);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
index 19be37ed5324f48627506bb3e2508a9a1b97cf52..bc12456ceb632b3249d8757fe23c811aad6ec541 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
+++ b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
@@ -1,5 +1,5 @@
 #include "FindQ/FindQ.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 #include "lbm/constants/D3Q27.h"
 
 using namespace vf::lbm::dir;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
index 8f54358e04063c9063c873caf02a86e76bb7f936..a22e7f6e842fcfb4474e009975eb65f1920513a9 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
@@ -34,9 +34,9 @@
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
 
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -74,7 +74,7 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 	uint* neighborZ,
 	real* distributions,
 	real* distributionsAD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	real* forces,
 	bool isEvenTimestep)
 {
@@ -100,7 +100,7 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 
 	//////////////////////////////////////////////////////////////////////////
-	// run for all indices in size_Mat and fluid nodes
+	// run for all indices below numberOfLBnodes that are fluid nodes
-	if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID))
+	if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID))
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -109,125 +109,125 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 		Distributions27 dist;
 		if (isEvenTimestep)
 		{
-			dist.f[DIR_P00   ] = &distributions[DIR_P00   *size_Mat];
-			dist.f[DIR_M00   ] = &distributions[DIR_M00   *size_Mat];
-			dist.f[DIR_0P0   ] = &distributions[DIR_0P0   *size_Mat];
-			dist.f[DIR_0M0   ] = &distributions[DIR_0M0   *size_Mat];
-			dist.f[DIR_00P   ] = &distributions[DIR_00P   *size_Mat];
-			dist.f[DIR_00M   ] = &distributions[DIR_00M   *size_Mat];
-			dist.f[DIR_PP0  ] = &distributions[DIR_PP0  *size_Mat];
-			dist.f[DIR_MM0  ] = &distributions[DIR_MM0  *size_Mat];
-			dist.f[DIR_PM0  ] = &distributions[DIR_PM0  *size_Mat];
-			dist.f[DIR_MP0  ] = &distributions[DIR_MP0  *size_Mat];
-			dist.f[DIR_P0P  ] = &distributions[DIR_P0P  *size_Mat];
-			dist.f[DIR_M0M  ] = &distributions[DIR_M0M  *size_Mat];
-			dist.f[DIR_P0M  ] = &distributions[DIR_P0M  *size_Mat];
-			dist.f[DIR_M0P  ] = &distributions[DIR_M0P  *size_Mat];
-			dist.f[DIR_0PP  ] = &distributions[DIR_0PP  *size_Mat];
-			dist.f[DIR_0MM  ] = &distributions[DIR_0MM  *size_Mat];
-			dist.f[DIR_0PM  ] = &distributions[DIR_0PM  *size_Mat];
-			dist.f[DIR_0MP  ] = &distributions[DIR_0MP  *size_Mat];
-			dist.f[DIR_000] = &distributions[DIR_000*size_Mat];
-			dist.f[DIR_PPP ] = &distributions[DIR_PPP *size_Mat];
-			dist.f[DIR_MMP ] = &distributions[DIR_MMP *size_Mat];
-			dist.f[DIR_PMP ] = &distributions[DIR_PMP *size_Mat];
-			dist.f[DIR_MPP ] = &distributions[DIR_MPP *size_Mat];
-			dist.f[DIR_PPM ] = &distributions[DIR_PPM *size_Mat];
-			dist.f[DIR_MMM ] = &distributions[DIR_MMM *size_Mat];
-			dist.f[DIR_PMM ] = &distributions[DIR_PMM *size_Mat];
-			dist.f[DIR_MPM ] = &distributions[DIR_MPM *size_Mat];
+			dist.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes];
+			dist.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes];
+			dist.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes];
+			dist.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes];
+			dist.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes];
+			dist.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes];
+			dist.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes];
+			dist.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes];
+			dist.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes];
+			dist.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes];
+			dist.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes];
+			dist.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes];
+			dist.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes];
+			dist.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes];
+			dist.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes];
+			dist.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes];
+			dist.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes];
+			dist.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes];
+			dist.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+			dist.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes];
+			dist.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes];
+			dist.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes];
+			dist.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes];
+			dist.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes];
+			dist.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes];
+			dist.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes];
+			dist.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes];
 		}
 		else
 		{
-			dist.f[DIR_M00   ] = &distributions[DIR_P00   *size_Mat];
-			dist.f[DIR_P00   ] = &distributions[DIR_M00   *size_Mat];
-			dist.f[DIR_0M0   ] = &distributions[DIR_0P0   *size_Mat];
-			dist.f[DIR_0P0   ] = &distributions[DIR_0M0   *size_Mat];
-			dist.f[DIR_00M   ] = &distributions[DIR_00P   *size_Mat];
-			dist.f[DIR_00P   ] = &distributions[DIR_00M   *size_Mat];
-			dist.f[DIR_MM0  ] = &distributions[DIR_PP0  *size_Mat];
-			dist.f[DIR_PP0  ] = &distributions[DIR_MM0  *size_Mat];
-			dist.f[DIR_MP0  ] = &distributions[DIR_PM0  *size_Mat];
-			dist.f[DIR_PM0  ] = &distributions[DIR_MP0  *size_Mat];
-			dist.f[DIR_M0M  ] = &distributions[DIR_P0P  *size_Mat];
-			dist.f[DIR_P0P  ] = &distributions[DIR_M0M  *size_Mat];
-			dist.f[DIR_M0P  ] = &distributions[DIR_P0M  *size_Mat];
-			dist.f[DIR_P0M  ] = &distributions[DIR_M0P  *size_Mat];
-			dist.f[DIR_0MM  ] = &distributions[DIR_0PP  *size_Mat];
-			dist.f[DIR_0PP  ] = &distributions[DIR_0MM  *size_Mat];
-			dist.f[DIR_0MP  ] = &distributions[DIR_0PM  *size_Mat];
-			dist.f[DIR_0PM  ] = &distributions[DIR_0MP  *size_Mat];
-			dist.f[DIR_000] = &distributions[DIR_000*size_Mat];
-			dist.f[DIR_MMM ] = &distributions[DIR_PPP *size_Mat];
-			dist.f[DIR_PPM ] = &distributions[DIR_MMP *size_Mat];
-			dist.f[DIR_MPM ] = &distributions[DIR_PMP *size_Mat];
-			dist.f[DIR_PMM ] = &distributions[DIR_MPP *size_Mat];
-			dist.f[DIR_MMP ] = &distributions[DIR_PPM *size_Mat];
-			dist.f[DIR_PPP ] = &distributions[DIR_MMM *size_Mat];
-			dist.f[DIR_MPP ] = &distributions[DIR_PMM *size_Mat];
-			dist.f[DIR_PMP ] = &distributions[DIR_MPM *size_Mat];
+			dist.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes];
+			dist.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes];
+			dist.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes];
+			dist.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes];
+			dist.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes];
+			dist.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes];
+			dist.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes];
+			dist.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes];
+			dist.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes];
+			dist.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes];
+			dist.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes];
+			dist.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes];
+			dist.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes];
+			dist.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes];
+			dist.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes];
+			dist.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes];
+			dist.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes];
+			dist.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes];
+			dist.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+			dist.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes];
+			dist.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes];
+			dist.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes];
+			dist.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes];
+			dist.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes];
+			dist.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes];
+			dist.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes];
+			dist.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes];
 		}
 		////////////////////////////////////////////////////////////////////////////////
 		Distributions27 distAD;
 		if (isEvenTimestep)
 		{
-			distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
-			distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
-			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
-			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
-			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
-			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
-			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
-			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
-			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
-			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
-			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
-			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
-			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-			distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
-			distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
-			distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
-			distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
-			distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
-			distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
-			distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
-			distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
+			distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+			distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+			distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+			distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+			distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+			distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+			distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+			distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+			distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+			distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+			distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+			distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+			distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+			distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+			distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+			distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+			distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+			distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+			distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+			distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+			distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+			distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+			distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+			distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+			distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+			distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
 		}
 		else
 		{
-			distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
-			distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
-			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
-			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
-			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
-			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
-			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
-			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
-			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
-			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
-			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
-			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
-			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-			distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
-			distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
-			distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
-			distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
-			distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
-			distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
-			distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
-			distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
+			distAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+			distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+			distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+			distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+			distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+			distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+			distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+			distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+			distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+			distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+			distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+			distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+			distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+			distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+			distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+			distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+			distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+			distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+			distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+			distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+			distAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+			distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+			distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+			distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+			distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+			distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
 		}
 		////////////////////////////////////////////////////////////////////////////////
 		//! - Set neighbor indices (necessary for indirect addressing)
@@ -241,63 +241,63 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Set local distributions Fluid
 		//!
-		real fcbb = (dist.f[DIR_P00   ])[k];
-		real fabb = (dist.f[DIR_M00   ])[kw];
-		real fbcb = (dist.f[DIR_0P0   ])[k];
-		real fbab = (dist.f[DIR_0M0   ])[ks];
-		real fbbc = (dist.f[DIR_00P   ])[k];
-		real fbba = (dist.f[DIR_00M   ])[kb];
-		real fccb = (dist.f[DIR_PP0  ])[k];
-		real faab = (dist.f[DIR_MM0  ])[ksw];
-		real fcab = (dist.f[DIR_PM0  ])[ks];
-		real facb = (dist.f[DIR_MP0  ])[kw];
-		real fcbc = (dist.f[DIR_P0P  ])[k];
-		real faba = (dist.f[DIR_M0M  ])[kbw];
-		real fcba = (dist.f[DIR_P0M  ])[kb];
-		real fabc = (dist.f[DIR_M0P  ])[kw];
-		real fbcc = (dist.f[DIR_0PP  ])[k];
-		real fbaa = (dist.f[DIR_0MM  ])[kbs];
-		real fbca = (dist.f[DIR_0PM  ])[kb];
-		real fbac = (dist.f[DIR_0MP  ])[ks];
+		real fcbb = (dist.f[DIR_P00])[k];
+		real fabb = (dist.f[DIR_M00])[kw];
+		real fbcb = (dist.f[DIR_0P0])[k];
+		real fbab = (dist.f[DIR_0M0])[ks];
+		real fbbc = (dist.f[DIR_00P])[k];
+		real fbba = (dist.f[DIR_00M])[kb];
+		real fccb = (dist.f[DIR_PP0])[k];
+		real faab = (dist.f[DIR_MM0])[ksw];
+		real fcab = (dist.f[DIR_PM0])[ks];
+		real facb = (dist.f[DIR_MP0])[kw];
+		real fcbc = (dist.f[DIR_P0P])[k];
+		real faba = (dist.f[DIR_M0M])[kbw];
+		real fcba = (dist.f[DIR_P0M])[kb];
+		real fabc = (dist.f[DIR_M0P])[kw];
+		real fbcc = (dist.f[DIR_0PP])[k];
+		real fbaa = (dist.f[DIR_0MM])[kbs];
+		real fbca = (dist.f[DIR_0PM])[kb];
+		real fbac = (dist.f[DIR_0MP])[ks];
 		real fbbb = (dist.f[DIR_000])[k];
-		real fccc = (dist.f[DIR_PPP ])[k];
-		real faac = (dist.f[DIR_MMP ])[ksw];
-		real fcac = (dist.f[DIR_PMP ])[ks];
-		real facc = (dist.f[DIR_MPP ])[kw];
-		real fcca = (dist.f[DIR_PPM ])[kb];
-		real faaa = (dist.f[DIR_MMM ])[kbsw];
-		real fcaa = (dist.f[DIR_PMM ])[kbs];
-		real faca = (dist.f[DIR_MPM ])[kbw];
+		real fccc = (dist.f[DIR_PPP])[k];
+		real faac = (dist.f[DIR_MMP])[ksw];
+		real fcac = (dist.f[DIR_PMP])[ks];
+		real facc = (dist.f[DIR_MPP])[kw];
+		real fcca = (dist.f[DIR_PPM])[kb];
+		real faaa = (dist.f[DIR_MMM])[kbsw];
+		real fcaa = (dist.f[DIR_PMM])[kbs];
+		real faca = (dist.f[DIR_MPM])[kbw];
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Set local distributions Advection Diffusion
 		//!
-		real mfcbb = (distAD.f[DIR_P00   ])[k];
-		real mfabb = (distAD.f[DIR_M00   ])[kw];
-		real mfbcb = (distAD.f[DIR_0P0   ])[k];
-		real mfbab = (distAD.f[DIR_0M0   ])[ks];
-		real mfbbc = (distAD.f[DIR_00P   ])[k];
-		real mfbba = (distAD.f[DIR_00M   ])[kb];
-		real mfccb = (distAD.f[DIR_PP0  ])[k];
-		real mfaab = (distAD.f[DIR_MM0  ])[ksw];
-		real mfcab = (distAD.f[DIR_PM0  ])[ks];
-		real mfacb = (distAD.f[DIR_MP0  ])[kw];
-		real mfcbc = (distAD.f[DIR_P0P  ])[k];
-		real mfaba = (distAD.f[DIR_M0M  ])[kbw];
-		real mfcba = (distAD.f[DIR_P0M  ])[kb];
-		real mfabc = (distAD.f[DIR_M0P  ])[kw];
-		real mfbcc = (distAD.f[DIR_0PP  ])[k];
-		real mfbaa = (distAD.f[DIR_0MM  ])[kbs];
-		real mfbca = (distAD.f[DIR_0PM  ])[kb];
-		real mfbac = (distAD.f[DIR_0MP  ])[ks];
+		real mfcbb = (distAD.f[DIR_P00])[k];
+		real mfabb = (distAD.f[DIR_M00])[kw];
+		real mfbcb = (distAD.f[DIR_0P0])[k];
+		real mfbab = (distAD.f[DIR_0M0])[ks];
+		real mfbbc = (distAD.f[DIR_00P])[k];
+		real mfbba = (distAD.f[DIR_00M])[kb];
+		real mfccb = (distAD.f[DIR_PP0])[k];
+		real mfaab = (distAD.f[DIR_MM0])[ksw];
+		real mfcab = (distAD.f[DIR_PM0])[ks];
+		real mfacb = (distAD.f[DIR_MP0])[kw];
+		real mfcbc = (distAD.f[DIR_P0P])[k];
+		real mfaba = (distAD.f[DIR_M0M])[kbw];
+		real mfcba = (distAD.f[DIR_P0M])[kb];
+		real mfabc = (distAD.f[DIR_M0P])[kw];
+		real mfbcc = (distAD.f[DIR_0PP])[k];
+		real mfbaa = (distAD.f[DIR_0MM])[kbs];
+		real mfbca = (distAD.f[DIR_0PM])[kb];
+		real mfbac = (distAD.f[DIR_0MP])[ks];
 		real mfbbb = (distAD.f[DIR_000])[k];
-		real mfccc = (distAD.f[DIR_PPP ])[k];
-		real mfaac = (distAD.f[DIR_MMP ])[ksw];
-		real mfcac = (distAD.f[DIR_PMP ])[ks];
-		real mfacc = (distAD.f[DIR_MPP ])[kw];
-		real mfcca = (distAD.f[DIR_PPM ])[kb];
-		real mfaaa = (distAD.f[DIR_MMM ])[kbsw];
-		real mfcaa = (distAD.f[DIR_PMM ])[kbs];
-		real mfaca = (distAD.f[DIR_MPM ])[kbw];
+		real mfccc = (distAD.f[DIR_PPP])[k];
+		real mfaac = (distAD.f[DIR_MMP])[ksw];
+		real mfcac = (distAD.f[DIR_PMP])[ks];
+		real mfacc = (distAD.f[DIR_MPP])[kw];
+		real mfcca = (distAD.f[DIR_PPM])[kb];
+		real mfaaa = (distAD.f[DIR_MMM])[kbsw];
+		real mfcaa = (distAD.f[DIR_PMM])[kbs];
+		real mfaca = (distAD.f[DIR_MPM])[kbw];
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
 		//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -503,33 +503,33 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 		//! stored arrays dependent on timestep is based on the esoteric twist algorithm
 		//! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
 		//!
-		(distAD.f[DIR_P00   ])[k   ] = mfabb;
-		(distAD.f[DIR_M00   ])[kw  ] = mfcbb;
-		(distAD.f[DIR_0P0   ])[k   ] = mfbab;
-		(distAD.f[DIR_0M0   ])[ks  ] = mfbcb;
-		(distAD.f[DIR_00P   ])[k   ] = mfbba;
-		(distAD.f[DIR_00M   ])[kb  ] = mfbbc;
-		(distAD.f[DIR_PP0  ])[k   ] = mfaab;
-		(distAD.f[DIR_MM0  ])[ksw ] = mfccb;
-		(distAD.f[DIR_PM0  ])[ks  ] = mfacb;
-		(distAD.f[DIR_MP0  ])[kw  ] = mfcab;
-		(distAD.f[DIR_P0P  ])[k   ] = mfaba;
-		(distAD.f[DIR_M0M  ])[kbw ] = mfcbc;
-		(distAD.f[DIR_P0M  ])[kb  ] = mfabc;
-		(distAD.f[DIR_M0P  ])[kw  ] = mfcba;
-		(distAD.f[DIR_0PP  ])[k   ] = mfbaa;
-		(distAD.f[DIR_0MM  ])[kbs ] = mfbcc;
-		(distAD.f[DIR_0PM  ])[kb  ] = mfbac;
-		(distAD.f[DIR_0MP  ])[ks  ] = mfbca;
+		(distAD.f[DIR_P00])[k   ] = mfabb;
+		(distAD.f[DIR_M00])[kw  ] = mfcbb;
+		(distAD.f[DIR_0P0])[k   ] = mfbab;
+		(distAD.f[DIR_0M0])[ks  ] = mfbcb;
+		(distAD.f[DIR_00P])[k   ] = mfbba;
+		(distAD.f[DIR_00M])[kb  ] = mfbbc;
+		(distAD.f[DIR_PP0])[k   ] = mfaab;
+		(distAD.f[DIR_MM0])[ksw ] = mfccb;
+		(distAD.f[DIR_PM0])[ks  ] = mfacb;
+		(distAD.f[DIR_MP0])[kw  ] = mfcab;
+		(distAD.f[DIR_P0P])[k   ] = mfaba;
+		(distAD.f[DIR_M0M])[kbw ] = mfcbc;
+		(distAD.f[DIR_P0M])[kb  ] = mfabc;
+		(distAD.f[DIR_M0P])[kw  ] = mfcba;
+		(distAD.f[DIR_0PP])[k   ] = mfbaa;
+		(distAD.f[DIR_0MM])[kbs ] = mfbcc;
+		(distAD.f[DIR_0PM])[kb  ] = mfbac;
+		(distAD.f[DIR_0MP])[ks  ] = mfbca;
 		(distAD.f[DIR_000])[k   ] = mfbbb;
-		(distAD.f[DIR_PPP ])[k   ] = mfaaa;
-		(distAD.f[DIR_PMP ])[ks  ] = mfaca;
-		(distAD.f[DIR_PPM ])[kb  ] = mfaac;
-		(distAD.f[DIR_PMM ])[kbs ] = mfacc;
-		(distAD.f[DIR_MPP ])[kw  ] = mfcaa;
-		(distAD.f[DIR_MMP ])[ksw ] = mfcca;
-		(distAD.f[DIR_MPM ])[kbw ] = mfcac;
-		(distAD.f[DIR_MMM ])[kbsw] = mfccc;
+		(distAD.f[DIR_PPP])[k   ] = mfaaa;
+		(distAD.f[DIR_PMP])[ks  ] = mfaca;
+		(distAD.f[DIR_PPM])[kb  ] = mfaac;
+		(distAD.f[DIR_PMM])[kbs ] = mfacc;
+		(distAD.f[DIR_MPP])[kw  ] = mfcaa;
+		(distAD.f[DIR_MMP])[ksw ] = mfcca;
+		(distAD.f[DIR_MPM])[kbw ] = mfcac;
+		(distAD.f[DIR_MMM])[kbsw] = mfccc;
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
index ecf98a7494a0a5e1c81c1040917e941f066605e6..278d01e149aeb6de5241f5c84463e4e80d360512 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
@@ -2,9 +2,9 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
 
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
@@ -20,91 +20,91 @@ __global__ void QADPress7(  real* DD,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat, 
+                                       unsigned long long numberOfLBnodes, 
                                        bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -128,24 +128,24 @@ __global__ void QADPress7(  real* DD,
       //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -188,32 +188,32 @@ __global__ void QADPress7(  real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       /*real drho*/;
       //real vx1_Inflow   = zero;
@@ -293,23 +293,23 @@ __global__ void QADPress7(  real* DD,
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -461,131 +461,131 @@ __global__ void QADPress27( real* DD,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat, 
+                                       unsigned long long numberOfLBnodes, 
                                        bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -606,24 +606,24 @@ __global__ void QADPress27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -663,33 +663,33 @@ __global__ void QADPress27( real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       //drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -715,33 +715,33 @@ __global__ void QADPress27( real* DD,
       vx2            =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3            =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_W    = (D27.f[DIR_P00])[ke   ];
+      real f27_E    = (D27.f[DIR_M00])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      real f27_B    = (D27.f[DIR_00P])[kt   ];
+      real f27_T    = (D27.f[DIR_00M])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       real f27_ZERO = (D27.f[DIR_000])[kzero];
-      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -849,86 +849,86 @@ __global__ void QADPress27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
       //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
@@ -989,132 +989,132 @@ __global__ void QADPressNEQNeighbor27(
 													unsigned int* neighborX,
 													unsigned int* neighborY,
 													unsigned int* neighborZ,
-													unsigned int size_Mat,
+													unsigned long long numberOfLBnodes,
 													bool isEvenTimestep
 												)
 {
 	Distributions27 D;
 	if (isEvenTimestep == true)
 	{
-		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	}
 	else
 	{
-		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 
 	Distributions27 D27;
 	if (isEvenTimestep == true)
 	{
-		D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
-		D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
-		D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
-		D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
-		D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
-		D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
-		D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
-		D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
-		D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
-		D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
-		D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
-		D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
-		D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
-		D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
-		D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
-		D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
-		D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
-		D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
-		D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-		D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
-		D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
-		D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
-		D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
-		D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
-		D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
-		D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
-		D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
+		D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+		D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+		D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+		D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+		D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+		D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+		D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+		D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+		D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+		D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+		D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+		D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+		D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+		D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+		D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+		D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+		D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+		D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+		D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+		D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+		D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+		D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+		D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+		D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+		D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+		D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+		D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
 	}
 	else
 	{
-		D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
-		D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
-		D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
-		D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
-		D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
-		D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
-		D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
-		D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
-		D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
-		D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
-		D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
-		D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
-		D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
-		D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
-		D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
-		D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
-		D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
-		D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
-		D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-		D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
-		D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
-		D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
-		D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
-		D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
-		D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
-		D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
-		D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
+		D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+		D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+		D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+		D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+		D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+		D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+		D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+		D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+		D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+		D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+		D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+		D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+		D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+		D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+		D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+		D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+		D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+		D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+		D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+		D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+		D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+		D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+		D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+		D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+		D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+		D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+		D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1345,33 +1345,33 @@ __global__ void QADPressNEQNeighbor27(
 		unsigned int kNbsw = neighborZ[kNsw];
 		////////////////////////////////////////////////////////////////////////////////
 		//update distributions at neighbor nodes
-        (D27.f[DIR_P00   ])[kNe   ] = f27_W   ;  
-        (D27.f[DIR_M00   ])[kNw   ] = f27_E   ;	
-        (D27.f[DIR_0P0   ])[kNn   ] = f27_S   ;	
-        (D27.f[DIR_0M0   ])[kNs   ] = f27_N   ;	
-        (D27.f[DIR_00P   ])[kNt   ] = f27_B   ;	
-        (D27.f[DIR_00M   ])[kNb   ] = f27_T   ;	
-        (D27.f[DIR_PP0  ])[kNne  ] = f27_SW  ;	
-        (D27.f[DIR_MM0  ])[kNsw  ] = f27_NE  ;	
-        (D27.f[DIR_PM0  ])[kNse  ] = f27_NW  ;	
-        (D27.f[DIR_MP0  ])[kNnw  ] = f27_SE  ;	
-        (D27.f[DIR_P0P  ])[kNte  ] = f27_BW  ;	
-        (D27.f[DIR_M0M  ])[kNbw  ] = f27_TE  ;	
-        (D27.f[DIR_P0M  ])[kNbe  ] = f27_TW  ;	
-        (D27.f[DIR_M0P  ])[kNtw  ] = f27_BE  ;	
-        (D27.f[DIR_0PP  ])[kNtn  ] = f27_BS  ;	
-        (D27.f[DIR_0MM  ])[kNbs  ] = f27_TN  ;	
-        (D27.f[DIR_0PM  ])[kNbn  ] = f27_TS  ;	
-        (D27.f[DIR_0MP  ])[kNts  ] = f27_BN  ;	
+        (D27.f[DIR_P00])[kNe   ] = f27_W   ;  
+        (D27.f[DIR_M00])[kNw   ] = f27_E   ;	
+        (D27.f[DIR_0P0])[kNn   ] = f27_S   ;	
+        (D27.f[DIR_0M0])[kNs   ] = f27_N   ;	
+        (D27.f[DIR_00P])[kNt   ] = f27_B   ;	
+        (D27.f[DIR_00M])[kNb   ] = f27_T   ;	
+        (D27.f[DIR_PP0])[kNne  ] = f27_SW  ;	
+        (D27.f[DIR_MM0])[kNsw  ] = f27_NE  ;	
+        (D27.f[DIR_PM0])[kNse  ] = f27_NW  ;	
+        (D27.f[DIR_MP0])[kNnw  ] = f27_SE  ;	
+        (D27.f[DIR_P0P])[kNte  ] = f27_BW  ;	
+        (D27.f[DIR_M0M])[kNbw  ] = f27_TE  ;	
+        (D27.f[DIR_P0M])[kNbe  ] = f27_TW  ;	
+        (D27.f[DIR_M0P])[kNtw  ] = f27_BE  ;	
+        (D27.f[DIR_0PP])[kNtn  ] = f27_BS  ;	
+        (D27.f[DIR_0MM])[kNbs  ] = f27_TN  ;	
+        (D27.f[DIR_0PM])[kNbn  ] = f27_TS  ;	
+        (D27.f[DIR_0MP])[kNts  ] = f27_BN  ;	
         (D27.f[DIR_000])[kNzero] = f27_ZERO;	
-        (D27.f[DIR_PPP ])[kNtne ] = f27_BSW ;	
-        (D27.f[DIR_MMP ])[kNtsw ] = f27_BNE ;	
-        (D27.f[DIR_PMP ])[kNtse ] = f27_BNW ;	
-        (D27.f[DIR_MPP ])[kNtnw ] = f27_BSE ;	
-        (D27.f[DIR_PPM ])[kNbne ] = f27_TSW ;	
-        (D27.f[DIR_MMM ])[kNbsw ] = f27_TNE ;	
-        (D27.f[DIR_PMM ])[kNbse ] = f27_TNW ;	
-        (D27.f[DIR_MPM ])[kNbnw ] = f27_TSE ;       
+        (D27.f[DIR_PPP])[kNtne ] = f27_BSW ;	
+        (D27.f[DIR_MMP])[kNtsw ] = f27_BNE ;	
+        (D27.f[DIR_PMP])[kNtse ] = f27_BNW ;	
+        (D27.f[DIR_MPP])[kNtnw ] = f27_BSE ;	
+        (D27.f[DIR_PPM])[kNbne ] = f27_TSW ;	
+        (D27.f[DIR_MMM])[kNbsw ] = f27_TNE ;	
+        (D27.f[DIR_PMM])[kNbse ] = f27_TNW ;	
+        (D27.f[DIR_MPM])[kNbnw ] = f27_TSE ;       
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1427,91 +1427,91 @@ __global__ void QADVel7( real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat, 
+                                    unsigned long long numberOfLBnodes, 
                                     bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -1531,12 +1531,12 @@ __global__ void QADVel7( real* DD,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB;//, 
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1571,32 +1571,32 @@ __global__ void QADVel7( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       /*real drho*/;
       real vx1_Inflow   = c0o1;
@@ -1676,23 +1676,23 @@ __global__ void QADVel7( real* DD,
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1844,131 +1844,131 @@ __global__ void QADVel27(real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat, 
+                                    unsigned long long numberOfLBnodes, 
                                     bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1989,24 +1989,24 @@ __global__ void QADVel27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2046,33 +2046,33 @@ __global__ void QADVel27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       ////drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -2098,33 +2098,33 @@ __global__ void QADVel27(real* DD,
       vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_W    = (D27.f[DIR_P00])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       //real f27_ZERO = (D27.f[DIR_000])[kzero];
-      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      //real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -2233,63 +2233,63 @@ __global__ void QADVel27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -2299,24 +2299,24 @@ __global__ void QADVel27(real* DD,
       //Test
       //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //(D27.f[DIR_M00  ])[kw  ]= four;
-      //(D27.f[DIR_P00  ])[ke  ]= four;
-      //(D27.f[DIR_0M0  ])[ks  ]= four;
-      //(D27.f[DIR_0P0  ])[kn  ]= four;
-      //(D27.f[DIR_00M  ])[kb  ]= four;
-      //(D27.f[DIR_00P  ])[kt  ]= four;
-      //(D27.f[DIR_MM0 ])[ksw ]= four;
-      //(D27.f[DIR_PP0 ])[kne ]= four;
-      //(D27.f[DIR_MP0 ])[knw ]= four;
-      //(D27.f[DIR_PM0 ])[kse ]= four;
-      //(D27.f[DIR_M0M ])[kbw ]= four;
-      //(D27.f[DIR_P0P ])[kte ]= four;
-      //(D27.f[DIR_M0P ])[ktw ]= four;
-      //(D27.f[DIR_P0M ])[kbe ]= four;
-      //(D27.f[DIR_0MM ])[kbs ]= four;
-      //(D27.f[DIR_0PP ])[ktn ]= four;
-      //(D27.f[DIR_0MP ])[kts ]= four;
-      //(D27.f[DIR_0PM ])[kbn ]= four;
+      //(D27.f[DIR_M00])[kw  ]= four;
+      //(D27.f[DIR_P00])[ke  ]= four;
+      //(D27.f[DIR_0M0])[ks  ]= four;
+      //(D27.f[DIR_0P0])[kn  ]= four;
+      //(D27.f[DIR_00M])[kb  ]= four;
+      //(D27.f[DIR_00P])[kt  ]= four;
+      //(D27.f[DIR_MM0])[ksw ]= four;
+      //(D27.f[DIR_PP0])[kne ]= four;
+      //(D27.f[DIR_MP0])[knw ]= four;
+      //(D27.f[DIR_PM0])[kse ]= four;
+      //(D27.f[DIR_M0M])[kbw ]= four;
+      //(D27.f[DIR_P0P])[kte ]= four;
+      //(D27.f[DIR_M0P])[ktw ]= four;
+      //(D27.f[DIR_P0M])[kbe ]= four;
+      //(D27.f[DIR_0MM])[kbs ]= four;
+      //(D27.f[DIR_0PP])[ktn ]= four;
+      //(D27.f[DIR_0MP])[kts ]= four;
+      //(D27.f[DIR_0PM])[kbn ]= four;
       //(D27.f[DIR_MMM])[kbsw]= four;
       //(D27.f[DIR_PPP])[ktne]= four;
       //(D27.f[DIR_MMP])[ktsw]= four;
@@ -2325,24 +2325,24 @@ __global__ void QADVel27(real* DD,
       //(D27.f[DIR_PMP])[ktse]= four;
       //(D27.f[DIR_MPP])[ktnw]= four;
       //(D27.f[DIR_PMM])[kbse]= four;
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
@@ -2351,24 +2351,24 @@ __global__ void QADVel27(real* DD,
       q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
       q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
       q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
       //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
       //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
       //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
@@ -2431,91 +2431,91 @@ __global__ void QAD7( real* DD,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
-                                 unsigned int size_Mat, 
+                                 unsigned long long numberOfLBnodes, 
                                  bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -2539,24 +2539,24 @@ __global__ void QAD7( real* DD,
       //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2599,32 +2599,32 @@ __global__ void QAD7( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3/*, drho*/;
       //drho   =    f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -2696,23 +2696,23 @@ __global__ void QAD7( real* DD,
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2864,131 +2864,131 @@ __global__ void QADDirichlet27(
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
 											 unsigned int* neighborZ,
-											 unsigned int size_Mat, 
+											 unsigned long long numberOfLBnodes, 
 											 bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3009,24 +3009,24 @@ __global__ void QADDirichlet27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -3066,33 +3066,33 @@ __global__ void QADDirichlet27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       ////drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -3118,33 +3118,33 @@ __global__ void QADDirichlet27(
       vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_W    = (D27.f[DIR_P00])[ke   ];
+      real f27_E    = (D27.f[DIR_M00])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      real f27_B    = (D27.f[DIR_00P])[kt   ];
+      real f27_T    = (D27.f[DIR_00M])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       real f27_ZERO = (D27.f[DIR_000])[kzero];
-      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -3220,86 +3220,86 @@ __global__ void QADDirichlet27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
       //(D.f[DIR_000])[k]=0.1f;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[  ke   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[  kw   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[  kn   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[  ks   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[  kt   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[  kb   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[ kne  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[ ksw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[ kse  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[ knw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[ kte  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[ kbw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[ kbe  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[ ktw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[ ktn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[ kbs  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[ kbn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[ kts  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirE[  ke   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[  kw   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[  kn   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[  ks   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[  kt   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[  kb   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[ kne  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[ ksw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[ kse  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[ knw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[ kte  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[ kbw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[ kbe  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[ ktw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[ ktn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[ kbs  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[ kbn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[ kts  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
       q = q_dirTNE[ktne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
       q = q_dirBSW[kbsw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
       q = q_dirBNE[kbne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
@@ -3308,24 +3308,24 @@ __global__ void QADDirichlet27(
       q = q_dirBNW[kbnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1);
       q = q_dirBSE[kbse ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1);
       q = q_dirTNW[ktnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1);
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
       //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
       //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
       //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
@@ -3389,131 +3389,131 @@ __global__ void QADBB27( real* DD,
                                    unsigned int* neighborX,
                                    unsigned int* neighborY,
                                    unsigned int* neighborZ,
-                                   unsigned int size_Mat, 
+                                   unsigned long long numberOfLBnodes, 
                                    bool isEvenTimestep)
 {
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+   //   D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+   //   D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+   //   D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+   //   D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+   //   D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+   //   D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+   //   D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+   //   D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+   //   D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+   //   D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+   //   D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+   //   D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+   //   D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+   //   D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+   //   D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+   //   D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //   D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+   //   D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+   //   D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+   //   D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+   //   D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+   //   D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+   //   D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+   //   D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
    //} 
    //else
    //{
-   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+   //   D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+   //   D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+   //   D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+   //   D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+   //   D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+   //   D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+   //   D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+   //   D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+   //   D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+   //   D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+   //   D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+   //   D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+   //   D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+   //   D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+   //   D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+   //   D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //   D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+   //   D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+   //   D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+   //   D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+   //   D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+   //   D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+   //   D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+   //   D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
    //}
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3534,24 +3534,24 @@ __global__ void QADBB27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -3591,33 +3591,33 @@ __global__ void QADBB27( real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[DIR_P00   ])[ke   ];
-      //real f_E    = (D.f[DIR_M00   ])[kw   ];
-      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //real f_B    = (D.f[DIR_00P   ])[kt   ];
-      //real f_T    = (D.f[DIR_00M   ])[kb   ];
-      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_W    = (D.f[DIR_P00])[ke   ];
+      //real f_E    = (D.f[DIR_M00])[kw   ];
+      //real f_S    = (D.f[DIR_0P0])[kn   ];
+      //real f_N    = (D.f[DIR_0M0])[ks   ];
+      //real f_B    = (D.f[DIR_00P])[kt   ];
+      //real f_T    = (D.f[DIR_00M])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      //real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1, vx2, vx3, /*drho, feq,*/ q;
       real q;
@@ -3644,33 +3644,33 @@ __global__ void QADBB27( real* DD,
       //vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       //vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_W    = (D27.f[DIR_P00])[ke   ];
+      real f27_E    = (D27.f[DIR_M00])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      real f27_B    = (D27.f[DIR_00P])[kt   ];
+      real f27_T    = (D27.f[DIR_00M])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       //real f27_ZERO = (D27.f[DIR_000])[kzero];
-      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -3746,86 +3746,86 @@ __global__ void QADBB27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
       //(D.f[DIR_000])[k]=0.1f;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=f27_E  ;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=f27_W  ;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=f27_N  ;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=f27_S  ;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=f27_T  ;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=f27_B  ;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=f27_NE ;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=f27_SW ;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=f27_SE ;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=f27_NW ;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=f27_TE ;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=f27_BW ;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=f27_BE ;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=f27_TW ;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=f27_TN ;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=f27_BS ;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=f27_BN ;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=f27_TS ;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]=f27_E  ;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]=f27_W  ;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]=f27_N  ;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]=f27_S  ;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]=f27_T  ;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]=f27_B  ;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=f27_NE ;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=f27_SW ;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=f27_SE ;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=f27_NW ;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=f27_TE ;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=f27_BW ;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=f27_BE ;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=f27_TW ;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=f27_TN ;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=f27_BS ;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=f27_BN ;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=f27_TS ;
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=f27_TNE;
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=f27_BSW;
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=f27_BNE;
@@ -3905,91 +3905,91 @@ __global__ void QNoSlipADincomp7(
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
 											 unsigned int* neighborZ,
-											 unsigned int size_Mat, 
+											 unsigned long long numberOfLBnodes, 
 											 bool isEvenTimestep)
 {
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+   //   D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+   //   D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //   D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //   D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+   //   D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+   //   D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //   D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //   D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //   D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //   D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+   //   D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+   //   D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+   //   D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+   //   D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+   //   D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+   //   D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+   //   D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+   //   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //   D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+   //   D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+   //   D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+   //   D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+   //   D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+   //   D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+   //   D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+   //   D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    //} 
    //else
    //{
-   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+   //   D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+   //   D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //   D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //   D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+   //   D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+   //   D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //   D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //   D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //   D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //   D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+   //   D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+   //   D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+   //   D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+   //   D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+   //   D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+   //   D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+   //   D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+   //   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //   D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+   //   D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+   //   D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+   //   D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+   //   D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+   //   D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+   //   D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+   //   D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    //}
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -4009,12 +4009,12 @@ __global__ void QNoSlipADincomp7(
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB;
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4046,32 +4046,32 @@ __global__ void QNoSlipADincomp7(
       //unsigned int ktne = KQK;
       //unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[DIR_P00   ])[ke   ];
-      //real f_E    = (D.f[DIR_M00   ])[kw   ];
-      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //real f_B    = (D.f[DIR_00P   ])[kt   ];
-      //real f_T    = (D.f[DIR_00M   ])[kb   ];
-      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      //real f_W    = (D.f[DIR_P00])[ke   ];
+      //real f_E    = (D.f[DIR_M00])[kw   ];
+      //real f_S    = (D.f[DIR_0P0])[kn   ];
+      //real f_N    = (D.f[DIR_0M0])[ks   ];
+      //real f_B    = (D.f[DIR_00P])[kt   ];
+      //real f_T    = (D.f[DIR_00M])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP])[kts  ];
+      //real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       //real vx2 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
@@ -4131,23 +4131,23 @@ __global__ void QNoSlipADincomp7(
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////
@@ -4329,131 +4329,131 @@ __global__ void QNoSlipADincomp27(
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
 											 unsigned int* neighborZ,
-											 unsigned int size_Mat, 
+											 unsigned long long numberOfLBnodes, 
 											 bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4474,24 +4474,24 @@ __global__ void QNoSlipADincomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -4531,65 +4531,65 @@ __global__ void QNoSlipADincomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3 =  ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_W    = (D27.f[DIR_P00])[ke   ];
+      real f27_E    = (D27.f[DIR_M00])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      real f27_B    = (D27.f[DIR_00P])[kt   ];
+      real f27_T    = (D27.f[DIR_00M])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       real f27_ZERO = (D27.f[DIR_000])[kzero];
-      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -4665,63 +4665,63 @@ __global__ void QNoSlipADincomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -4729,24 +4729,24 @@ __global__ void QNoSlipADincomp27(
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
@@ -4811,91 +4811,91 @@ __global__ void QADVeloIncomp7(
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat, 
+											unsigned long long numberOfLBnodes, 
 											bool isEvenTimestep)
 {
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+   //   D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+   //   D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+   //   D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+   //   D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+   //   D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+   //   D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+   //   D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+   //   D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+   //   D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+   //   D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+   //   D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+   //   D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+   //   D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+   //   D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+   //   D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+   //   D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //   D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+   //   D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+   //   D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+   //   D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+   //   D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+   //   D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+   //   D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+   //   D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
    //} 
    //else
    //{
-   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+   //   D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+   //   D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+   //   D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+   //   D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+   //   D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+   //   D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+   //   D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+   //   D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+   //   D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+   //   D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+   //   D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+   //   D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+   //   D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+   //   D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+   //   D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+   //   D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //   D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+   //   D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+   //   D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+   //   D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+   //   D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+   //   D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+   //   D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+   //   D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
    //}
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -4915,12 +4915,12 @@ __global__ void QADVeloIncomp7(
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB; 
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4952,32 +4952,32 @@ __global__ void QADVeloIncomp7(
       //unsigned int ktne = KQK;
       //unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[DIR_P00   ])[ke   ];
-      //real f_E    = (D.f[DIR_M00   ])[kw   ];
-      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //real f_B    = (D.f[DIR_00P   ])[kt   ];
-      //real f_T    = (D.f[DIR_00M   ])[kb   ];
-      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      //real f_W    = (D.f[DIR_P00])[ke   ];
+      //real f_E    = (D.f[DIR_M00])[kw   ];
+      //real f_S    = (D.f[DIR_0P0])[kn   ];
+      //real f_N    = (D.f[DIR_0M0])[ks   ];
+      //real f_B    = (D.f[DIR_00P])[kt   ];
+      //real f_T    = (D.f[DIR_00M])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP])[kts  ];
+      //real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1_Inflow   = c0o1;
       //real vx2_Inflow   = velo[k];
@@ -5091,23 +5091,23 @@ __global__ void QADVeloIncomp7(
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////
@@ -5289,131 +5289,131 @@ __global__ void QADVeloIncomp27(
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat, 
+											unsigned long long numberOfLBnodes, 
 											bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -5434,24 +5434,24 @@ __global__ void QADVeloIncomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -5491,65 +5491,65 @@ __global__ void QADVeloIncomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3 = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_W    = (D27.f[DIR_P00])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       //real f27_ZERO = (D27.f[DIR_000])[kzero];
-      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      //real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -5630,63 +5630,63 @@ __global__ void QADVeloIncomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -5694,24 +5694,24 @@ __global__ void QADVeloIncomp27(
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
@@ -5720,24 +5720,24 @@ __global__ void QADVeloIncomp27(
       q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
       q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
       q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
       //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
       //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
       //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
@@ -5801,91 +5801,91 @@ __global__ void QADPressIncomp7( real* DD,
 										   unsigned int* neighborX,
 										   unsigned int* neighborY,
 										   unsigned int* neighborZ,
-										   unsigned int size_Mat, 
+										   unsigned long long numberOfLBnodes, 
 										   bool isEvenTimestep)
 {
   /* Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+      D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+      D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+      D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+      D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+      D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+      D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+      D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+      D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+      D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+      D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+      D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+      D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+      D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+      D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+      D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+      D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+      D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+      D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+      D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+      D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+      D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+      D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+      D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+      D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+      D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+      D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+      D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+      D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+      D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+      D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+      D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+      D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+      D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+      D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+      D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+      D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+      D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+      D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+      D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+      D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
    }*/
 
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    }
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
 
 
@@ -5905,12 +5905,12 @@ __global__ void QADPressIncomp7( real* DD,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB; 
 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -5945,32 +5945,32 @@ __global__ void QADPressIncomp7( real* DD,
     /*  real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];*/
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];*/
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       //real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
@@ -6035,23 +6035,23 @@ __global__ void QADPressIncomp7( real* DD,
       //pointertausch
       if (isEvenTimestep==false)
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[1] = &DD7[1*size_Mat];
-         D7.f[2] = &DD7[2*size_Mat];
-         D7.f[3] = &DD7[3*size_Mat];
-         D7.f[4] = &DD7[4*size_Mat];
-         D7.f[5] = &DD7[5*size_Mat];
-         D7.f[6] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[1] = &DD7[1*numberOfLBnodes];
+         D7.f[2] = &DD7[2*numberOfLBnodes];
+         D7.f[3] = &DD7[3*numberOfLBnodes];
+         D7.f[4] = &DD7[4*numberOfLBnodes];
+         D7.f[5] = &DD7[5*numberOfLBnodes];
+         D7.f[6] = &DD7[6*numberOfLBnodes];
       }
       else
       {
-         D7.f[0] = &DD7[0*size_Mat];
-         D7.f[2] = &DD7[1*size_Mat];
-         D7.f[1] = &DD7[2*size_Mat];
-         D7.f[4] = &DD7[3*size_Mat];
-         D7.f[3] = &DD7[4*size_Mat];
-         D7.f[6] = &DD7[5*size_Mat];
-         D7.f[5] = &DD7[6*size_Mat];
+         D7.f[0] = &DD7[0*numberOfLBnodes];
+         D7.f[2] = &DD7[1*numberOfLBnodes];
+         D7.f[1] = &DD7[2*numberOfLBnodes];
+         D7.f[4] = &DD7[3*numberOfLBnodes];
+         D7.f[3] = &DD7[4*numberOfLBnodes];
+         D7.f[6] = &DD7[5*numberOfLBnodes];
+         D7.f[5] = &DD7[6*numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////
@@ -6240,131 +6240,131 @@ __global__ void QADPressIncomp27(
 											   unsigned int* neighborX,
 											   unsigned int* neighborY,
 											   unsigned int* neighborZ,
-											   unsigned int size_Mat, 
+											   unsigned long long numberOfLBnodes, 
 											   bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -6385,24 +6385,24 @@ __global__ void QADPressIncomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -6442,65 +6442,65 @@ __global__ void QADPressIncomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1      = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2      = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3      = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
-      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
-      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
-      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
-      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
-      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
-      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
-      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
-      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
-      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
-      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
-      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
-      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
-      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
-      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
-      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
-      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
-      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_W    = (D27.f[DIR_P00])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP])[kts  ];
       //real f27_ZERO = (D27.f[DIR_000])[kzero];
-      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
-      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
-      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
-      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
-      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
-      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
-      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
-      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
+      //real f27_BSW  = (D27.f[DIR_PPP])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -6581,63 +6581,63 @@ __global__ void QADPressIncomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
-         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+         D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+         D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+         D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+         D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+         D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+         D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+         D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+         D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+         D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+         D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+         D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+         D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+         D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+         D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+         D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+         D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+         D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+         D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+         D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+         D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+         D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
+         D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+         D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+         D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+         D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+         D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -6645,24 +6645,24 @@ __global__ void QADPressIncomp27(
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
@@ -6671,24 +6671,24 @@ __global__ void QADPressIncomp27(
       q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
       q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
       q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
       //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
       //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
       //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
@@ -6739,131 +6739,131 @@ __global__ void AD_SlipVelDeviceComp(
     uint* neighborX,
     uint* neighborY,
     uint* neighborZ,
-    uint size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep)
 {
     Distributions27 D;
     if (isEvenTimestep)
     {
-        D.f[DIR_P00   ] = &distributions[DIR_P00    * size_Mat];
-        D.f[DIR_M00   ] = &distributions[DIR_M00    * size_Mat];
-        D.f[DIR_0P0   ] = &distributions[DIR_0P0    * size_Mat];
-        D.f[DIR_0M0   ] = &distributions[DIR_0M0    * size_Mat];
-        D.f[DIR_00P   ] = &distributions[DIR_00P    * size_Mat];
-        D.f[DIR_00M   ] = &distributions[DIR_00M    * size_Mat];
-        D.f[DIR_PP0  ] = &distributions[DIR_PP0   * size_Mat];
-        D.f[DIR_MM0  ] = &distributions[DIR_MM0   * size_Mat];
-        D.f[DIR_PM0  ] = &distributions[DIR_PM0   * size_Mat];
-        D.f[DIR_MP0  ] = &distributions[DIR_MP0   * size_Mat];
-        D.f[DIR_P0P  ] = &distributions[DIR_P0P   * size_Mat];
-        D.f[DIR_M0M  ] = &distributions[DIR_M0M   * size_Mat];
-        D.f[DIR_P0M  ] = &distributions[DIR_P0M   * size_Mat];
-        D.f[DIR_M0P  ] = &distributions[DIR_M0P   * size_Mat];
-        D.f[DIR_0PP  ] = &distributions[DIR_0PP   * size_Mat];
-        D.f[DIR_0MM  ] = &distributions[DIR_0MM   * size_Mat];
-        D.f[DIR_0PM  ] = &distributions[DIR_0PM   * size_Mat];
-        D.f[DIR_0MP  ] = &distributions[DIR_0MP   * size_Mat];
-        D.f[DIR_000] = &distributions[DIR_000 * size_Mat];
-        D.f[DIR_PPP ] = &distributions[DIR_PPP  * size_Mat];
-        D.f[DIR_MMP ] = &distributions[DIR_MMP  * size_Mat];
-        D.f[DIR_PMP ] = &distributions[DIR_PMP  * size_Mat];
-        D.f[DIR_MPP ] = &distributions[DIR_MPP  * size_Mat];
-        D.f[DIR_PPM ] = &distributions[DIR_PPM  * size_Mat];
-        D.f[DIR_MMM ] = &distributions[DIR_MMM  * size_Mat];
-        D.f[DIR_PMM ] = &distributions[DIR_PMM  * size_Mat];
-        D.f[DIR_MPM ] = &distributions[DIR_MPM  * size_Mat];
+        D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes];
+        D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes];
+        D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes];
+        D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes];
+        D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes];
+        D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes];
+        D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes];
+        D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes];
+        D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes];
+        D.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes];
+        D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes];
+        D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes];
+        D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes];
+        D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes];
+        D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes];
+        D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes];
+        D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes];
+        D.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes];
+        D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+        D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes];
+        D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes];
+        D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes];
+        D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes];
+        D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes];
+        D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes];
+        D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes];
+        D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes];
     }
     else
     {
-        D.f[DIR_M00   ] = &distributions[DIR_P00    * size_Mat];
-        D.f[DIR_P00   ] = &distributions[DIR_M00    * size_Mat];
-        D.f[DIR_0M0   ] = &distributions[DIR_0P0    * size_Mat];
-        D.f[DIR_0P0   ] = &distributions[DIR_0M0    * size_Mat];
-        D.f[DIR_00M   ] = &distributions[DIR_00P    * size_Mat];
-        D.f[DIR_00P   ] = &distributions[DIR_00M    * size_Mat];
-        D.f[DIR_MM0  ] = &distributions[DIR_PP0   * size_Mat];
-        D.f[DIR_PP0  ] = &distributions[DIR_MM0   * size_Mat];
-        D.f[DIR_MP0  ] = &distributions[DIR_PM0   * size_Mat];
-        D.f[DIR_PM0  ] = &distributions[DIR_MP0   * size_Mat];
-        D.f[DIR_M0M  ] = &distributions[DIR_P0P   * size_Mat];
-        D.f[DIR_P0P  ] = &distributions[DIR_M0M   * size_Mat];
-        D.f[DIR_M0P  ] = &distributions[DIR_P0M   * size_Mat];
-        D.f[DIR_P0M  ] = &distributions[DIR_M0P   * size_Mat];
-        D.f[DIR_0MM  ] = &distributions[DIR_0PP   * size_Mat];
-        D.f[DIR_0PP  ] = &distributions[DIR_0MM   * size_Mat];
-        D.f[DIR_0MP  ] = &distributions[DIR_0PM   * size_Mat];
-        D.f[DIR_0PM  ] = &distributions[DIR_0MP   * size_Mat];
-        D.f[DIR_000] = &distributions[DIR_000 * size_Mat];
-        D.f[DIR_PPP ] = &distributions[DIR_MMM  * size_Mat];
-        D.f[DIR_MMP ] = &distributions[DIR_PPM  * size_Mat];
-        D.f[DIR_PMP ] = &distributions[DIR_MPM  * size_Mat];
-        D.f[DIR_MPP ] = &distributions[DIR_PMM  * size_Mat];
-        D.f[DIR_PPM ] = &distributions[DIR_MMP  * size_Mat];
-        D.f[DIR_MMM ] = &distributions[DIR_PPP  * size_Mat];
-        D.f[DIR_PMM ] = &distributions[DIR_MPP  * size_Mat];
-        D.f[DIR_MPM ] = &distributions[DIR_PMP  * size_Mat];
+        D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes];
+        D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes];
+        D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes];
+        D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes];
+        D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes];
+        D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes];
+        D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes];
+        D.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes];
+        D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes];
+        D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes];
+        D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes];
+        D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes];
+        D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes];
+        D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes];
+        D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes];
+        D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes];
+        D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes];
+        D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes];
+        D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+        D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes];
+        D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes];
+        D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes];
+        D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes];
+        D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes];
+        D.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes];
+        D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes];
+        D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes];
     }
     ////////////////////////////////////////////////////////////////////////////////
     Distributions27 DAD;
     if (isEvenTimestep)
     {
-        DAD.f[DIR_P00   ] = &distributionsAD[DIR_P00    * size_Mat];
-        DAD.f[DIR_M00   ] = &distributionsAD[DIR_M00    * size_Mat];
-        DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0    * size_Mat];
-        DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0    * size_Mat];
-        DAD.f[DIR_00P   ] = &distributionsAD[DIR_00P    * size_Mat];
-        DAD.f[DIR_00M   ] = &distributionsAD[DIR_00M    * size_Mat];
-        DAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0   * size_Mat];
-        DAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0   * size_Mat];
-        DAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0   * size_Mat];
-        DAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0   * size_Mat];
-        DAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P   * size_Mat];
-        DAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M   * size_Mat];
-        DAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M   * size_Mat];
-        DAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P   * size_Mat];
-        DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP   * size_Mat];
-        DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM   * size_Mat];
-        DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM   * size_Mat];
-        DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP   * size_Mat];
-        DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
-        DAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP  * size_Mat];
-        DAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP  * size_Mat];
-        DAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP  * size_Mat];
-        DAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP  * size_Mat];
-        DAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM  * size_Mat];
-        DAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM  * size_Mat];
-        DAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM  * size_Mat];
-        DAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM  * size_Mat];
+        DAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+        DAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+        DAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+        DAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+        DAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+        DAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+        DAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+        DAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+        DAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+        DAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+        DAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+        DAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+        DAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+        DAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+        DAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+        DAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+        DAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+        DAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+        DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+        DAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+        DAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+        DAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+        DAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+        DAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+        DAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+        DAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+        DAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
     }
     else
     {
-        DAD.f[DIR_M00   ] = &distributionsAD[DIR_P00    * size_Mat];
-        DAD.f[DIR_P00   ] = &distributionsAD[DIR_M00    * size_Mat];
-        DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0    * size_Mat];
-        DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0    * size_Mat];
-        DAD.f[DIR_00M   ] = &distributionsAD[DIR_00P    * size_Mat];
-        DAD.f[DIR_00P   ] = &distributionsAD[DIR_00M    * size_Mat];
-        DAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0   * size_Mat];
-        DAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0   * size_Mat];
-        DAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0   * size_Mat];
-        DAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0   * size_Mat];
-        DAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P   * size_Mat];
-        DAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M   * size_Mat];
-        DAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M   * size_Mat];
-        DAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P   * size_Mat];
-        DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP   * size_Mat];
-        DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM   * size_Mat];
-        DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM   * size_Mat];
-        DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP   * size_Mat];
-        DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
-        DAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM  * size_Mat];
-        DAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM  * size_Mat];
-        DAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM  * size_Mat];
-        DAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM  * size_Mat];
-        DAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP  * size_Mat];
-        DAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP  * size_Mat];
-        DAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP  * size_Mat];
-        DAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP  * size_Mat];
+        DAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+        DAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+        DAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+        DAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+        DAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+        DAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+        DAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+        DAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+        DAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+        DAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+        DAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+        DAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+        DAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+        DAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+        DAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+        DAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+        DAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+        DAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+        DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+        DAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+        DAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+        DAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
+        DAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+        DAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+        DAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+        DAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+        DAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
     }
     ////////////////////////////////////////////////////////////////////////////////
     const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -6888,24 +6888,24 @@ __global__ void AD_SlipVelDeviceComp(
             * q_dirBE, * q_dirTW, * q_dirTN, * q_dirBS, * q_dirBN, * q_dirTS,
             * q_dirTNE, * q_dirTSW, * q_dirTSE, * q_dirTNW, * q_dirBNE, * q_dirBSW,
             * q_dirBSE, * q_dirBNW;
-        q_dirE   = &Qarrays[DIR_P00   * numberOfBCnodes];
-        q_dirW   = &Qarrays[DIR_M00   * numberOfBCnodes];
-        q_dirN   = &Qarrays[DIR_0P0   * numberOfBCnodes];
-        q_dirS   = &Qarrays[DIR_0M0   * numberOfBCnodes];
-        q_dirT   = &Qarrays[DIR_00P   * numberOfBCnodes];
-        q_dirB   = &Qarrays[DIR_00M   * numberOfBCnodes];
-        q_dirNE  = &Qarrays[DIR_PP0  * numberOfBCnodes];
-        q_dirSW  = &Qarrays[DIR_MM0  * numberOfBCnodes];
-        q_dirSE  = &Qarrays[DIR_PM0  * numberOfBCnodes];
-        q_dirNW  = &Qarrays[DIR_MP0  * numberOfBCnodes];
-        q_dirTE  = &Qarrays[DIR_P0P  * numberOfBCnodes];
-        q_dirBW  = &Qarrays[DIR_M0M  * numberOfBCnodes];
-        q_dirBE  = &Qarrays[DIR_P0M  * numberOfBCnodes];
-        q_dirTW  = &Qarrays[DIR_M0P  * numberOfBCnodes];
-        q_dirTN  = &Qarrays[DIR_0PP  * numberOfBCnodes];
-        q_dirBS  = &Qarrays[DIR_0MM  * numberOfBCnodes];
-        q_dirBN  = &Qarrays[DIR_0PM  * numberOfBCnodes];
-        q_dirTS  = &Qarrays[DIR_0MP  * numberOfBCnodes];
+        q_dirE   = &Qarrays[DIR_P00 * numberOfBCnodes];
+        q_dirW   = &Qarrays[DIR_M00 * numberOfBCnodes];
+        q_dirN   = &Qarrays[DIR_0P0 * numberOfBCnodes];
+        q_dirS   = &Qarrays[DIR_0M0 * numberOfBCnodes];
+        q_dirT   = &Qarrays[DIR_00P * numberOfBCnodes];
+        q_dirB   = &Qarrays[DIR_00M * numberOfBCnodes];
+        q_dirNE  = &Qarrays[DIR_PP0 * numberOfBCnodes];
+        q_dirSW  = &Qarrays[DIR_MM0 * numberOfBCnodes];
+        q_dirSE  = &Qarrays[DIR_PM0 * numberOfBCnodes];
+        q_dirNW  = &Qarrays[DIR_MP0 * numberOfBCnodes];
+        q_dirTE  = &Qarrays[DIR_P0P * numberOfBCnodes];
+        q_dirBW  = &Qarrays[DIR_M0M * numberOfBCnodes];
+        q_dirBE  = &Qarrays[DIR_P0M * numberOfBCnodes];
+        q_dirTW  = &Qarrays[DIR_M0P * numberOfBCnodes];
+        q_dirTN  = &Qarrays[DIR_0PP * numberOfBCnodes];
+        q_dirBS  = &Qarrays[DIR_0MM * numberOfBCnodes];
+        q_dirBN  = &Qarrays[DIR_0PM * numberOfBCnodes];
+        q_dirTS  = &Qarrays[DIR_0MP * numberOfBCnodes];
         q_dirTNE = &Qarrays[DIR_PPP * numberOfBCnodes];
         q_dirTSW = &Qarrays[DIR_MMP * numberOfBCnodes];
         q_dirTSE = &Qarrays[DIR_PMP * numberOfBCnodes];
@@ -7025,63 +7025,63 @@ __global__ void AD_SlipVelDeviceComp(
         //////////////////////////////////////////////////////////////////////////
         if (!isEvenTimestep)
         {
-            DAD.f[DIR_P00   ] = &distributionsAD[DIR_P00    * size_Mat];
-            DAD.f[DIR_M00   ] = &distributionsAD[DIR_M00    * size_Mat];
-            DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0    * size_Mat];
-            DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0    * size_Mat];
-            DAD.f[DIR_00P   ] = &distributionsAD[DIR_00P    * size_Mat];
-            DAD.f[DIR_00M   ] = &distributionsAD[DIR_00M    * size_Mat];
-            DAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0   * size_Mat];
-            DAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0   * size_Mat];
-            DAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0   * size_Mat];
-            DAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0   * size_Mat];
-            DAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P   * size_Mat];
-            DAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M   * size_Mat];
-            DAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M   * size_Mat];
-            DAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P   * size_Mat];
-            DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP   * size_Mat];
-            DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM   * size_Mat];
-            DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM   * size_Mat];
-            DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP   * size_Mat];
-            DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
-            DAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP  * size_Mat];
-            DAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP  * size_Mat];
-            DAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP  * size_Mat];
-            DAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP  * size_Mat];
-            DAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM  * size_Mat];
-            DAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM  * size_Mat];
-            DAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM  * size_Mat];
-            DAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM  * size_Mat];
+            DAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+            DAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+            DAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+            DAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+            DAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+            DAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+            DAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+            DAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+            DAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+            DAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+            DAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+            DAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+            DAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+            DAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+            DAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+            DAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+            DAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+            DAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+            DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+            DAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+            DAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+            DAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+            DAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+            DAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+            DAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+            DAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+            DAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
         }
         else
         {
-            DAD.f[DIR_M00   ] = &distributionsAD[DIR_P00    * size_Mat];
-            DAD.f[DIR_P00   ] = &distributionsAD[DIR_M00    * size_Mat];
-            DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0    * size_Mat];
-            DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0    * size_Mat];
-            DAD.f[DIR_00M   ] = &distributionsAD[DIR_00P    * size_Mat];
-            DAD.f[DIR_00P   ] = &distributionsAD[DIR_00M    * size_Mat];
-            DAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0   * size_Mat];
-            DAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0   * size_Mat];
-            DAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0   * size_Mat];
-            DAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0   * size_Mat];
-            DAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P   * size_Mat];
-            DAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M   * size_Mat];
-            DAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M   * size_Mat];
-            DAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P   * size_Mat];
-            DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP   * size_Mat];
-            DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM   * size_Mat];
-            DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM   * size_Mat];
-            DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP   * size_Mat];
-            DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
-            DAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM  * size_Mat];
-            DAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM  * size_Mat];
-            DAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM  * size_Mat];
-            DAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM  * size_Mat];
-            DAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP  * size_Mat];
-            DAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP  * size_Mat];
-            DAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP  * size_Mat];
-            DAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP  * size_Mat];
+            DAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+            DAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+            DAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+            DAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+            DAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+            DAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+            DAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+            DAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+            DAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+            DAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+            DAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+            DAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+            DAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+            DAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+            DAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+            DAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+            DAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+            DAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+            DAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+            DAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+            DAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+            DAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
+            DAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+            DAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+            DAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+            DAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+            DAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
         }
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         real concentration =
@@ -7115,24 +7115,24 @@ __global__ void AD_SlipVelDeviceComp(
         real jTan3 = jx3 - NormJ * NormZ;
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        q = q_dirE[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M00  ])[kw  ] = calcDistributionBC_AD(q, c2o27,   vx1,         cu_sq, f_E,   f_W,   omegaDiffusivity,        jTan1,       concentration); }
-        q = q_dirW[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P00  ])[ke  ] = calcDistributionBC_AD(q, c2o27,  -vx1,         cu_sq, f_W,   f_E,   omegaDiffusivity,       -jTan1,       concentration); }
-        q = q_dirN[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0M0  ])[ks  ] = calcDistributionBC_AD(q, c2o27,   vx2,         cu_sq, f_N,   f_S,   omegaDiffusivity,        jTan2,       concentration); }
-        q = q_dirS[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0P0  ])[kn  ] = calcDistributionBC_AD(q, c2o27,  -vx2,         cu_sq, f_S,   f_N,   omegaDiffusivity,       -jTan2,       concentration); }
-        q = q_dirT[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00M  ])[kb  ] = calcDistributionBC_AD(q, c2o27,   vx3,         cu_sq, f_T,   f_B,   omegaDiffusivity,        jTan3,       concentration); }
-        q = q_dirB[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00P  ])[kt  ] = calcDistributionBC_AD(q, c2o27,  -vx3,         cu_sq, f_B,   f_T,   omegaDiffusivity,       -jTan3,       concentration); }
-        q = q_dirNE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MM0 ])[ksw ] = calcDistributionBC_AD(q, c1o54,   vx1+vx2,     cu_sq, f_NE,  f_SW,  omegaDiffusivity,  jTan1+jTan2,       concentration); }
-        q = q_dirSW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PP0 ])[kne ] = calcDistributionBC_AD(q, c1o54,  -vx1-vx2,     cu_sq, f_SW,  f_NE,  omegaDiffusivity, -jTan1-jTan2,       concentration); }
-        q = q_dirSE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MP0 ])[knw ] = calcDistributionBC_AD(q, c1o54,   vx1-vx2,     cu_sq, f_SE,  f_NW,  omegaDiffusivity,  jTan1-jTan2,       concentration); }
-        q = q_dirNW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PM0 ])[kse ] = calcDistributionBC_AD(q, c1o54,  -vx1+vx2,     cu_sq, f_NW,  f_SE,  omegaDiffusivity, -jTan1+jTan2,       concentration); }
-        q = q_dirTE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0M ])[kbw ] = calcDistributionBC_AD(q, c1o54,   vx1    +vx3, cu_sq, f_TE,  f_BW,  omegaDiffusivity,  jTan1      +jTan3, concentration); }
-        q = q_dirBW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0P ])[kte ] = calcDistributionBC_AD(q, c1o54,  -vx1    -vx3, cu_sq, f_BW,  f_TE,  omegaDiffusivity, -jTan1      -jTan3, concentration); }
-        q = q_dirBE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0P ])[ktw ] = calcDistributionBC_AD(q, c1o54,   vx1    -vx3, cu_sq, f_BE,  f_TW,  omegaDiffusivity,  jTan1      -jTan3, concentration); }
-        q = q_dirTW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0M ])[kbe ] = calcDistributionBC_AD(q, c1o54,  -vx1    +vx3, cu_sq, f_TW,  f_BE,  omegaDiffusivity, -jTan1      +jTan3, concentration); }
-        q = q_dirTN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MM ])[kbs ] = calcDistributionBC_AD(q, c1o54,       vx2+vx3, cu_sq, f_TN,  f_BS,  omegaDiffusivity,        jTan2+jTan3, concentration); }
-        q = q_dirBS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PP ])[ktn ] = calcDistributionBC_AD(q, c1o54,      -vx2-vx3, cu_sq, f_BS,  f_TN,  omegaDiffusivity,       -jTan2-jTan3, concentration); }
-        q = q_dirBN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MP ])[kts ] = calcDistributionBC_AD(q, c1o54,       vx2-vx3, cu_sq, f_BN,  f_TS,  omegaDiffusivity,        jTan2-jTan3, concentration); }
-        q = q_dirTS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PM ])[kbn ] = calcDistributionBC_AD(q, c1o54,      -vx2+vx3, cu_sq, f_TS,  f_BN,  omegaDiffusivity,       -jTan2+jTan3, concentration); }
+        q = q_dirE[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M00])[kw  ] = calcDistributionBC_AD(q, c2o27,   vx1,         cu_sq, f_E,   f_W,   omegaDiffusivity,        jTan1,       concentration); }
+        q = q_dirW[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P00])[ke  ] = calcDistributionBC_AD(q, c2o27,  -vx1,         cu_sq, f_W,   f_E,   omegaDiffusivity,       -jTan1,       concentration); }
+        q = q_dirN[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0M0])[ks  ] = calcDistributionBC_AD(q, c2o27,   vx2,         cu_sq, f_N,   f_S,   omegaDiffusivity,        jTan2,       concentration); }
+        q = q_dirS[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0P0])[kn  ] = calcDistributionBC_AD(q, c2o27,  -vx2,         cu_sq, f_S,   f_N,   omegaDiffusivity,       -jTan2,       concentration); }
+        q = q_dirT[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00M])[kb  ] = calcDistributionBC_AD(q, c2o27,   vx3,         cu_sq, f_T,   f_B,   omegaDiffusivity,        jTan3,       concentration); }
+        q = q_dirB[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00P])[kt  ] = calcDistributionBC_AD(q, c2o27,  -vx3,         cu_sq, f_B,   f_T,   omegaDiffusivity,       -jTan3,       concentration); }
+        q = q_dirNE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MM0])[ksw ] = calcDistributionBC_AD(q, c1o54,   vx1+vx2,     cu_sq, f_NE,  f_SW,  omegaDiffusivity,  jTan1+jTan2,       concentration); }
+        q = q_dirSW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PP0])[kne ] = calcDistributionBC_AD(q, c1o54,  -vx1-vx2,     cu_sq, f_SW,  f_NE,  omegaDiffusivity, -jTan1-jTan2,       concentration); }
+        q = q_dirSE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MP0])[knw ] = calcDistributionBC_AD(q, c1o54,   vx1-vx2,     cu_sq, f_SE,  f_NW,  omegaDiffusivity,  jTan1-jTan2,       concentration); }
+        q = q_dirNW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PM0])[kse ] = calcDistributionBC_AD(q, c1o54,  -vx1+vx2,     cu_sq, f_NW,  f_SE,  omegaDiffusivity, -jTan1+jTan2,       concentration); }
+        q = q_dirTE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0M])[kbw ] = calcDistributionBC_AD(q, c1o54,   vx1    +vx3, cu_sq, f_TE,  f_BW,  omegaDiffusivity,  jTan1      +jTan3, concentration); }
+        q = q_dirBW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0P])[kte ] = calcDistributionBC_AD(q, c1o54,  -vx1    -vx3, cu_sq, f_BW,  f_TE,  omegaDiffusivity, -jTan1      -jTan3, concentration); }
+        q = q_dirBE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0P])[ktw ] = calcDistributionBC_AD(q, c1o54,   vx1    -vx3, cu_sq, f_BE,  f_TW,  omegaDiffusivity,  jTan1      -jTan3, concentration); }
+        q = q_dirTW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0M])[kbe ] = calcDistributionBC_AD(q, c1o54,  -vx1    +vx3, cu_sq, f_TW,  f_BE,  omegaDiffusivity, -jTan1      +jTan3, concentration); }
+        q = q_dirTN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MM])[kbs ] = calcDistributionBC_AD(q, c1o54,       vx2+vx3, cu_sq, f_TN,  f_BS,  omegaDiffusivity,        jTan2+jTan3, concentration); }
+        q = q_dirBS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PP])[ktn ] = calcDistributionBC_AD(q, c1o54,      -vx2-vx3, cu_sq, f_BS,  f_TN,  omegaDiffusivity,       -jTan2-jTan3, concentration); }
+        q = q_dirBN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MP])[kts ] = calcDistributionBC_AD(q, c1o54,       vx2-vx3, cu_sq, f_BN,  f_TS,  omegaDiffusivity,        jTan2-jTan3, concentration); }
+        q = q_dirTS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PM])[kbn ] = calcDistributionBC_AD(q, c1o54,      -vx2+vx3, cu_sq, f_TS,  f_BN,  omegaDiffusivity,       -jTan2+jTan3, concentration); }
         q = q_dirTNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MMM])[kbsw] = calcDistributionBC_AD(q, c1o216,  vx1+vx2+vx3, cu_sq, f_TNE, f_BSW, omegaDiffusivity,  jTan1+jTan2+jTan3, concentration); }
         q = q_dirBSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PPP])[ktne] = calcDistributionBC_AD(q, c1o216, -vx1-vx2-vx3, cu_sq, f_BSW, f_TNE, omegaDiffusivity, -jTan1-jTan2-jTan3, concentration); }
         q = q_dirBNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MMP])[ktsw] = calcDistributionBC_AD(q, c1o216,  vx1+vx2-vx3, cu_sq, f_BNE, f_TSW, omegaDiffusivity,  jTan1+jTan2-jTan3, concentration); }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
index 1ef111330c0d4293c14d66893847689ad8fac77f..41a50e5dde7dd8e024721653f43652f2e4a17548 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -14,69 +14,69 @@ __global__ void CalcCP27(real* DD,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -124,20 +124,20 @@ __global__ void CalcCP27(real* DD,
 		////////////////////////////////////////////////////////////////////////////////
 		double PressCP;
 
-		PressCP  =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                     (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                     (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                     (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                     (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                     (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                     (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                     (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                     (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+		PressCP  =   (D.f[DIR_P00])[ke  ]+ (D.f[DIR_M00])[kw  ]+ 
+                     (D.f[DIR_0P0])[kn  ]+ (D.f[DIR_0M0])[ks  ]+
+                     (D.f[DIR_00P])[kt  ]+ (D.f[DIR_00M])[kb  ]+
+                     (D.f[DIR_PP0])[kne ]+ (D.f[DIR_MM0])[ksw ]+
+                     (D.f[DIR_PM0])[kse ]+ (D.f[DIR_MP0])[knw ]+
+                     (D.f[DIR_P0P])[kte ]+ (D.f[DIR_M0M])[kbw ]+
+                     (D.f[DIR_P0M])[kbe ]+ (D.f[DIR_M0P])[ktw ]+
+                     (D.f[DIR_0PP])[ktn ]+ (D.f[DIR_0MM])[kbs ]+
+                     (D.f[DIR_0PM])[kbn ]+ (D.f[DIR_0MP])[kts ]+
                      (D.f[DIR_000])[kzero]+ 
-                     (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                     (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                     (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                     (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
+                     (D.f[DIR_PPP])[ktne]+ (D.f[DIR_MMP])[ktsw]+ 
+                     (D.f[DIR_PMP])[ktse]+ (D.f[DIR_MPP])[ktnw]+ 
+                     (D.f[DIR_PPM])[kbne]+ (D.f[DIR_MMM])[kbsw]+ 
+                     (D.f[DIR_PMM])[kbse]+ (D.f[DIR_MPM])[kbnw];
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		cpPress[k] = PressCP;
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
index ce8fe68cd6a2e8f09f150cb0ccdec502a6278b50..15b8ecefee35463895d8bf5a48cc64868763bf93 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -16,70 +16,70 @@ __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
-														unsigned int size_Mat,
+														unsigned long long numberOfLBnodes,
 														real* DD,
 														bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -92,7 +92,7 @@ __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k < size_Mat)
+   if(k < numberOfLBnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //index
@@ -125,33 +125,33 @@ __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
       unsigned int kbsw = neighborZ[ksw];
       //////////////////////////////////////////////////////////////////////////
       real        f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,/*f_ZERO,*/f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
-	  f_E    = (D.f[DIR_P00   ])[ke   ];
-	  f_W    = (D.f[DIR_M00   ])[kw   ];
-	  f_N    = (D.f[DIR_0P0   ])[kn   ];
-	  f_S    = (D.f[DIR_0M0   ])[ks   ];
-	  f_T    = (D.f[DIR_00P   ])[kt   ];
-	  f_B    = (D.f[DIR_00M   ])[kb   ];
-	  f_NE   = (D.f[DIR_PP0  ])[kne  ];
-	  f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-	  f_SE   = (D.f[DIR_PM0  ])[kse  ];
-	  f_NW   = (D.f[DIR_MP0  ])[knw  ];
-	  f_TE   = (D.f[DIR_P0P  ])[kte  ];
-	  f_BW   = (D.f[DIR_M0M  ])[kbw  ];
-	  f_BE   = (D.f[DIR_P0M  ])[kbe  ];
-	  f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-	  f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-	  f_BS   = (D.f[DIR_0MM  ])[kbs  ];
-	  f_BN   = (D.f[DIR_0PM  ])[kbn  ];
-	  f_TS   = (D.f[DIR_0MP  ])[kts  ];
+	  f_E    = (D.f[DIR_P00])[ke   ];
+	  f_W    = (D.f[DIR_M00])[kw   ];
+	  f_N    = (D.f[DIR_0P0])[kn   ];
+	  f_S    = (D.f[DIR_0M0])[ks   ];
+	  f_T    = (D.f[DIR_00P])[kt   ];
+	  f_B    = (D.f[DIR_00M])[kb   ];
+	  f_NE   = (D.f[DIR_PP0])[kne  ];
+	  f_SW   = (D.f[DIR_MM0])[ksw  ];
+	  f_SE   = (D.f[DIR_PM0])[kse  ];
+	  f_NW   = (D.f[DIR_MP0])[knw  ];
+	  f_TE   = (D.f[DIR_P0P])[kte  ];
+	  f_BW   = (D.f[DIR_M0M])[kbw  ];
+	  f_BE   = (D.f[DIR_P0M])[kbe  ];
+	  f_TW   = (D.f[DIR_M0P])[ktw  ];
+	  f_TN   = (D.f[DIR_0PP])[ktn  ];
+	  f_BS   = (D.f[DIR_0MM])[kbs  ];
+	  f_BN   = (D.f[DIR_0PM])[kbn  ];
+	  f_TS   = (D.f[DIR_0MP])[kts  ];
 	  //f_ZERO = (D.f[DIR_000])[kzero];
-	  f_TNE  = (D.f[DIR_PPP ])[ktne ];
-	  f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-	  f_TSE  = (D.f[DIR_PMP ])[ktse ];
-	  f_TNW  = (D.f[DIR_MPP ])[ktnw ];
-	  f_BNE  = (D.f[DIR_PPM ])[kbne ];
-	  f_BSW  = (D.f[DIR_MMM ])[kbsw ];
-	  f_BSE  = (D.f[DIR_PMM ])[kbse ];
-	  f_BNW  = (D.f[DIR_MPM ])[kbnw ];
+	  f_TNE  = (D.f[DIR_PPP])[ktne ];
+	  f_TSW  = (D.f[DIR_MMP])[ktsw ];
+	  f_TSE  = (D.f[DIR_PMP])[ktse ];
+	  f_TNW  = (D.f[DIR_MPP])[ktnw ];
+	  f_BNE  = (D.f[DIR_PPM])[kbne ];
+	  f_BSW  = (D.f[DIR_MMM])[kbsw ];
+	  f_BSE  = (D.f[DIR_PMM])[kbse ];
+	  f_BNW  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
 	  real vx1, vx2, vx3;
       kxyFromfcNEQ[k]       = c0o1;
@@ -215,70 +215,70 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
 													unsigned int* neighborZ,
-													unsigned int size_Mat,
+													unsigned long long numberOfLBnodes,
 													real* DD,
 													bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -291,7 +291,7 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k < size_Mat)
+   if(k < numberOfLBnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //index
@@ -325,33 +325,33 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
       //////////////////////////////////////////////////////////////////////////
       real f_ZERO;
       real        f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
-	  f_E    = (D.f[DIR_P00   ])[ke   ];
-	  f_W    = (D.f[DIR_M00   ])[kw   ];
-	  f_N    = (D.f[DIR_0P0   ])[kn   ];
-	  f_S    = (D.f[DIR_0M0   ])[ks   ];
-	  f_T    = (D.f[DIR_00P   ])[kt   ];
-	  f_B    = (D.f[DIR_00M   ])[kb   ];
-	  f_NE   = (D.f[DIR_PP0  ])[kne  ];
-	  f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-	  f_SE   = (D.f[DIR_PM0  ])[kse  ];
-	  f_NW   = (D.f[DIR_MP0  ])[knw  ];
-	  f_TE   = (D.f[DIR_P0P  ])[kte  ];
-	  f_BW   = (D.f[DIR_M0M  ])[kbw  ];
-	  f_BE   = (D.f[DIR_P0M  ])[kbe  ];
-	  f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-	  f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-	  f_BS   = (D.f[DIR_0MM  ])[kbs  ];
-	  f_BN   = (D.f[DIR_0PM  ])[kbn  ];
-	  f_TS   = (D.f[DIR_0MP  ])[kts  ];
+	  f_E    = (D.f[DIR_P00])[ke   ];
+	  f_W    = (D.f[DIR_M00])[kw   ];
+	  f_N    = (D.f[DIR_0P0])[kn   ];
+	  f_S    = (D.f[DIR_0M0])[ks   ];
+	  f_T    = (D.f[DIR_00P])[kt   ];
+	  f_B    = (D.f[DIR_00M])[kb   ];
+	  f_NE   = (D.f[DIR_PP0])[kne  ];
+	  f_SW   = (D.f[DIR_MM0])[ksw  ];
+	  f_SE   = (D.f[DIR_PM0])[kse  ];
+	  f_NW   = (D.f[DIR_MP0])[knw  ];
+	  f_TE   = (D.f[DIR_P0P])[kte  ];
+	  f_BW   = (D.f[DIR_M0M])[kbw  ];
+	  f_BE   = (D.f[DIR_P0M])[kbe  ];
+	  f_TW   = (D.f[DIR_M0P])[ktw  ];
+	  f_TN   = (D.f[DIR_0PP])[ktn  ];
+	  f_BS   = (D.f[DIR_0MM])[kbs  ];
+	  f_BN   = (D.f[DIR_0PM])[kbn  ];
+	  f_TS   = (D.f[DIR_0MP])[kts  ];
 	  f_ZERO = (D.f[DIR_000])[kzero];
-	  f_TNE  = (D.f[DIR_PPP ])[ktne ];
-	  f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-	  f_TSE  = (D.f[DIR_PMP ])[ktse ];
-	  f_TNW  = (D.f[DIR_MPP ])[ktnw ];
-	  f_BNE  = (D.f[DIR_PPM ])[kbne ];
-	  f_BSW  = (D.f[DIR_MMM ])[kbsw ];
-	  f_BSE  = (D.f[DIR_PMM ])[kbse ];
-	  f_BNW  = (D.f[DIR_MPM ])[kbnw ];
+	  f_TNE  = (D.f[DIR_PPP])[ktne ];
+	  f_TSW  = (D.f[DIR_MMP])[ktsw ];
+	  f_TSE  = (D.f[DIR_PMP])[ktse ];
+	  f_TNW  = (D.f[DIR_MPP])[ktnw ];
+	  f_BNE  = (D.f[DIR_PPM])[kbne ];
+	  f_BSW  = (D.f[DIR_MMM])[kbsw ];
+	  f_BSE  = (D.f[DIR_PMM])[kbse ];
+	  f_BNW  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
 	  real drho;
 	  real vx1, vx2, vx3, rho;
@@ -423,7 +423,7 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -437,7 +437,7 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -448,63 +448,63 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -517,33 +517,33 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
 						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
@@ -857,7 +857,7 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 													unsigned int* neighborY,
 													unsigned int* neighborZ,
 													real* DDStart,
-													int size_Mat,
+													unsigned long long numberOfLBnodes,
 													bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -871,7 +871,7 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -882,63 +882,63 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -951,33 +951,33 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -1298,7 +1298,7 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 															unsigned int* neighborY,
 															unsigned int* neighborZ,
 															real* DDStart,
-															int size_Mat,
+															unsigned long long numberOfLBnodes,
 															bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -1312,7 +1312,7 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -1323,63 +1323,63 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1392,33 +1392,33 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
 						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
@@ -1752,7 +1752,7 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -1766,7 +1766,7 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -1777,63 +1777,63 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1846,33 +1846,33 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
index d246f39a030b6df0b249aee17f37b7d5258ff00d..0986a42b07351456f684ae5141d38245e5e17c57 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
@@ -33,9 +33,9 @@
 /* Device code */
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -45,7 +45,7 @@ __global__ void CalcConc27(
 	uint* neighborX,
 	uint* neighborY,
 	uint* neighborZ,
-	uint size_Mat,
+	unsigned long long numberOfLBnodes,
 	real* distributionsAD,
 	bool isEvenTimestep)
 {
@@ -67,7 +67,7 @@ __global__ void CalcConc27(
 
    //////////////////////////////////////////////////////////////////////////
    // run for all indices in size_Mat and fluid nodes
-   if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID))
+   if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID))
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -76,63 +76,63 @@ __global__ void CalcConc27(
       Distributions27 distAD;
       if (isEvenTimestep)
       {
-         distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
-         distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
-         distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-         distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-         distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
-         distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
-         distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-         distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-         distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-         distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-         distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
-         distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
-         distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
-         distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
-         distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
-         distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
-         distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
-         distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
-         distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-         distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
-         distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
-         distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
-         distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
-         distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
-         distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
-         distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
-         distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
+         distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+         distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+         distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+         distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+         distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+         distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+         distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+         distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+         distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+         distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+         distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+         distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+         distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+         distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+         distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+         distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+         distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+         distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+         distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+         distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+         distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+         distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+         distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+         distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+         distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+         distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+         distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
-         distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
-         distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-         distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-         distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
-         distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
-         distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-         distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-         distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-         distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-         distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
-         distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
-         distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
-         distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
-         distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
-         distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
-         distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
-         distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
-         distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-         distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
-         distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
-         distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
-         distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
-         distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
-         distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
-         distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
-         distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
+         distAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+         distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+         distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+         distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+         distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+         distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+         distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+         distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+         distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+         distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+         distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+         distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+         distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+         distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+         distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+         distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+         distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+         distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+         distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+         distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+         distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+         distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
+         distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+         distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+         distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+         distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+         distAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
       }
 	  ////////////////////////////////////////////////////////////////////////////////
 	  //! - Set neighbor indices (necessary for indirect addressing)
@@ -166,33 +166,33 @@ __global__ void CalcConc27(
 	  ////////////////////////////////////////////////////////////////////////////////
 	  //! - Set local distributions
 	  //!
-	  real mfcbb = (distAD.f[DIR_P00   ])[ke  ];
-	  real mfabb = (distAD.f[DIR_M00   ])[kw  ];
-	  real mfbcb = (distAD.f[DIR_0P0   ])[kn  ];
-	  real mfbab = (distAD.f[DIR_0M0   ])[ks  ];
-	  real mfbbc = (distAD.f[DIR_00P   ])[kt  ];
-	  real mfbba = (distAD.f[DIR_00M   ])[kb  ];
-	  real mfccb = (distAD.f[DIR_PP0  ])[kne ];
-	  real mfaab = (distAD.f[DIR_MM0  ])[ksw ];
-	  real mfcab = (distAD.f[DIR_PM0  ])[kse ];
-	  real mfacb = (distAD.f[DIR_MP0  ])[knw ];
-	  real mfcbc = (distAD.f[DIR_P0P  ])[kte ];
-	  real mfaba = (distAD.f[DIR_M0M  ])[kbw ];
-	  real mfcba = (distAD.f[DIR_P0M  ])[kbe ];
-	  real mfabc = (distAD.f[DIR_M0P  ])[ktw ];
-	  real mfbcc = (distAD.f[DIR_0PP  ])[ktn ];
-	  real mfbaa = (distAD.f[DIR_0MM  ])[kbs ];
-	  real mfbca = (distAD.f[DIR_0PM  ])[kbn ];
-	  real mfbac = (distAD.f[DIR_0MP  ])[kts ];
+	  real mfcbb = (distAD.f[DIR_P00])[ke  ];
+	  real mfabb = (distAD.f[DIR_M00])[kw  ];
+	  real mfbcb = (distAD.f[DIR_0P0])[kn  ];
+	  real mfbab = (distAD.f[DIR_0M0])[ks  ];
+	  real mfbbc = (distAD.f[DIR_00P])[kt  ];
+	  real mfbba = (distAD.f[DIR_00M])[kb  ];
+	  real mfccb = (distAD.f[DIR_PP0])[kne ];
+	  real mfaab = (distAD.f[DIR_MM0])[ksw ];
+	  real mfcab = (distAD.f[DIR_PM0])[kse ];
+	  real mfacb = (distAD.f[DIR_MP0])[knw ];
+	  real mfcbc = (distAD.f[DIR_P0P])[kte ];
+	  real mfaba = (distAD.f[DIR_M0M])[kbw ];
+	  real mfcba = (distAD.f[DIR_P0M])[kbe ];
+	  real mfabc = (distAD.f[DIR_M0P])[ktw ];
+	  real mfbcc = (distAD.f[DIR_0PP])[ktn ];
+	  real mfbaa = (distAD.f[DIR_0MM])[kbs ];
+	  real mfbca = (distAD.f[DIR_0PM])[kbn ];
+	  real mfbac = (distAD.f[DIR_0MP])[kts ];
 	  real mfbbb = (distAD.f[DIR_000])[k   ];
-	  real mfccc = (distAD.f[DIR_PPP ])[ktne];
-	  real mfaac = (distAD.f[DIR_MMP ])[ktsw];
-	  real mfcac = (distAD.f[DIR_PMP ])[ktse];
-	  real mfacc = (distAD.f[DIR_MPP ])[ktnw];
-	  real mfcca = (distAD.f[DIR_PPM ])[kbne];
-	  real mfaaa = (distAD.f[DIR_MMM ])[kbsw];
-	  real mfcaa = (distAD.f[DIR_PMM ])[kbse];
-	  real mfaca = (distAD.f[DIR_MPM ])[kbnw];
+	  real mfccc = (distAD.f[DIR_PPP])[ktne];
+	  real mfaac = (distAD.f[DIR_MMP])[ktsw];
+	  real mfcac = (distAD.f[DIR_PMP])[ktse];
+	  real mfacc = (distAD.f[DIR_MPP])[ktnw];
+	  real mfcca = (distAD.f[DIR_PPM])[kbne];
+	  real mfaaa = (distAD.f[DIR_MMM])[kbsw];
+	  real mfcaa = (distAD.f[DIR_PMM])[kbse];
+	  real mfaca = (distAD.f[DIR_MPM])[kbnw];
       //////////////////////////////////////////////////////////////////////////
 	  //! - Calculate concentration using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
 	  //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -229,30 +229,30 @@ __global__ void CalcConc7( real* Conc,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD7,
                                           bool isEvenTimestep)
 {
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    } 
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -265,7 +265,7 @@ __global__ void CalcConc7( real* Conc,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //index
@@ -327,63 +327,63 @@ __global__ void CalcConc7( real* Conc,
 //    Distributions27 D27;
 //    if (isEvenTimestep==true)
 //    {
-//       D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-//       D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-//       D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-//       D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-//       D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-//       D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-//       D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-//       D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-//       D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-//       D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-//       D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-//       D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-//       D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-//       D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-//       D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-//       D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-//       D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-//       D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-//       D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-//       D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-//       D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-//       D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-//       D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-//       D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-//       D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-//       D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-//       D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+//       D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat];
+//       D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat];
+//       D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat];
+//       D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat];
+//       D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat];
+//       D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat];
+//       D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat];
+//       D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat];
+//       D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat];
+//       D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat];
+//       D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat];
+//       D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat];
+//       D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat];
+//       D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat];
+//       D27.f[DIR_0PP] = &DD27[DIR_0PP * size_Mat];
+//       D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat];
+//       D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat];
+//       D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat];
+//       D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+//       D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat];
+//       D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat];
+//       D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat];
+//       D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat];
+//       D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat];
+//       D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat];
+//       D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat];
+//       D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat];
 //    }
 //    else
 //    {
-//       D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-//       D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-//       D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-//       D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-//       D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-//       D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-//       D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-//       D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-//       D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-//       D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-//       D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-//       D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-//       D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-//       D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-//       D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-//       D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-//       D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-//       D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-//       D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-//       D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-//       D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-//       D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
-//       D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-//       D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-//       D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-//       D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-//       D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+//       D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat];
+//       D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat];
+//       D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat];
+//       D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat];
+//       D27.f[DIR_00M] = &DD27[DIR_00P * size_Mat];
+//       D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat];
+//       D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat];
+//       D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat];
+//       D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat];
+//       D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat];
+//       D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat];
+//       D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat];
+//       D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat];
+//       D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat];
+//       D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat];
+//       D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat];
+//       D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat];
+//       D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat];
+//       D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+//       D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat];
+//       D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat];
+//       D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat];
+//       D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat];
+//       D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat];
+//       D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat];
+//       D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat];
+//       D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat];
 //    }
 //    ////////////////////////////////////////////////////////////////////////////////
 //    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -432,20 +432,20 @@ __global__ void CalcConc7( real* Conc,
 
 //       if(geoD[k] == GEO_FLUID)
 //       {
-//          Conc[k]    =   (D27.f[DIR_P00   ])[ke  ]+ (D27.f[DIR_M00   ])[kw  ]+ 
-//                         (D27.f[DIR_0P0   ])[kn  ]+ (D27.f[DIR_0M0   ])[ks  ]+
-//                         (D27.f[DIR_00P   ])[kt  ]+ (D27.f[DIR_00M   ])[kb  ]+
-//                         (D27.f[DIR_PP0  ])[kne ]+ (D27.f[DIR_MM0  ])[ksw ]+
-//                         (D27.f[DIR_PM0  ])[kse ]+ (D27.f[DIR_MP0  ])[knw ]+
-//                         (D27.f[DIR_P0P  ])[kte ]+ (D27.f[DIR_M0M  ])[kbw ]+
-//                         (D27.f[DIR_P0M  ])[kbe ]+ (D27.f[DIR_M0P  ])[ktw ]+
-//                         (D27.f[DIR_0PP  ])[ktn ]+ (D27.f[DIR_0MM  ])[kbs ]+
-//                         (D27.f[DIR_0PM  ])[kbn ]+ (D27.f[DIR_0MP  ])[kts ]+
+//          Conc[k]    =   (D27.f[DIR_P00])[ke  ]+ (D27.f[DIR_M00])[kw  ]+ 
+//                         (D27.f[DIR_0P0])[kn  ]+ (D27.f[DIR_0M0])[ks  ]+
+//                         (D27.f[DIR_00P])[kt  ]+ (D27.f[DIR_00M])[kb  ]+
+//                         (D27.f[DIR_PP0])[kne ]+ (D27.f[DIR_MM0])[ksw ]+
+//                         (D27.f[DIR_PM0])[kse ]+ (D27.f[DIR_MP0])[knw ]+
+//                         (D27.f[DIR_P0P])[kte ]+ (D27.f[DIR_M0M])[kbw ]+
+//                         (D27.f[DIR_P0M])[kbe ]+ (D27.f[DIR_M0P])[ktw ]+
+//                         (D27.f[DIR_0PP])[ktn ]+ (D27.f[DIR_0MM])[kbs ]+
+//                         (D27.f[DIR_0PM])[kbn ]+ (D27.f[DIR_0MP])[kts ]+
 //                         (D27.f[DIR_000])[kzero]+ 
-//                         (D27.f[DIR_PPP ])[ktne]+ (D27.f[DIR_MMP ])[ktsw]+
-//                         (D27.f[DIR_PMP ])[ktse]+ (D27.f[DIR_MPP ])[ktnw]+
-//                         (D27.f[DIR_PPM ])[kbne]+ (D27.f[DIR_MMM ])[kbsw]+
-//                         (D27.f[DIR_PMM ])[kbse]+ (D27.f[DIR_MPM ])[kbnw];
+//                         (D27.f[DIR_PPP])[ktne]+ (D27.f[DIR_MMP])[ktsw]+
+//                         (D27.f[DIR_PMP])[ktse]+ (D27.f[DIR_MPP])[ktnw]+
+//                         (D27.f[DIR_PPM])[kbne]+ (D27.f[DIR_MMM])[kbsw]+
+//                         (D27.f[DIR_PMM])[kbse]+ (D27.f[DIR_MPM])[kbnw];
 //       }
 //    }   
 // }
@@ -476,30 +476,30 @@ __global__ void GetPlaneConc7(real* Conc,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat,
+											unsigned long long numberOfLBnodes,
 											real* DD7,
 											bool isEvenTimestep)
 {
    Distributions7 D7;
    if (isEvenTimestep==true)
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[1] = &DD7[1*size_Mat];
-      D7.f[2] = &DD7[2*size_Mat];
-      D7.f[3] = &DD7[3*size_Mat];
-      D7.f[4] = &DD7[4*size_Mat];
-      D7.f[5] = &DD7[5*size_Mat];
-      D7.f[6] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[1] = &DD7[1*numberOfLBnodes];
+      D7.f[2] = &DD7[2*numberOfLBnodes];
+      D7.f[3] = &DD7[3*numberOfLBnodes];
+      D7.f[4] = &DD7[4*numberOfLBnodes];
+      D7.f[5] = &DD7[5*numberOfLBnodes];
+      D7.f[6] = &DD7[6*numberOfLBnodes];
    } 
    else
    {
-      D7.f[0] = &DD7[0*size_Mat];
-      D7.f[2] = &DD7[1*size_Mat];
-      D7.f[1] = &DD7[2*size_Mat];
-      D7.f[4] = &DD7[3*size_Mat];
-      D7.f[3] = &DD7[4*size_Mat];
-      D7.f[6] = &DD7[5*size_Mat];
-      D7.f[5] = &DD7[6*size_Mat];
+      D7.f[0] = &DD7[0*numberOfLBnodes];
+      D7.f[2] = &DD7[1*numberOfLBnodes];
+      D7.f[1] = &DD7[2*numberOfLBnodes];
+      D7.f[4] = &DD7[3*numberOfLBnodes];
+      D7.f[3] = &DD7[4*numberOfLBnodes];
+      D7.f[6] = &DD7[5*numberOfLBnodes];
+      D7.f[5] = &DD7[6*numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -581,70 +581,70 @@ __global__ void GetPlaneConc27(real* Conc,
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
 											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
+											 unsigned long long numberOfLBnodes,
 											 real* DD27,
 											 bool isEvenTimestep)
 {
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
    }
    else
    {
-      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
-      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+      D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+      D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+      D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+      D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+      D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+      D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+      D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+      D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+      D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+      D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+      D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+      D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+      D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+      D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+      D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+      D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+      D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+      D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+      D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+      D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+      D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
+      D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+      D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+      D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+      D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+      D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -693,20 +693,20 @@ __global__ void GetPlaneConc27(real* Conc,
 
       if(geoD[k] == GEO_FLUID)
       {
-         Conc[k]    =   (D27.f[DIR_P00   ])[ke  ]+ (D27.f[DIR_M00   ])[kw  ]+ 
-                        (D27.f[DIR_0P0   ])[kn  ]+ (D27.f[DIR_0M0   ])[ks  ]+
-                        (D27.f[DIR_00P   ])[kt  ]+ (D27.f[DIR_00M   ])[kb  ]+
-                        (D27.f[DIR_PP0  ])[kne ]+ (D27.f[DIR_MM0  ])[ksw ]+
-                        (D27.f[DIR_PM0  ])[kse ]+ (D27.f[DIR_MP0  ])[knw ]+
-                        (D27.f[DIR_P0P  ])[kte ]+ (D27.f[DIR_M0M  ])[kbw ]+
-                        (D27.f[DIR_P0M  ])[kbe ]+ (D27.f[DIR_M0P  ])[ktw ]+
-                        (D27.f[DIR_0PP  ])[ktn ]+ (D27.f[DIR_0MM  ])[kbs ]+
-                        (D27.f[DIR_0PM  ])[kbn ]+ (D27.f[DIR_0MP  ])[kts ]+
+         Conc[k]    =   (D27.f[DIR_P00])[ke  ]+ (D27.f[DIR_M00])[kw  ]+ 
+                        (D27.f[DIR_0P0])[kn  ]+ (D27.f[DIR_0M0])[ks  ]+
+                        (D27.f[DIR_00P])[kt  ]+ (D27.f[DIR_00M])[kb  ]+
+                        (D27.f[DIR_PP0])[kne ]+ (D27.f[DIR_MM0])[ksw ]+
+                        (D27.f[DIR_PM0])[kse ]+ (D27.f[DIR_MP0])[knw ]+
+                        (D27.f[DIR_P0P])[kte ]+ (D27.f[DIR_M0M])[kbw ]+
+                        (D27.f[DIR_P0M])[kbe ]+ (D27.f[DIR_M0P])[ktw ]+
+                        (D27.f[DIR_0PP])[ktn ]+ (D27.f[DIR_0MM])[kbs ]+
+                        (D27.f[DIR_0PM])[kbn ]+ (D27.f[DIR_0MP])[kts ]+
                         (D27.f[DIR_000])[kzero]+ 
-                        (D27.f[DIR_PPP ])[ktne]+ (D27.f[DIR_MMP ])[ktsw]+
-                        (D27.f[DIR_PMP ])[ktse]+ (D27.f[DIR_MPP ])[ktnw]+
-                        (D27.f[DIR_PPM ])[kbne]+ (D27.f[DIR_MMM ])[kbsw]+
-                        (D27.f[DIR_PMM ])[kbse]+ (D27.f[DIR_MPM ])[kbnw];
+                        (D27.f[DIR_PPP])[ktne]+ (D27.f[DIR_MMP])[ktsw]+
+                        (D27.f[DIR_PMP])[ktse]+ (D27.f[DIR_MPP])[ktnw]+
+                        (D27.f[DIR_PPM])[kbne]+ (D27.f[DIR_MMM])[kbsw]+
+                        (D27.f[DIR_PMM])[kbse]+ (D27.f[DIR_MPM])[kbnw];
       }
    }   
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
index 4792b8846b2612383c07a97419e0473b21ebd187..8907e846757c8923c3aed46f9c90d6c67f465eee 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
@@ -1,306 +1,310 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//////////////////////////////////////////////////////////////////////////
-/* Device code */
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CalcMac27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Soeren Peters
+//======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include "lbm/constants/NumericConstants.h"
+#include "basics/constants/NumericConstants.h"
+#include "lbm/MacroscopicQuantities.h"
+
+#include "Kernel/Utilities/DistributionHelper.cuh"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
+
+////////////////////////////////////////////////////////////////////////////////
+__global__ void LBCalcMac27( // kernel: stores density and incompressible velocity of node k, computed from its distributions
+    real* vxD,                           //!< out: velocity component X1, written at index k
+    real* vyD,                           //!< out: velocity component X2, written at index k
+    real* vzD,                           //!< out: velocity component X3, written at index k
+    real* rhoD,                          //!< out: density, written at index k
+    unsigned int* geoD,                  //!< in: per-node geometry value, tested with isValidFluidNode
+    unsigned int* neighborX,             //!< in: neighbor index array, forwarded to DistributionWrapper
+    unsigned int* neighborY,             //!< in: neighbor index array, forwarded to DistributionWrapper
+    unsigned int* neighborZ,             //!< in: neighbor index array, forwarded to DistributionWrapper
+    unsigned long long numberOfLBnodes,  //!< in: threads with k >= numberOfLBnodes return immediately
+    real* distributions,                 //!< in: distribution array, forwarded to DistributionWrapper
+    bool isEvenTimestep)                 //!< in: timestep flag, forwarded to DistributionWrapper
+{
+    const unsigned int tx = threadIdx.x;    // thread index = local i index
+    const unsigned int by = blockIdx.x;     // block index x
+    const unsigned int bz = blockIdx.y;     // block index y
+    const unsigned int x = tx + STARTOFFX;  // global x index
+    const unsigned int y = by + STARTOFFY;  // global y index
+    const unsigned int z = bz + STARTOFFZ;  // global z index
+ 
+    const unsigned nx = blockDim.x + 2 * STARTOFFX; // x-extent used for linearization: blockDim.x plus 2*STARTOFFX
+    const unsigned ny = gridDim.x + 2 * STARTOFFY;  // y-extent used for linearization: gridDim.x plus 2*STARTOFFY
+ 
+    const unsigned int k = nx*(ny*z + y) + x; // linear index used to access the device arrays
+ 
+ 
+    if(k >= numberOfLBnodes) // index lies beyond the node count: nothing to compute
+        return;
+ 
+    if(!isValidFluidNode(geoD[k])) // outputs of rejected nodes are not touched by this kernel
+       return;
+ 
+    rhoD[k] = c0o1; // reset the four outputs to c0o1; each one is overwritten again below
+    vxD[k]  = c0o1;
+    vyD[k]  = c0o1;
+    vzD[k]  = c0o1;
+ 
+    DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, k, neighborX, neighborY, neighborZ);
+    const auto& distribution = distr_wrapper.distribution; // only distribution.f is read below
+ 
+    rhoD[k] = vf::lbm::getDensity(distribution.f);
+    vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f);
+    vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f);
+    vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f);
+}
+
 
-#include "lbm/MacroscopicQuantities.h"
 
-#include "../Kernel/Utilities/DistributionHelper.cuh"
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMac27( real* vxD,
-                                        real* vyD,
-                                        real* vzD,
-                                        real* rhoD,
-                                        unsigned int* geoD,
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        unsigned int size_Mat,
-                                        real* distributions,
-                                        bool isEvenTimestep)
+__global__ void LBCalcMacSP27( // kernel: density, velocity and pressure for GEO_FLUID nodes; other nodes get c0o1
+    real* vxD,                           //!< out: x-velocity sum, written at nodeIndex
+    real* vyD,                           //!< out: y-velocity sum, written at nodeIndex
+    real* vzD,                           //!< out: z-velocity sum, written at nodeIndex
+    real* rhoD,                          //!< out: sum of all 27 distributions, written at nodeIndex
+    real* pressD,                        //!< out: pressure expression, written at nodeIndex
+    unsigned int* geoD,                  //!< in: per-node geometry value, compared with GEO_FLUID
+    unsigned int* neighborX,             //!< in: neighbor index array used for the kw-type indices
+    unsigned int* neighborY,             //!< in: neighbor index array used for the ks-type indices
+    unsigned int* neighborZ,             //!< in: neighbor index array used for the kb-type indices
+    unsigned long long numberOfLBnodes,  //!< in: only nodeIndex < numberOfLBnodes is processed
+    real* distributions,                 //!< in: distribution array, passed to getPointersToDistributions
+    bool isEvenTimestep)                 //!< in: timestep flag, passed to getPointersToDistributions
 {
-   const unsigned int tx = threadIdx.x;    // Thread index = lokaler i index
-   const unsigned int by = blockIdx.x;     // Block index x
-   const unsigned int bz = blockIdx.y;     // Block index y
-   const unsigned int x = tx + STARTOFFX;  // Globaler x-Index 
-   const unsigned int y = by + STARTOFFY;  // Globaler y-Index 
-   const unsigned int z = bz + STARTOFFZ;  // Globaler z-Index 
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+   
+    //////////////////////////////////////////////////////////////////////////
+    if(nodeIndex<numberOfLBnodes)
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
+        //! timestep is based on the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist;
+        getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+       
+        //////////////////////////////////////////////////////////////////////////
+        //index: each M in the paired DIR_* name adds one neighborX/Y/Z lookup; directions without M use nodeIndex itself
+        unsigned int kzero= nodeIndex;
+        unsigned int ke   = nodeIndex;
+        unsigned int kw   = neighborX[nodeIndex];
+        unsigned int kn   = nodeIndex;
+        unsigned int ks   = neighborY[nodeIndex];
+        unsigned int kt   = nodeIndex;
+        unsigned int kb   = neighborZ[nodeIndex];
+        unsigned int ksw  = neighborY[kw];
+        unsigned int kne  = nodeIndex;
+        unsigned int kse  = ks;
+        unsigned int knw  = kw;
+        unsigned int kbw  = neighborZ[kw];
+        unsigned int kte  = nodeIndex;
+        unsigned int kbe  = kb;
+        unsigned int ktw  = kw;
+        unsigned int kbs  = neighborZ[ks];
+        unsigned int ktn  = nodeIndex;
+        unsigned int kbn  = kb;
+        unsigned int kts  = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = nodeIndex;
+        unsigned int kbsw = neighborZ[ksw];
+        //////////////////////////////////////////////////////////////////////////
+        pressD[nodeIndex] = c0o1; // reset all five outputs before the geometry test
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+       
+        if(geoD[nodeIndex] == GEO_FLUID) // every other node keeps the c0o1 values written above
+        {
+            rhoD[nodeIndex] = // plain sum over all 27 distributions
+                (dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_000])[kzero]+ 
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw];
+           
+            vxD[nodeIndex] = // directions with P in the first position minus those with M in the first position
+                (dist.f[DIR_P00])[ke  ]- (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw];
+           
+            vyD[nodeIndex] = // directions with P in the second position minus those with M in the second position
+                (dist.f[DIR_0P0])[kn  ]- (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]-
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw];
+           
+            vzD[nodeIndex] = // directions with P in the third position minus those with M in the third position
+                (dist.f[DIR_00P])[kt  ]- (dist.f[DIR_00M])[kb  ]+
+                (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]-
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]-
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw];
+           
+            pressD[nodeIndex] = // uses the rhoD/vxD/vyD/vzD values stored just above
+                ((dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                2.f*( // weight 2: the twelve directions with two non-zero components
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+
+                3.f*( // weight 3: the eight directions with three non-zero components
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])-
+                rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+c0o1*rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]; // times zero for incompressible case   
+            //caution: op hard-coded, assuming op = 1;                                                    ^^^^(1.0/op-0.5)=0.5
+       }
+    }
+}
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
 
-   const unsigned nx = blockDim.x + 2 * STARTOFFX;
-   const unsigned ny = gridDim.x + 2 * STARTOFFY;
 
-   const unsigned int k = nx*(ny*z + y) + x; // Zugriff auf arrays im device
 
 
-   if(k >= size_Mat)
-      return;
 
-   if(!vf::gpu::isValidFluidNode(geoD[k]))
-      return;
 
-   rhoD[k] = c0o1;
-   vxD[k]  = c0o1;
-   vyD[k]  = c0o1;
-   vzD[k]  = c0o1;
 
-   vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ);
-   const auto& distribution = distr_wrapper.distribution;
 
-   rhoD[k] = vf::lbm::getDensity(distribution.f);
-   vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f);
-   vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f);
-   vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f);
 
-}
 
 
 
 
 
-////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMacSP27( real* vxD,
-                                          real* vyD,
-                                          real* vzD,
-                                          real* rhoD,
-                                          real* pressD,
-                                          unsigned int* geoD,
-                                          unsigned int* neighborX,
-                                          unsigned int* neighborY,
-                                          unsigned int* neighborZ,
-                                          unsigned int size_Mat,
-                                          real* DD,
-                                          bool isEvenTimestep)
-{
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
 
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int kzero= k;
-      unsigned int ke   = k;
-      unsigned int kw   = neighborX[k];
-      unsigned int kn   = k;
-      unsigned int ks   = neighborY[k];
-      unsigned int kt   = k;
-      unsigned int kb   = neighborZ[k];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = k;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = k;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = k;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = k;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-	  rhoD[k]   = c0o1;
-	  vxD[k]    = c0o1;
-	  vyD[k]    = c0o1;
-	  vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-         rhoD[k]    =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_000])[kzero]+ 
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
-
-         vxD[k]     =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
-
-         vyD[k]     =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
-
-         vzD[k]     =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
-
-         pressD[k]  =  ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        2.f*(
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
-                        3.f*(
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
-                        rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+c0o1*rhoD[k])) * c1o2+rhoD[k]; // times zero for incompressible case   
-         //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
-
-      }
-   }
-}
 
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LBCalcMacCompSP27(
-   real *vxD,
-   real *vyD,
-   real *vzD,
-   real *rhoD,
-   real *pressD,
-   unsigned int *geoD,
-   unsigned int *neighborX,
-   unsigned int *neighborY,
-   unsigned int *neighborZ,
-   unsigned int size_Mat,
-   real *distributions,
-   bool isEvenTimestep)
+    real *vxD,                           //!< out: result of getCompressibleVelocityX1, written at nodeIndex
+    real *vyD,                           //!< out: result of getCompressibleVelocityX2, written at nodeIndex
+    real *vzD,                           //!< out: result of getCompressibleVelocityX3, written at nodeIndex
+    real *rhoD,                          //!< out: result of getDensity, written at nodeIndex
+    real *pressD,                        //!< out: result of getPressure, written at nodeIndex
+    unsigned int *geoD,                  //!< in: per-node geometry value, tested with isValidFluidNode
+    unsigned int *neighborX,             //!< in: neighbor index array, forwarded to DistributionWrapper
+    unsigned int *neighborY,             //!< in: neighbor index array, forwarded to DistributionWrapper
+    unsigned int *neighborZ,             //!< in: neighbor index array, forwarded to DistributionWrapper
+    unsigned long long numberOfLBnodes,  //!< in: threads with nodeIndex >= numberOfLBnodes return immediately
+    real *distributions,                 //!< in: distribution array, forwarded to DistributionWrapper
+    bool isEvenTimestep)                 //!< in: timestep flag, forwarded to DistributionWrapper
 {
-    const unsigned k = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-    if(k >= size_Mat)
+    if(nodeIndex >= numberOfLBnodes) // index lies beyond the node count: nothing to compute
         return;
 
-    pressD[k] = c0o1;
-    rhoD[k]   = c0o1;
-    vxD[k]    = c0o1;
-    vyD[k]    = c0o1;
-    vzD[k]    = c0o1;
+    pressD[nodeIndex] = c0o1; // outputs are reset before the validity test, so rejected nodes end up holding c0o1
+    rhoD[nodeIndex]   = c0o1;
+    vxD[nodeIndex]    = c0o1;
+    vyD[nodeIndex]    = c0o1;
+    vzD[nodeIndex]    = c0o1;
 
-    if (!vf::gpu::isValidFluidNode(geoD[k]))
+    if (!isValidFluidNode(geoD[nodeIndex])) // only nodes accepted here get computed values below
         return;
 
-    vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
-                                               neighborZ);
+    DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, nodeIndex, neighborX, neighborY, neighborZ);
     const auto &distribution = distr_wrapper.distribution;
 
-    rhoD[k]   = vf::lbm::getDensity(distribution.f);
-    vxD[k]    = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[k]);
-    vyD[k]    = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[k]);
-    vzD[k]    = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[k]);
-    pressD[k] = vf::lbm::getPressure(distribution.f, rhoD[k], vxD[k], vyD[k], vzD[k]); 
+    rhoD[nodeIndex]   = vf::lbm::getDensity(distribution.f); // density first: the velocity and pressure calls below take it as input
+    vxD[nodeIndex]    = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[nodeIndex]);
+    vyD[nodeIndex]    = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[nodeIndex]);
+    vzD[nodeIndex]    = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[nodeIndex]);
+    pressD[nodeIndex] = vf::lbm::getPressure(distribution.f, rhoD[nodeIndex], vxD[nodeIndex], vyD[nodeIndex], vzD[nodeIndex]); // pressure last: needs density and all three velocities
 }
 
 
@@ -339,206 +343,155 @@ __global__ void LBCalcMacCompSP27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMedSP27( real* vxD,
-                                          real* vyD,
-                                          real* vzD,
-                                          real* rhoD,
-                                          real* pressD,
-                                          unsigned int* geoD,
-                                          unsigned int* neighborX,
-                                          unsigned int* neighborY,
-                                          unsigned int* neighborZ,
-                                          unsigned int size_Mat,
-                                          real* DD,
-                                          bool isEvenTimestep)
+__global__ void LBCalcMedSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* distributions,
+    bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int kzero= k;
-      unsigned int ke   = k;
-      unsigned int kw   = neighborX[k];
-      unsigned int kn   = k;
-      unsigned int ks   = neighborY[k];
-      unsigned int kt   = k;
-      unsigned int kb   = neighborZ[k];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = k;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = k;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = k;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = k;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-      real PRESS = pressD[k];
-      real RHO   = rhoD[k];
-      real VX    = vxD[k];
-      real VY    = vyD[k];
-      real VZ    = vzD[k];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-	  rhoD[k]   = c0o1;
-	  vxD[k]    = c0o1;
-	  vyD[k]    = c0o1;
-	  vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-         rhoD[k]    =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_000])[kzero]+ 
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]+
-                        RHO;
-
-         vxD[k]     =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]+
-                        VX;
-
-         vyD[k]     =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]+
-                        VY;
-
-         vzD[k]     =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]+
-                        VZ;
-
-         pressD[k]  =   ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        c2o1*(
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
-                        c3o1*(
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
-                        rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+
-                        PRESS;    
-         //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
-      }
-   }
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if( nodeIndex < numberOfLBnodes )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
+        //! timestep is based on the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist;
+        getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+        
+        //////////////////////////////////////////////////////////////////////////
+        //index
+        unsigned int kzero= nodeIndex;
+        unsigned int ke   = nodeIndex;
+        unsigned int kw   = neighborX[nodeIndex];
+        unsigned int kn   = nodeIndex;
+        unsigned int ks   = neighborY[nodeIndex];
+        unsigned int kt   = nodeIndex;
+        unsigned int kb   = neighborZ[nodeIndex];
+        unsigned int ksw  = neighborY[kw];
+        unsigned int kne  = nodeIndex;
+        unsigned int kse  = ks;
+        unsigned int knw  = kw;
+        unsigned int kbw  = neighborZ[kw];
+        unsigned int kte  = nodeIndex;
+        unsigned int kbe  = kb;
+        unsigned int ktw  = kw;
+        unsigned int kbs  = neighborZ[ks];
+        unsigned int ktn  = nodeIndex;
+        unsigned int kbn  = kb;
+        unsigned int kts  = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = nodeIndex;
+        unsigned int kbsw = neighborZ[ksw];
+        //////////////////////////////////////////////////////////////////////////
+        real PRESS = pressD[nodeIndex];
+        real RHO   = rhoD[nodeIndex];
+        real VX    = vxD[nodeIndex];
+        real VY    = vyD[nodeIndex];
+        real VZ    = vzD[nodeIndex];
+        //////////////////////////////////////////////////////////////////////////
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+        
+        if(geoD[nodeIndex] == GEO_FLUID)
+        {
+            rhoD[nodeIndex] =
+                (dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_000])[kzero]+ 
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]+
+                RHO;
+            
+            vxD[nodeIndex] =
+                (dist.f[DIR_P00])[ke  ]- (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]+
+                VX;
+            
+            vyD[nodeIndex] =
+                (dist.f[DIR_0P0])[kn  ]- (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]-
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw]+
+                VY;
+            
+            vzD[nodeIndex] =
+                (dist.f[DIR_00P])[kt  ]- (dist.f[DIR_00M])[kb  ]+
+                (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]-
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]-
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- 
+                (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw]+
+                VZ;
+            
+            pressD[nodeIndex] =
+                ((dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                c2o1*(
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+
+                c3o1*(
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])-
+                rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]+
+                PRESS;    
+            //caution: op is hard-coded, assuming op = 1, so (1.0/op-0.5)=0.5 (presumably the c1o2 factor in the line above)
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -563,259 +516,152 @@ __global__ void LBCalcMedSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMedCompSP27( real* vxD,
-											  real* vyD,
-											  real* vzD,
-											  real* rhoD,
-											  real* pressD,
-											  unsigned int* geoD,
-											  unsigned int* neighborX,
-											  unsigned int* neighborY,
-											  unsigned int* neighborZ,
-											  unsigned int size_Mat,
-											  real* DD,
-											  bool isEvenTimestep)
+__global__ void LBCalcMedCompSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* distributions,
+    bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      //unsigned int kzero= k;
-      unsigned int ke   = k;
-      unsigned int kw   = neighborX[k];
-      unsigned int kn   = k;
-      unsigned int ks   = neighborY[k];
-      unsigned int kt   = k;
-      unsigned int kb   = neighborZ[k];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = k;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = k;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = k;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = k;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-      real PRESS = pressD[k];
-      real RHO   = rhoD[k];
-      real VX    = vxD[k];
-      real VY    = vyD[k];
-      real VZ    = vzD[k];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-	  rhoD[k]   = c0o1;
-	  vxD[k]    = c0o1;
-	  vyD[k]    = c0o1;
-	  vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-		  real mfcbb = (D.f[DIR_P00])[k];//[ke   ];
-		  real mfabb = (D.f[DIR_M00])[kw];//[kw   ];  
-		  real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];
-		  real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];  
-		  real mfbbc = (D.f[DIR_00P])[k];//[kt   ];
-		  real mfbba = (D.f[DIR_00M])[kb];//[kb   ];  
-		  real mfccb = (D.f[DIR_PP0])[k];//[kne  ];  
-		  real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];
-		  real mfcab = (D.f[DIR_PM0])[ks];//[kse  ]; 
-		  real mfacb = (D.f[DIR_MP0])[kw];//[knw  ]; 
-		  real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];  
-		  real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];
-		  real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ]; 
-		  real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ]; 
-		  real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];  
-		  real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];
-		  real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ]; 
-		  real mfbac = (D.f[DIR_0MP])[ks];//[kts  ]; 
-		  real mfbbb = (D.f[DIR_000])[k];//[kzero];
-		  real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; 
-		  real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; 
-		  real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];
-		  real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];
-		  real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];
-		  real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];
-		  real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; 
-		  real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; 
-		  ////////////////////////////////////////////////////////////////////////////////////
-		  real drho = 
-			  ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-			  (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-			  ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-
-		  real rho = c1o1 + drho;
-		  
-		  rhoD[k] = drho + RHO;
-
-		  vxD[k] = 
-			  (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-			  (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-			  (mfcbb - mfabb)) / rho) + VX;
-		  vyD[k] = 
-			  (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-			  (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-			  (mfbcb - mfbab)) / rho) + VY;
-		  vzD[k] = 
-			  (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-			  (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-			  (mfbbc - mfbba)) / rho) + VZ;
-
-		  //rhoD[k] =
-			 // (D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] +
-			 // (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] +
-			 // (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] +
-			 // (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] +
-			 // (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] +
-			 // (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] +
-			 // (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] +
-			 // (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] +
-			 // (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts] +
-			 // (D.f[DIR_000])[kzero] +
-			 // (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] +
-			 // (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] +
-			 // (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] +
-			 // (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw];// +RHO;
-
-    //     vxD[k] =  
-			 //((D.f[DIR_P00  ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
-    //         (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
-    //         (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
-    //         (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
-    //         (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
-    //         (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
-    //         (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
-    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
-    //         (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
-    //         VX;
-
-    //     vyD[k] =  
-			 //((D.f[DIR_0P0  ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
-    //         (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
-    //         (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-    //         (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
-    //         (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
-    //         (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
-    //         (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-    //         (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
-    //         VY;
-
-    //     vzD[k] =  
-			 //((D.f[DIR_00P  ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
-    //         (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
-    //         (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-    //         (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
-    //         (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-    //         (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-    //         (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
-    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-    //         (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
-    //         VZ;
-
-         pressD[k]  =  ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        c2o1*(
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
-                        c3o1*(
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
-                        rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+
-                        PRESS;    
-         //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
-      }
-   }
+    //! Per node: adds values computed from the 27 distributions onto vxD, vyD, vzD, rhoD and pressD (only where geoD == GEO_FLUID; all other nodes are reset to c0o1).
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //! - Only threads whose nodeIndex is below numberOfLBnodes do any work.
+    if( nodeIndex < numberOfLBnodes )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
+        //! timestep is based on the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist;
+        getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+        
+        //////////////////////////////////////////////////////////////////////////
+        //! - Set neighbor indices: kw, ks, kb are looked up in neighborX, neighborY, neighborZ; every other index aliases nodeIndex or one of these lookups
+        //unsigned int kzero= nodeIndex;
+        unsigned int ke   = nodeIndex;
+        unsigned int kw   = neighborX[nodeIndex];
+        unsigned int kn   = nodeIndex;
+        unsigned int ks   = neighborY[nodeIndex];
+        unsigned int kt   = nodeIndex;
+        unsigned int kb   = neighborZ[nodeIndex];
+        unsigned int ksw  = neighborY[kw];
+        unsigned int kne  = nodeIndex;
+        unsigned int kse  = ks;
+        unsigned int knw  = kw;
+        unsigned int kbw  = neighborZ[kw];
+        unsigned int kte  = nodeIndex;
+        unsigned int kbe  = kb;
+        unsigned int ktw  = kw;
+        unsigned int kbs  = neighborZ[ks];
+        unsigned int ktn  = nodeIndex;
+        unsigned int kbn  = kb;
+        unsigned int kts  = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = nodeIndex;
+        unsigned int kbsw = neighborZ[ksw];
+        //! - Save the values currently stored in the output arrays; they are added back onto the new results below
+        real PRESS = pressD[nodeIndex];
+        real RHO   = rhoD[nodeIndex];
+        real VX    = vxD[nodeIndex];
+        real VY    = vyD[nodeIndex];
+        real VZ    = vzD[nodeIndex];
+        //! - Reset the outputs to c0o1; nodes that are not GEO_FLUID keep this value
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+        
+        if(geoD[nodeIndex] == GEO_FLUID)
+        {
+            real mfcbb = (dist.f[DIR_P00])[nodeIndex];//[ke   ];
+            real mfabb = (dist.f[DIR_M00])[kw];//[kw   ];  
+            real mfbcb = (dist.f[DIR_0P0])[nodeIndex];//[kn   ];
+            real mfbab = (dist.f[DIR_0M0])[ks];//[ks   ];  
+            real mfbbc = (dist.f[DIR_00P])[nodeIndex];//[kt   ];
+            real mfbba = (dist.f[DIR_00M])[kb];//[kb   ];  
+            real mfccb = (dist.f[DIR_PP0])[nodeIndex];//[kne  ];  
+            real mfaab = (dist.f[DIR_MM0])[ksw];//[ksw  ];
+            real mfcab = (dist.f[DIR_PM0])[ks];//[kse  ]; 
+            real mfacb = (dist.f[DIR_MP0])[kw];//[knw  ]; 
+            real mfcbc = (dist.f[DIR_P0P])[nodeIndex];//[kte  ];  
+            real mfaba = (dist.f[DIR_M0M])[kbw];//[kbw  ];
+            real mfcba = (dist.f[DIR_P0M])[kb];//[kbe  ]; 
+            real mfabc = (dist.f[DIR_M0P])[kw];//[ktw  ]; 
+            real mfbcc = (dist.f[DIR_0PP])[nodeIndex];//[ktn  ];  
+            real mfbaa = (dist.f[DIR_0MM])[kbs];//[kbs  ];
+            real mfbca = (dist.f[DIR_0PM])[kb];//[kbn  ]; 
+            real mfbac = (dist.f[DIR_0MP])[ks];//[kts  ]; 
+            real mfbbb = (dist.f[DIR_000])[nodeIndex];//[kzero];
+            real mfccc = (dist.f[DIR_PPP])[nodeIndex];//[ktne ]; 
+            real mfaac = (dist.f[DIR_MMP])[ksw];//[ktsw ]; 
+            real mfcac = (dist.f[DIR_PMP])[ks];//[ktse ];
+            real mfacc = (dist.f[DIR_MPP])[kw];//[ktnw ];
+            real mfcca = (dist.f[DIR_PPM])[kb];//[kbne ];
+            real mfaaa = (dist.f[DIR_MMM])[kbsw];//[kbsw ];
+            real mfcaa = (dist.f[DIR_PMM])[kbs];//[kbse ]; 
+            real mfaca = (dist.f[DIR_MPM])[kbw];//[kbnw ]; 
+            //! - drho is the sum of all 27 distributions read above
+            real drho = 
+                ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
+
+            real rho = c1o1 + drho;
+
+            rhoD[nodeIndex] = drho + RHO;
+            //! - vxD, vyD, vzD: signed sums of the distributions divided by rho, added onto the saved VX, VY, VZ
+            vxD[nodeIndex] = 
+                (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
+                (mfcbb - mfabb)) / rho) + VX;
+            vyD[nodeIndex] = 
+                (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
+                (mfbcb - mfbab)) / rho) + VY;
+            vzD[nodeIndex] = 
+                (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
+                (mfbbc - mfbba)) / rho) + VZ;
+            //! - pressD: weighted sum of the distributions (factors 1, c2o1, c3o1), added onto the saved PRESS. NOTE(review): rhoD/vxD/vyD/vzD read below already include RHO/VX/VY/VZ - confirm this is intended
+            pressD[nodeIndex]  =
+                ((dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                c2o1*(
+                (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ])+
+                c3o1*(
+                (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw])-
+                rhoD[nodeIndex]-(vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1+rhoD[nodeIndex])) * c1o2+rhoD[nodeIndex]+
+                PRESS;    
+            //caution: op is hard-coded, assuming op = 1, so (1.0/op-0.5)=0.5 (presumably the c1o2 factor in the line above)
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -841,309 +687,191 @@ __global__ void LBCalcMedCompSP27( real* vxD,
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LBCalcMedCompAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int* geoD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	real* DD,
-	real* DD_AD,
-	bool isEvenTimestep)
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* distributions,
+    real* distributionsAD,
+    bool isEvenTimestep)
 {
-	Distributions27 D;
-	if (isEvenTimestep == true)
-	{
-		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
-	}
-	else
-	{
-		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
-	}
-	////////////////////////////////////////////////////////////////////////////////
-	Distributions27 Dad;
-	if (isEvenTimestep == true)
-	{
-		Dad.f[DIR_P00]    = &DD_AD[DIR_P00   *size_Mat];
-		Dad.f[DIR_M00]    = &DD_AD[DIR_M00   *size_Mat];
-		Dad.f[DIR_0P0]    = &DD_AD[DIR_0P0   *size_Mat];
-		Dad.f[DIR_0M0]    = &DD_AD[DIR_0M0   *size_Mat];
-		Dad.f[DIR_00P]    = &DD_AD[DIR_00P   *size_Mat];
-		Dad.f[DIR_00M]    = &DD_AD[DIR_00M   *size_Mat];
-		Dad.f[DIR_PP0]   = &DD_AD[DIR_PP0  *size_Mat];
-		Dad.f[DIR_MM0]   = &DD_AD[DIR_MM0  *size_Mat];
-		Dad.f[DIR_PM0]   = &DD_AD[DIR_PM0  *size_Mat];
-		Dad.f[DIR_MP0]   = &DD_AD[DIR_MP0  *size_Mat];
-		Dad.f[DIR_P0P]   = &DD_AD[DIR_P0P  *size_Mat];
-		Dad.f[DIR_M0M]   = &DD_AD[DIR_M0M  *size_Mat];
-		Dad.f[DIR_P0M]   = &DD_AD[DIR_P0M  *size_Mat];
-		Dad.f[DIR_M0P]   = &DD_AD[DIR_M0P  *size_Mat];
-		Dad.f[DIR_0PP]   = &DD_AD[DIR_0PP  *size_Mat];
-		Dad.f[DIR_0MM]   = &DD_AD[DIR_0MM  *size_Mat];
-		Dad.f[DIR_0PM]   = &DD_AD[DIR_0PM  *size_Mat];
-		Dad.f[DIR_0MP]   = &DD_AD[DIR_0MP  *size_Mat];
-		Dad.f[DIR_000] = &DD_AD[DIR_000*size_Mat];
-		Dad.f[DIR_PPP]  = &DD_AD[DIR_PPP *size_Mat];
-		Dad.f[DIR_MMP]  = &DD_AD[DIR_MMP *size_Mat];
-		Dad.f[DIR_PMP]  = &DD_AD[DIR_PMP *size_Mat];
-		Dad.f[DIR_MPP]  = &DD_AD[DIR_MPP *size_Mat];
-		Dad.f[DIR_PPM]  = &DD_AD[DIR_PPM *size_Mat];
-		Dad.f[DIR_MMM]  = &DD_AD[DIR_MMM *size_Mat];
-		Dad.f[DIR_PMM]  = &DD_AD[DIR_PMM *size_Mat];
-		Dad.f[DIR_MPM]  = &DD_AD[DIR_MPM *size_Mat];
-	}						
-	else					
-	{						
-		Dad.f[DIR_M00]    = &DD_AD[DIR_P00   *size_Mat];
-		Dad.f[DIR_P00]    = &DD_AD[DIR_M00   *size_Mat];
-		Dad.f[DIR_0M0]    = &DD_AD[DIR_0P0   *size_Mat];
-		Dad.f[DIR_0P0]    = &DD_AD[DIR_0M0   *size_Mat];
-		Dad.f[DIR_00M]    = &DD_AD[DIR_00P   *size_Mat];
-		Dad.f[DIR_00P]    = &DD_AD[DIR_00M   *size_Mat];
-		Dad.f[DIR_MM0]   = &DD_AD[DIR_PP0  *size_Mat];
-		Dad.f[DIR_PP0]   = &DD_AD[DIR_MM0  *size_Mat];
-		Dad.f[DIR_MP0]   = &DD_AD[DIR_PM0  *size_Mat];
-		Dad.f[DIR_PM0]   = &DD_AD[DIR_MP0  *size_Mat];
-		Dad.f[DIR_M0M]   = &DD_AD[DIR_P0P  *size_Mat];
-		Dad.f[DIR_P0P]   = &DD_AD[DIR_M0M  *size_Mat];
-		Dad.f[DIR_M0P]   = &DD_AD[DIR_P0M  *size_Mat];
-		Dad.f[DIR_P0M]   = &DD_AD[DIR_M0P  *size_Mat];
-		Dad.f[DIR_0MM]   = &DD_AD[DIR_0PP  *size_Mat];
-		Dad.f[DIR_0PP]   = &DD_AD[DIR_0MM  *size_Mat];
-		Dad.f[DIR_0MP]   = &DD_AD[DIR_0PM  *size_Mat];
-		Dad.f[DIR_0PM]   = &DD_AD[DIR_0MP  *size_Mat];
-		Dad.f[DIR_000] = &DD_AD[DIR_000*size_Mat];
-		Dad.f[DIR_PPP]  = &DD_AD[DIR_MMM *size_Mat];
-		Dad.f[DIR_MMP]  = &DD_AD[DIR_PPM *size_Mat];
-		Dad.f[DIR_PMP]  = &DD_AD[DIR_MPM *size_Mat];
-		Dad.f[DIR_MPP]  = &DD_AD[DIR_PMM *size_Mat];
-		Dad.f[DIR_PPM]  = &DD_AD[DIR_MMP *size_Mat];
-		Dad.f[DIR_MMM]  = &DD_AD[DIR_PPP *size_Mat];
-		Dad.f[DIR_PMM]  = &DD_AD[DIR_MPP *size_Mat];
-		Dad.f[DIR_MPM]  = &DD_AD[DIR_PMP *size_Mat];
-	}
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k < size_Mat)
-	{
-		//////////////////////////////////////////////////////////////////////////
-		//index
-		//unsigned int kzero = k;
-		unsigned int ke = k;
-		unsigned int kw = neighborX[k];
-		unsigned int kn = k;
-		unsigned int ks = neighborY[k];
-		unsigned int kt = k;
-		unsigned int kb = neighborZ[k];
-		unsigned int ksw = neighborY[kw];
-		unsigned int kne = k;
-		unsigned int kse = ks;
-		unsigned int knw = kw;
-		unsigned int kbw = neighborZ[kw];
-		unsigned int kte = k;
-		unsigned int kbe = kb;
-		unsigned int ktw = kw;
-		unsigned int kbs = neighborZ[ks];
-		unsigned int ktn = k;
-		unsigned int kbn = kb;
-		unsigned int kts = ks;
-		unsigned int ktse = ks;
-		unsigned int kbnw = kbw;
-		unsigned int ktnw = kw;
-		unsigned int kbse = kbs;
-		unsigned int ktsw = ksw;
-		unsigned int kbne = kb;
-		unsigned int ktne = k;
-		unsigned int kbsw = neighborZ[ksw];
-		//////////////////////////////////////////////////////////////////////////
-		real CONC  = concD[k];
-		real PRESS = pressD[k];
-		real RHO   = rhoD[k];
-		real VX    = vxD[k];
-		real VY    = vyD[k];
-		real VZ    = vzD[k];
-		//////////////////////////////////////////////////////////////////////////
-		concD[k] = c0o1;
-		pressD[k] = c0o1;
-		rhoD[k] = c0o1;
-		vxD[k] = c0o1;
-		vyD[k] = c0o1;
-		vzD[k] = c0o1;
-
-		if (geoD[k] == GEO_FLUID)
-		{
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];  
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];  
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];  
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];  
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ]; 
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ]; 
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];  
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ]; 
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ]; 
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];  
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ]; 
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ]; 
-			real mfbbb = (D.f[DIR_000])[k];//[kzero];
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; 
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; 
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; 
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; 
-			////////////////////////////////////////////////////////////////////////////////////
-			real drho =
-				((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-				 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-				  ((mfabb + mfcbb) + (mfbab + mfbcb)  +  (mfbba + mfbbc))) + mfbbb;
-			real rho = c1o1 + drho;
-			////////////////////////////////////////////////////////////////////////////////////
-
-			rhoD[k] = drho + RHO;
-
-			vxD[k] =
-				(((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-				(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-					(mfcbb - mfabb)) / rho) + VX;
-			
-			vyD[k] =
-				(((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-				(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-					(mfbcb - mfbab)) / rho) + VY;
-			
-			vzD[k] =
-				(((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-				(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-					(mfbbc - mfbba)) / rho) + VZ;
-
-			pressD[k] = 
-				((D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] +
-				 (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] +
-				 (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] +
-				 c2o1*(
-				 (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] +
-				 (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] +
-				 (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] +
-				 (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] +
-				 (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] +
-				 (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts]) +
-				 c3o1*(
-				 (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] +
-				 (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] +
-				 (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] +
-				 (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw]) -
-				 rhoD[k] - (vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1 + rhoD[k])) * c1o2 + rhoD[k] +
-				 PRESS;
-				 //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
-			//////////////////////////////////////////////////////////////////////////
-			mfcbb = (Dad.f[DIR_P00   ])[k   ];
-			mfabb = (Dad.f[DIR_M00   ])[kw  ];
-			mfbcb = (Dad.f[DIR_0P0   ])[k   ];
-			mfbab = (Dad.f[DIR_0M0   ])[ks  ];
-			mfbbc = (Dad.f[DIR_00P   ])[k   ];
-			mfbba = (Dad.f[DIR_00M   ])[kb  ];
-			mfccb = (Dad.f[DIR_PP0  ])[k   ];
-			mfaab = (Dad.f[DIR_MM0  ])[ksw ];
-			mfcab = (Dad.f[DIR_PM0  ])[ks  ];
-			mfacb = (Dad.f[DIR_MP0  ])[kw  ];
-			mfcbc = (Dad.f[DIR_P0P  ])[k   ];
-			mfaba = (Dad.f[DIR_M0M  ])[kbw ];
-			mfcba = (Dad.f[DIR_P0M  ])[kb  ];
-			mfabc = (Dad.f[DIR_M0P  ])[kw  ];
-			mfbcc = (Dad.f[DIR_0PP  ])[k   ];
-			mfbaa = (Dad.f[DIR_0MM  ])[kbs ];
-			mfbca = (Dad.f[DIR_0PM  ])[kb  ];
-			mfbac = (Dad.f[DIR_0MP  ])[ks  ];
-			mfbbb = (Dad.f[DIR_000])[k   ];
-			mfccc = (Dad.f[DIR_PPP ])[k   ];
-			mfaac = (Dad.f[DIR_MMP ])[ksw ];
-			mfcac = (Dad.f[DIR_PMP ])[ks  ];
-			mfacc = (Dad.f[DIR_MPP ])[kw  ];
-			mfcca = (Dad.f[DIR_PPM ])[kb  ];
-			mfaaa = (Dad.f[DIR_MMM ])[kbsw];
-			mfcaa = (Dad.f[DIR_PMM ])[kbs ];
-			mfaca = (Dad.f[DIR_MPM ])[kbw ];
-			//////////////////////////////////////////////////////////////////////////
-			concD[k] = 
-				((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa)   + (mfaac + mfcca))) +
-				 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba)   + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-				  ((mfabb + mfcbb) + (mfbab + mfbcb)  +  (mfbba + mfbbc))) +  mfbbb + CONC;
-		}
-	}
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if ( nodeIndex < numberOfLBnodes )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
+        //! timestep is based on the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist, distAD;
+        getPointersToDistributions(dist,   distributions,   numberOfLBnodes, isEvenTimestep);
+        getPointersToDistributions(distAD, distributionsAD, numberOfLBnodes, isEvenTimestep);
+
+        //////////////////////////////////////////////////////////////////////////
+        //index
+        //unsigned int kzero = nodeIndex;
+        unsigned int ke = nodeIndex;
+        unsigned int kw = neighborX[nodeIndex];
+        unsigned int kn = nodeIndex;
+        unsigned int ks = neighborY[nodeIndex];
+        unsigned int kt = nodeIndex;
+        unsigned int kb = neighborZ[nodeIndex];
+        unsigned int ksw = neighborY[kw];
+        unsigned int kne = nodeIndex;
+        unsigned int kse = ks;
+        unsigned int knw = kw;
+        unsigned int kbw = neighborZ[kw];
+        unsigned int kte = nodeIndex;
+        unsigned int kbe = kb;
+        unsigned int ktw = kw;
+        unsigned int kbs = neighborZ[ks];
+        unsigned int ktn = nodeIndex;
+        unsigned int kbn = kb;
+        unsigned int kts = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = nodeIndex;
+        unsigned int kbsw = neighborZ[ksw];
+        //////////////////////////////////////////////////////////////////////////
+        real CONC  = concD[nodeIndex];
+        real PRESS = pressD[nodeIndex];
+        real RHO   = rhoD[nodeIndex];
+        real VX    = vxD[nodeIndex];
+        real VY    = vyD[nodeIndex];
+        real VZ    = vzD[nodeIndex];
+        //////////////////////////////////////////////////////////////////////////
+        concD[nodeIndex]  = c0o1;
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+        
+        if (geoD[nodeIndex] == GEO_FLUID)
+        {
+            real mfcbb = (dist.f[DIR_P00])[nodeIndex];//[ke   ];
+            real mfabb = (dist.f[DIR_M00])[kw];//[kw   ];  
+            real mfbcb = (dist.f[DIR_0P0])[nodeIndex];//[kn   ];
+            real mfbab = (dist.f[DIR_0M0])[ks];//[ks   ];  
+            real mfbbc = (dist.f[DIR_00P])[nodeIndex];//[kt   ];
+            real mfbba = (dist.f[DIR_00M])[kb];//[kb   ];  
+            real mfccb = (dist.f[DIR_PP0])[nodeIndex];//[kne  ];  
+            real mfaab = (dist.f[DIR_MM0])[ksw];//[ksw  ];
+            real mfcab = (dist.f[DIR_PM0])[ks];//[kse  ]; 
+            real mfacb = (dist.f[DIR_MP0])[kw];//[knw  ]; 
+            real mfcbc = (dist.f[DIR_P0P])[nodeIndex];//[kte  ];  
+            real mfaba = (dist.f[DIR_M0M])[kbw];//[kbw  ];
+            real mfcba = (dist.f[DIR_P0M])[kb];//[kbe  ]; 
+            real mfabc = (dist.f[DIR_M0P])[kw];//[ktw  ]; 
+            real mfbcc = (dist.f[DIR_0PP])[nodeIndex];//[ktn  ];  
+            real mfbaa = (dist.f[DIR_0MM])[kbs];//[kbs  ];
+            real mfbca = (dist.f[DIR_0PM])[kb];//[kbn  ]; 
+            real mfbac = (dist.f[DIR_0MP])[ks];//[kts  ]; 
+            real mfbbb = (dist.f[DIR_000])[nodeIndex];//[kzero];
+            real mfccc = (dist.f[DIR_PPP])[nodeIndex];//[ktne ]; 
+            real mfaac = (dist.f[DIR_MMP])[ksw];//[ktsw ]; 
+            real mfcac = (dist.f[DIR_PMP])[ks];//[ktse ];
+            real mfacc = (dist.f[DIR_MPP])[kw];//[ktnw ];
+            real mfcca = (dist.f[DIR_PPM])[kb];//[kbne ];
+            real mfaaa = (dist.f[DIR_MMM])[kbsw];//[kbsw ];
+            real mfcaa = (dist.f[DIR_PMM])[kbs];//[kbse ]; 
+            real mfaca = (dist.f[DIR_MPM])[kbw];//[kbnw ]; 
+            ////////////////////////////////////////////////////////////////////////////////////
+            real drho =
+                ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                  ((mfabb + mfcbb) + (mfbab + mfbcb)  +  (mfbba + mfbbc))) + mfbbb;
+            real rho = c1o1 + drho;
+            ////////////////////////////////////////////////////////////////////////////////////
+            
+            rhoD[nodeIndex] = drho + RHO;
+            
+            vxD[nodeIndex] =
+                (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
+                    (mfcbb - mfabb)) / rho) + VX;
+            
+            vyD[nodeIndex] =
+                (((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
+                    (mfbcb - mfbab)) / rho) + VY;
+            
+            vzD[nodeIndex] =
+                (((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
+                    (mfbbc - mfbba)) / rho) + VZ;
+            
+            pressD[nodeIndex] = 
+                ((dist.f[DIR_P00])[ke] + (dist.f[DIR_M00])[kw] +
+                 (dist.f[DIR_0P0])[kn] + (dist.f[DIR_0M0])[ks] +
+                 (dist.f[DIR_00P])[kt] + (dist.f[DIR_00M])[kb] +
+                 c2o1*(
+                 (dist.f[DIR_PP0])[kne] + (dist.f[DIR_MM0])[ksw] +
+                 (dist.f[DIR_PM0])[kse] + (dist.f[DIR_MP0])[knw] +
+                 (dist.f[DIR_P0P])[kte] + (dist.f[DIR_M0M])[kbw] +
+                 (dist.f[DIR_P0M])[kbe] + (dist.f[DIR_M0P])[ktw] +
+                 (dist.f[DIR_0PP])[ktn] + (dist.f[DIR_0MM])[kbs] +
+                 (dist.f[DIR_0PM])[kbn] + (dist.f[DIR_0MP])[kts]) +
+                 c3o1*(
+                 (dist.f[DIR_PPP])[ktne] + (dist.f[DIR_MMP])[ktsw] +
+                 (dist.f[DIR_PMP])[ktse] + (dist.f[DIR_MPP])[ktnw] +
+                 (dist.f[DIR_PPM])[kbne] + (dist.f[DIR_MMM])[kbsw] +
+                 (dist.f[DIR_PMM])[kbse] + (dist.f[DIR_MPM])[kbnw]) -
+                 rhoD[nodeIndex] - (vxD[nodeIndex] * vxD[nodeIndex] + vyD[nodeIndex] * vyD[nodeIndex] + vzD[nodeIndex] * vzD[nodeIndex]) * (c1o1 + rhoD[nodeIndex])) * c1o2 + rhoD[nodeIndex] +
+                 PRESS;
+                 // caution: op is hard-coded here, assuming op = 1, so that (1.0/op - 0.5) = 0.5 (presumably the c1o2 factor above)
+            //////////////////////////////////////////////////////////////////////////
+            mfcbb = (distAD.f[DIR_P00])[nodeIndex   ];
+            mfabb = (distAD.f[DIR_M00])[kw  ];
+            mfbcb = (distAD.f[DIR_0P0])[nodeIndex   ];
+            mfbab = (distAD.f[DIR_0M0])[ks  ];
+            mfbbc = (distAD.f[DIR_00P])[nodeIndex   ];
+            mfbba = (distAD.f[DIR_00M])[kb  ];
+            mfccb = (distAD.f[DIR_PP0])[nodeIndex   ];
+            mfaab = (distAD.f[DIR_MM0])[ksw ];
+            mfcab = (distAD.f[DIR_PM0])[ks  ];
+            mfacb = (distAD.f[DIR_MP0])[kw  ];
+            mfcbc = (distAD.f[DIR_P0P])[nodeIndex   ];
+            mfaba = (distAD.f[DIR_M0M])[kbw ];
+            mfcba = (distAD.f[DIR_P0M])[kb  ];
+            mfabc = (distAD.f[DIR_M0P])[kw  ];
+            mfbcc = (distAD.f[DIR_0PP])[nodeIndex   ];
+            mfbaa = (distAD.f[DIR_0MM])[kbs ];
+            mfbca = (distAD.f[DIR_0PM])[kb  ];
+            mfbac = (distAD.f[DIR_0MP])[ks  ];
+            mfbbb = (distAD.f[DIR_000])[nodeIndex   ];
+            mfccc = (distAD.f[DIR_PPP])[nodeIndex   ];
+            mfaac = (distAD.f[DIR_MMP])[ksw ];
+            mfcac = (distAD.f[DIR_PMP])[ks  ];
+            mfacc = (distAD.f[DIR_MPP])[kw  ];
+            mfcca = (distAD.f[DIR_PPM])[kb  ];
+            mfaaa = (distAD.f[DIR_MMM])[kbsw];
+            mfcaa = (distAD.f[DIR_PMM])[kbs ];
+            mfaca = (distAD.f[DIR_MPM])[kbw ];
+            //////////////////////////////////////////////////////////////////////////
+            concD[nodeIndex] = 
+                ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa)   + (mfaac + mfcca))) +
+                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba)   + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                  ((mfabb + mfcbb) + (mfbab + mfbcb)  +  (mfbba + mfbbc))) +  mfbbb + CONC;
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1168,54 +896,50 @@ __global__ void LBCalcMedCompAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMacMedSP27( real* vxD,
-                                             real* vyD,
-                                             real* vzD,
-                                             real* rhoD,
-                                             real* pressD,
-                                             unsigned int* geoD,
-                                             unsigned int* neighborX,
-                                             unsigned int* neighborY,
-                                             unsigned int* neighborZ,
-                                             unsigned int tdiff,
-                                             unsigned int size_Mat,
-                                             bool isEvenTimestep)
+__global__ void LBCalcMacMedSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int tdiff,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      real PRESS = pressD[k];
-      real RHO   = rhoD[k];
-      real VX    = vxD[k];
-      real VY    = vyD[k];
-      real VZ    = vzD[k];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-      rhoD[k]   = c0o1;
-      vxD[k]    = c0o1;
-      vyD[k]    = c0o1;
-      vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-         rhoD[k]    =   RHO   / tdiff;
-         vxD[k]     =   VX    / tdiff;
-         vyD[k]     =   VY    / tdiff;
-         vzD[k]     =   VZ    / tdiff;
-         pressD[k]  =   PRESS / tdiff;    
-      }
-   }
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if(nodeIndex<numberOfLBnodes)
+    {
+        //////////////////////////////////////////////////////////////////////////
+        real PRESS = pressD[nodeIndex];
+        real RHO   = rhoD[nodeIndex];
+        real VX    = vxD[nodeIndex];
+        real VY    = vyD[nodeIndex];
+        real VZ    = vzD[nodeIndex];
+        //////////////////////////////////////////////////////////////////////////
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+       
+        if(geoD[nodeIndex] == GEO_FLUID)
+        {
+            rhoD[nodeIndex]    =   RHO   / tdiff;
+            vxD[nodeIndex]     =   VX    / tdiff;
+            vyD[nodeIndex]     =   VY    / tdiff;
+            vzD[nodeIndex]     =   VZ    / tdiff;
+            pressD[nodeIndex]  =   PRESS / tdiff;    
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1241,34 +965,29 @@ __global__ void LBCalcMacMedSP27( real* vxD,
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LBResetMedianValuesSP27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	unsigned int size_Mat,
-	bool isEvenTimestep)
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k<size_Mat)
-	{
-		//////////////////////////////////////////////////////////////////////////
-		pressD[k] = c0o1;
-		rhoD[k] = c0o1;
-		vxD[k] = c0o1;
-		vyD[k] = c0o1;
-		vzD[k] = c0o1;
-	}
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if ( nodeIndex < numberOfLBnodes )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex] = c0o1;
+        vxD[nodeIndex] = c0o1;
+        vyD[nodeIndex] = c0o1;
+        vzD[nodeIndex] = c0o1;
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1294,36 +1013,30 @@ __global__ void LBResetMedianValuesSP27(
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LBResetMedianValuesAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int size_Mat,
-	bool isEvenTimestep)
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k < size_Mat)
-	{
-		//////////////////////////////////////////////////////////////////////////
-		concD[k]  = c0o1;
-		pressD[k] = c0o1;
-		rhoD[k]   = c0o1;
-		vxD[k]    = c0o1;
-		vyD[k]    = c0o1;
-		vzD[k]    = c0o1;
-	}
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if (nodeIndex < numberOfLBnodes)
+    {
+        concD[nodeIndex]  = c0o1;
+        pressD[nodeIndex] = c0o1;
+        rhoD[nodeIndex]   = c0o1;
+        vxD[nodeIndex]    = c0o1;
+        vyD[nodeIndex]    = c0o1;
+        vzD[nodeIndex]    = c0o1;
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1348,177 +1061,121 @@ __global__ void LBResetMedianValuesAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBCalcMeasurePoints( real* vxMP,
-												real* vyMP,
-												real* vzMP,
-												real* rhoMP,
-												unsigned int* kMP,
-												unsigned int numberOfPointskMP,
-												unsigned int MPClockCycle,
-												unsigned int t,
-												unsigned int* geoD,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												real* DD,
-												bool isEvenTimestep)
+__global__ void LBCalcMeasurePoints(
+    real* vxMP, // out: written at index kMac for fluid measure points
+    real* vyMP, // out: written at index kMac for fluid measure points
+    real* vzMP, // out: written at index kMac for fluid measure points
+    real* rhoMP, // out: written at index kMac for fluid measure points
+    unsigned int* kMP, // node index of each measure point
+    unsigned int numberOfPointskMP, // number of kMP entries processed (one thread each)
+    unsigned int MPClockCycle, // stride between consecutive measure points in the output arrays
+    unsigned int t, // offset inside a measure point's output range
+    unsigned int* geoD, // per-node geometry flag, compared against GEO_FLUID
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* distributions,
+    bool isEvenTimestep)
 {
-	Distributions27 D;
-	if (isEvenTimestep==true)
-	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-	} 
-	else
-	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-	}
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if(k<numberOfPointskMP)
-	{
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int kzero= kMP[k];//k;
-      unsigned int ke   = kzero;
-      unsigned int kw   = neighborX[kzero];
-      unsigned int kn   = kzero;
-      unsigned int ks   = neighborY[kzero];
-      unsigned int kt   = kzero;
-      unsigned int kb   = neighborZ[kzero];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = kzero;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = kzero;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = kzero;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = kzero;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-	  unsigned int kMac = k*MPClockCycle + t;
-	  //////////////////////////////////////////////////////////////////////////
-
-      if(geoD[kzero] == GEO_FLUID)
-      {
-         rhoMP[kMac]=   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_000])[kzero]+ 
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
-
-         vxMP[kMac] =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
-                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
-                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
-
-         vyMP[kMac] =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
-                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
-                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
-                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
-
-         vzMP[kMac] =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
-                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
-                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
-                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
-                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
-                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
-                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
-                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
-                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
-      }
-   }
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get the linear index of this thread via getNodeIndex().
+    //!   Here it enumerates measure points, not LB nodes: it is bounded by numberOfPointskMP and indexes kMP.
+    const unsigned nodeIndex = getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    if( nodeIndex < numberOfPointskMP )
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //! - Set up pointers to the 27 distribution arrays inside `distributions`. Which stored array each direction
+        //! points to depends on isEvenTimestep; this follows the esoteric twist algorithm \ref <a
+        //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+        //! DOI:10.3390/computation5020019 ]</b></a>
+        //!
+        Distributions27 dist;
+        getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
+        //////////////////////////////////////////////////////////////////////////
+        // node index read for each direction: kzero itself or a neighbor reached via neighborX/Y/Z
+        unsigned int kzero= kMP[nodeIndex]; // node index of this measure point
+        unsigned int ke   = kzero;
+        unsigned int kw   = neighborX[kzero];
+        unsigned int kn   = kzero;
+        unsigned int ks   = neighborY[kzero];
+        unsigned int kt   = kzero;
+        unsigned int kb   = neighborZ[kzero];
+        unsigned int ksw  = neighborY[kw];
+        unsigned int kne  = kzero;
+        unsigned int kse  = ks;
+        unsigned int knw  = kw;
+        unsigned int kbw  = neighborZ[kw];
+        unsigned int kte  = kzero;
+        unsigned int kbe  = kb;
+        unsigned int ktw  = kw;
+        unsigned int kbs  = neighborZ[ks];
+        unsigned int ktn  = kzero;
+        unsigned int kbn  = kb;
+        unsigned int kts  = ks;
+        unsigned int ktse = ks;
+        unsigned int kbnw = kbw;
+        unsigned int ktnw = kw;
+        unsigned int kbse = kbs;
+        unsigned int ktsw = ksw;
+        unsigned int kbne = kb;
+        unsigned int ktne = kzero;
+        unsigned int kbsw = neighborZ[ksw];
+        // output index: row nodeIndex, column t (presumably t < MPClockCycle - verify against caller)
+	    unsigned int kMac = nodeIndex*MPClockCycle + t;
+	    //////////////////////////////////////////////////////////////////////////
+        // only fluid nodes are sampled (no else branch); rhoMP is the plain sum of all 27 distributions
+        if(geoD[kzero] == GEO_FLUID)
+        {
+            rhoMP[kMac]= (dist.f[DIR_P00])[ke  ]+ (dist.f[DIR_M00])[kw  ]+ 
+                         (dist.f[DIR_0P0])[kn  ]+ (dist.f[DIR_0M0])[ks  ]+
+                         (dist.f[DIR_00P])[kt  ]+ (dist.f[DIR_00M])[kb  ]+
+                         (dist.f[DIR_PP0])[kne ]+ (dist.f[DIR_MM0])[ksw ]+
+                         (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                         (dist.f[DIR_P0P])[kte ]+ (dist.f[DIR_M0M])[kbw ]+
+                         (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                         (dist.f[DIR_0PP])[ktn ]+ (dist.f[DIR_0MM])[kbs ]+
+                         (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                         (dist.f[DIR_000])[kzero]+ 
+                         (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                         (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                         (dist.f[DIR_PPM])[kbne]+ (dist.f[DIR_MMM])[kbsw]+ 
+                         (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw];
+            // vxMP: directions named P in the first slot minus those named M (not divided by rho)
+            vxMP[kMac] = (dist.f[DIR_P00])[ke  ]- (dist.f[DIR_M00])[kw  ]+ 
+                         (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]+
+                         (dist.f[DIR_PM0])[kse ]- (dist.f[DIR_MP0])[knw ]+
+                         (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]+
+                         (dist.f[DIR_P0M])[kbe ]- (dist.f[DIR_M0P])[ktw ]+
+                         (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]+ 
+                         (dist.f[DIR_PMP])[ktse]- (dist.f[DIR_MPP])[ktnw]+ 
+                         (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]+ 
+                         (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw];
+            // vyMP: directions named P in the second slot minus those named M (not divided by rho)
+            vyMP[kMac] = (dist.f[DIR_0P0])[kn  ]- (dist.f[DIR_0M0])[ks  ]+
+                         (dist.f[DIR_PP0])[kne ]- (dist.f[DIR_MM0])[ksw ]-
+                         (dist.f[DIR_PM0])[kse ]+ (dist.f[DIR_MP0])[knw ]+
+                         (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]+
+                         (dist.f[DIR_0PM])[kbn ]- (dist.f[DIR_0MP])[kts ]+
+                         (dist.f[DIR_PPP])[ktne]- (dist.f[DIR_MMP])[ktsw]- 
+                         (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]+ 
+                         (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                         (dist.f[DIR_PMM])[kbse]+ (dist.f[DIR_MPM])[kbnw];
+            // vzMP: directions named P in the third slot minus those named M (not divided by rho)
+            vzMP[kMac] = (dist.f[DIR_00P])[kt  ]- (dist.f[DIR_00M])[kb  ]+
+                         (dist.f[DIR_P0P])[kte ]- (dist.f[DIR_M0M])[kbw ]-
+                         (dist.f[DIR_P0M])[kbe ]+ (dist.f[DIR_M0P])[ktw ]+
+                         (dist.f[DIR_0PP])[ktn ]- (dist.f[DIR_0MM])[kbs ]-
+                         (dist.f[DIR_0PM])[kbn ]+ (dist.f[DIR_0MP])[kts ]+
+                         (dist.f[DIR_PPP])[ktne]+ (dist.f[DIR_MMP])[ktsw]+ 
+                         (dist.f[DIR_PMP])[ktse]+ (dist.f[DIR_MPP])[ktnw]- 
+                         (dist.f[DIR_PPM])[kbne]- (dist.f[DIR_MMM])[kbsw]- 
+                         (dist.f[DIR_PMM])[kbse]- (dist.f[DIR_MPM])[kbnw];
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1559,40 +1216,36 @@ __global__ void LBCalcMeasurePoints( real* vxMP,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LBSetOutputWallVelocitySP27( real* vxD,
-														real* vyD,
-														real* vzD,
-														real* vxWall,
-														real* vyWall,
-														real* vzWall,
-														int numberOfWallNodes, 
-														int* kWallNodes, 
-														real* rhoD,
-														real* pressD,
-														unsigned int* geoD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int size_Mat,
-														real* DD,
-														bool isEvenTimestep)
+__global__ void LBSetOutputWallVelocitySP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* vxWall,
+    real* vyWall,
+    real* vzWall,
+    int numberOfWallNodes, 
+    int* kWallNodes, 
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* DD,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
-
-   if(k<numberOfWallNodes)
+   if(nodeIndex<numberOfWallNodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //index
-      unsigned int KWN  = kWallNodes[k];
+      unsigned int KWN  = kWallNodes[nodeIndex];
       //////////////////////////////////////////////////////////////////////////
       vxD[KWN] = 0.0;//vxWall[k];
       vyD[KWN] = 0.0;//vyWall[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
index a79588421a624cae62ec32127739efb47bb7b2ef..e05a711015e372b3fc3169bf61bea9965ccf7c12 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 #include "math.h"
@@ -15,7 +15,7 @@ __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -29,7 +29,7 @@ __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -40,63 +40,63 @@ __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -842,7 +842,7 @@ __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
 														   unsigned int* neighborY,
 														   unsigned int* neighborZ,
 														   real* DDStart,
-														   int size_Mat,
+														   unsigned long long numberOfLBnodes,
 														   bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -856,7 +856,7 @@ __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -867,63 +867,63 @@ __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
          else
          {
-            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -956,33 +956,33 @@ __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
          unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real f_E     = (D.f[DIR_P00   ])[ke   ];// +  c2over27 ;
-         real f_W     = (D.f[DIR_M00   ])[kw   ];// +  c2over27 ;
-         real f_N     = (D.f[DIR_0P0   ])[kn   ];// +  c2over27 ;
-         real f_S     = (D.f[DIR_0M0   ])[ks   ];// +  c2over27 ;
-         real f_F     = (D.f[DIR_00P   ])[kt   ];// +  c2over27 ;
-         real f_B     = (D.f[DIR_00M   ])[kb   ];// +  c2over27 ;
-         real f_NE    = (D.f[DIR_PP0  ])[kne  ];// +  c1over54 ;
-         real f_SW    = (D.f[DIR_MM0  ])[ksw  ];// +  c1over54 ;
-         real f_SE    = (D.f[DIR_PM0  ])[kse  ];// +  c1over54 ;
-         real f_NW    = (D.f[DIR_MP0  ])[knw  ];// +  c1over54 ;
-         real f_Ef    = (D.f[DIR_P0P  ])[kte  ];// +  c1over54 ;
-         real f_Wb    = (D.f[DIR_M0M  ])[kbw  ];// +  c1over54 ;
-         real f_Eb    = (D.f[DIR_P0M  ])[kbe  ];// +  c1over54 ;
-         real f_Wf    = (D.f[DIR_M0P  ])[ktw  ];// +  c1over54 ;
-         real f_Nf    = (D.f[DIR_0PP  ])[ktn  ];// +  c1over54 ;
-         real f_Sb    = (D.f[DIR_0MM  ])[kbs  ];// +  c1over54 ;
-         real f_Nb    = (D.f[DIR_0PM  ])[kbn  ];// +  c1over54 ;
-         real f_Sf    = (D.f[DIR_0MP  ])[kts  ];// +  c1over54 ;
+         real f_E     = (D.f[DIR_P00])[ke   ];// +  c2over27 ;
+         real f_W     = (D.f[DIR_M00])[kw   ];// +  c2over27 ;
+         real f_N     = (D.f[DIR_0P0])[kn   ];// +  c2over27 ;
+         real f_S     = (D.f[DIR_0M0])[ks   ];// +  c2over27 ;
+         real f_F     = (D.f[DIR_00P])[kt   ];// +  c2over27 ;
+         real f_B     = (D.f[DIR_00M])[kb   ];// +  c2over27 ;
+         real f_NE    = (D.f[DIR_PP0])[kne  ];// +  c1over54 ;
+         real f_SW    = (D.f[DIR_MM0])[ksw  ];// +  c1over54 ;
+         real f_SE    = (D.f[DIR_PM0])[kse  ];// +  c1over54 ;
+         real f_NW    = (D.f[DIR_MP0])[knw  ];// +  c1over54 ;
+         real f_Ef    = (D.f[DIR_P0P])[kte  ];// +  c1over54 ;
+         real f_Wb    = (D.f[DIR_M0M])[kbw  ];// +  c1over54 ;
+         real f_Eb    = (D.f[DIR_P0M])[kbe  ];// +  c1over54 ;
+         real f_Wf    = (D.f[DIR_M0P])[ktw  ];// +  c1over54 ;
+         real f_Nf    = (D.f[DIR_0PP])[ktn  ];// +  c1over54 ;
+         real f_Sb    = (D.f[DIR_0MM])[kbs  ];// +  c1over54 ;
+         real f_Nb    = (D.f[DIR_0PM])[kbn  ];// +  c1over54 ;
+         real f_Sf    = (D.f[DIR_0MP])[kts  ];// +  c1over54 ;
          real f_R     = (D.f[DIR_000])[kzero];// +  c8over27 ;
-         real f_Nef   = (D.f[DIR_PPP ])[ktne ];// +  c1over216;
-         real f_Swf   = (D.f[DIR_MMP ])[ktsw ];// +  c1over216;
-         real f_Sef   = (D.f[DIR_PMP ])[ktse ];// +  c1over216;
-         real f_Nwf   = (D.f[DIR_MPP ])[ktnw ];// +  c1over216;
-         real f_Neb   = (D.f[DIR_PPM ])[kbne ];// +  c1over216;
-         real f_Swb   = (D.f[DIR_MMM ])[kbsw ];// +  c1over216;
-         real f_Seb   = (D.f[DIR_PMM ])[kbse ];// +  c1over216;
-         real f_Nwb   = (D.f[DIR_MPM ])[kbnw ];// +  c1over216;
+         real f_Nef   = (D.f[DIR_PPP])[ktne ];// +  c1over216;
+         real f_Swf   = (D.f[DIR_MMP])[ktsw ];// +  c1over216;
+         real f_Sef   = (D.f[DIR_PMP])[ktse ];// +  c1over216;
+         real f_Nwf   = (D.f[DIR_MPP])[ktnw ];// +  c1over216;
+         real f_Neb   = (D.f[DIR_PPM])[kbne ];// +  c1over216;
+         real f_Swb   = (D.f[DIR_MMM])[kbsw ];// +  c1over216;
+         real f_Seb   = (D.f[DIR_PMM])[kbse ];// +  c1over216;
+         real f_Nwb   = (D.f[DIR_MPM])[kbnw ];// +  c1over216;
          ////////////////////////////////////////////////////////////////////////////////////
 		 real rho=f_NW+f_W+f_SW+f_S+f_SE+f_E+f_NE+f_N+f_R+f_Nf+f_Nb+f_Sf+f_Sb+f_Ef+f_Eb+f_Wf+f_Wb+f_Nwf+f_Nwb+f_Nef+f_Neb+f_Swf+f_Swb+f_Sef+f_Seb+f_F+f_B+c1o1;// ACHTUNG ne EINS !!!!!!!!
 		 real pix=(f_NE+f_E+f_SE+f_Ef+f_Eb-f_NW-f_W-f_SW-f_Wf-f_Wb+f_Nef+f_Neb+f_Sef+f_Seb-f_Nwf-f_Nwb-f_Swf-f_Swb);
@@ -1689,7 +1689,7 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
                                                          unsigned int* neighborY,
                                                          unsigned int* neighborZ,
                                                          real* DDStart,
-                                                         int size_Mat,
+                                                         unsigned long long numberOfLBnodes,
                                                          bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -1703,7 +1703,7 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -1714,63 +1714,63 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
          else
          {
-            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -1803,33 +1803,33 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-         real fW    =  (D.f[DIR_M00   ])[kw ];
-         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-         real fS    =  (D.f[DIR_0M0   ])[ks ];
-         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-         real fB    =  (D.f[DIR_00M   ])[kb ];
-         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-         real fSW   =  (D.f[DIR_MM0  ])[ksw];
-         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-         real fBW   =  (D.f[DIR_M0M  ])[kbw];
-         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-         real fBS   =  (D.f[DIR_0MM  ])[kbs];
-         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fE    =  (D.f[DIR_P00])[k  ];//ke
+         real fW    =  (D.f[DIR_M00])[kw ];
+         real fN    =  (D.f[DIR_0P0])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0])[ks ];
+         real fT    =  (D.f[DIR_00P])[k  ];//kt
+         real fB    =  (D.f[DIR_00M])[kb ];
+         real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0])[ksw];
+         real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M])[kbw];
+         real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM])[kbs];
+         real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP])[ks ];//kts
          real fZERO =  (D.f[DIR_000])[k  ];//kzero
-         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-         real fBSW   = (D.f[DIR_MMM ])[kbsw];
-         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+         real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM])[kbsw];
+         real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  (fTNE+fBSW)+(fTSW+fBNE)+(fTSE+fBNW)+(fTNW+fBSE)+(fNE+fSW)+(fNW+fSE)+(fTE+fBW)+(fBE+fTW)+(fTN+fBS)+(fBN+fTS)+(fE+fW)+(fN+fS)+(fT+fB)+fZERO;
          real rho    =  rho0 + c1o1;
@@ -2321,7 +2321,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
                                                       unsigned int* neighborY,
                                                       unsigned int* neighborZ,
                                                       real* DDStart,
-                                                      int size_Mat,
+                                                      unsigned long long numberOfLBnodes,
                                                       bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -2335,7 +2335,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -2346,63 +2346,63 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
          else
          {
-            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -2435,33 +2435,33 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-         real fW    =  (D.f[DIR_M00   ])[kw ];
-         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-         real fS    =  (D.f[DIR_0M0   ])[ks ];
-         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-         real fB    =  (D.f[DIR_00M   ])[kb ];
-         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-         real fSW   =  (D.f[DIR_MM0  ])[ksw];
-         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-         real fBW   =  (D.f[DIR_M0M  ])[kbw];
-         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-         real fBS   =  (D.f[DIR_0MM  ])[kbs];
-         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fE    =  (D.f[DIR_P00])[k  ];//ke
+         real fW    =  (D.f[DIR_M00])[kw ];
+         real fN    =  (D.f[DIR_0P0])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0])[ks ];
+         real fT    =  (D.f[DIR_00P])[k  ];//kt
+         real fB    =  (D.f[DIR_00M])[kb ];
+         real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0])[ksw];
+         real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M])[kbw];
+         real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM])[kbs];
+         real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP])[ks ];//kts
          real fZERO =  (D.f[DIR_000])[k  ];//kzero
-         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-         real fBSW   = (D.f[DIR_MMM ])[kbsw];
-         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+         real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM])[kbsw];
+         real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  fZERO+fE+fW+fN+fS+fT+fB+fNE+fSW+fSE+fNW+fTE+fBW+fBE+fTW+fTN+fBS+fBN+fTS+fTNE+fTSW+fTSE+fTNW+fBNE+fBSW+fBSE+fBNW;
          real rho    =  rho0 + c1o1;
@@ -2846,7 +2846,7 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
                                                         unsigned int* neighborY,
                                                         unsigned int* neighborZ,
                                                         real* DDStart,
-                                                        int size_Mat,
+                                                        unsigned long long numberOfLBnodes,
                                                         bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -2860,7 +2860,7 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -2871,63 +2871,63 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
          else
          {
-            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+            D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+            D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+            D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+            D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+            D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+            D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+            D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+            D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+            D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+            D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+            D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+            D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+            D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+            D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+            D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+            D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+            D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+            D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+            D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+            D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+            D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+            D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+            D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+            D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+            D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+            D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+            D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -2960,33 +2960,33 @@ __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-         real fW    =  (D.f[DIR_M00   ])[kw ];
-         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-         real fS    =  (D.f[DIR_0M0   ])[ks ];
-         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-         real fB    =  (D.f[DIR_00M   ])[kb ];
-         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-         real fSW   =  (D.f[DIR_MM0  ])[ksw];
-         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-         real fBW   =  (D.f[DIR_M0M  ])[kbw];
-         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-         real fBS   =  (D.f[DIR_0MM  ])[kbs];
-         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fE    =  (D.f[DIR_P00])[k  ];//ke
+         real fW    =  (D.f[DIR_M00])[kw ];
+         real fN    =  (D.f[DIR_0P0])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0])[ks ];
+         real fT    =  (D.f[DIR_00P])[k  ];//kt
+         real fB    =  (D.f[DIR_00M])[kb ];
+         real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0])[ksw];
+         real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M])[kbw];
+         real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM])[kbs];
+         real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP])[ks ];//kts
          real fZERO =  (D.f[DIR_000])[k  ];//kzero
-         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-         real fBSW   = (D.f[DIR_MMM ])[kbsw];
-         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+         real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM])[kbsw];
+         real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  fZERO+fE+fW+fN+fS+fT+fB+fNE+fSW+fSE+fNW+fTE+fBW+fBE+fTW+fTN+fBS+fBN+fTS+fTNE+fTSW+fTSE+fTNW+fBNE+fBSW+fBSE+fBNW;
          real rho    =  rho0 + c1o1;
@@ -3368,7 +3368,7 @@ __global__ void LB_Kernel_Casc_SP_27(  real omega,
                                                   unsigned int* neighborY,
                                                   unsigned int* neighborZ,
                                                   real* DDStart,
-                                                  int size_Mat,
+                                                  unsigned long long numberOfLBnodes,
                                                   bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -3382,7 +3382,7 @@ __global__ void LB_Kernel_Casc_SP_27(  real omega,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -3393,63 +3393,63 @@ __global__ void LB_Kernel_Casc_SP_27(  real omega,
        Distributions27 D;
        if (EvenOrOdd==true)
        {
-          D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-          D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-          D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-          D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-          D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-          D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-          D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-          D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-          D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-          D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-          D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-          D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-          D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-          D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-          D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-          D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-          D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-          D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-          D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-          D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-          D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-          D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-          D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-          D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-          D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-          D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-          D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+          D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+          D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+          D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+          D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+          D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+          D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+          D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+          D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+          D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+          D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+          D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+          D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+          D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+          D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+          D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+          D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+          D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+          D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+          D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+          D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+          D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+          D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+          D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+          D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+          D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+          D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+          D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
        }
        else
        {
-          D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-          D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-          D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-          D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-          D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-          D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-          D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-          D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-          D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-          D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-          D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-          D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-          D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-          D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-          D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-          D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-          D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-          D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-          D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-          D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-          D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-          D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-          D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-          D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-          D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-          D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-          D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+          D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+          D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+          D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+          D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+          D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+          D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+          D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+          D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+          D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+          D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+          D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+          D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+          D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+          D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+          D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+          D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+          D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+          D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+          D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+          D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+          D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+          D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+          D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+          D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+          D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+          D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+          D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
        }
 
        ////////////////////////////////////////////////////////////////////////////////
@@ -3512,33 +3512,33 @@ __global__ void LB_Kernel_Casc_SP_27(  real omega,
        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_ZERO, f_TNE,f_TNW,f_TSE,f_TSW, f_BNE,f_BNW,f_BSE,f_BSW;
        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-       f_E    =  (D.f[DIR_P00   ])[ke]+c2o27;
-       f_W    =  (D.f[DIR_M00   ])[kw]+c2o27;
-       f_N    =  (D.f[DIR_0P0   ])[kn]+c2o27;
-       f_S    =  (D.f[DIR_0M0   ])[ks]+c2o27;
-       f_T    =  (D.f[DIR_00P   ])[kt]+c2o27;
-       f_B    =  (D.f[DIR_00M   ])[kb]+c2o27;
-       f_NE   =  (D.f[DIR_PP0  ])[kne]+c1o54;
-       f_SW   =  (D.f[DIR_MM0  ])[ksw]+c1o54;
-       f_SE   =  (D.f[DIR_PM0  ])[kse]+c1o54;
-       f_NW   =  (D.f[DIR_MP0  ])[knw]+c1o54;
-       f_TE   =  (D.f[DIR_P0P  ])[kte]+c1o54;
-       f_BW   =  (D.f[DIR_M0M  ])[kbw]+c1o54;
-       f_BE   =  (D.f[DIR_P0M  ])[kbe]+c1o54;
-       f_TW   =  (D.f[DIR_M0P  ])[ktw]+c1o54;
-       f_TN   =  (D.f[DIR_0PP  ])[ktn]+c1o54;
-       f_BS   =  (D.f[DIR_0MM  ])[kbs]+c1o54;
-       f_BN   =  (D.f[DIR_0PM  ])[kbn]+c1o54;
-       f_TS   =  (D.f[DIR_0MP  ])[kts]+c1o54;
+       f_E    =  (D.f[DIR_P00])[ke]+c2o27;
+       f_W    =  (D.f[DIR_M00])[kw]+c2o27;
+       f_N    =  (D.f[DIR_0P0])[kn]+c2o27;
+       f_S    =  (D.f[DIR_0M0])[ks]+c2o27;
+       f_T    =  (D.f[DIR_00P])[kt]+c2o27;
+       f_B    =  (D.f[DIR_00M])[kb]+c2o27;
+       f_NE   =  (D.f[DIR_PP0])[kne]+c1o54;
+       f_SW   =  (D.f[DIR_MM0])[ksw]+c1o54;
+       f_SE   =  (D.f[DIR_PM0])[kse]+c1o54;
+       f_NW   =  (D.f[DIR_MP0])[knw]+c1o54;
+       f_TE   =  (D.f[DIR_P0P])[kte]+c1o54;
+       f_BW   =  (D.f[DIR_M0M])[kbw]+c1o54;
+       f_BE   =  (D.f[DIR_P0M])[kbe]+c1o54;
+       f_TW   =  (D.f[DIR_M0P])[ktw]+c1o54;
+       f_TN   =  (D.f[DIR_0PP])[ktn]+c1o54;
+       f_BS   =  (D.f[DIR_0MM])[kbs]+c1o54;
+       f_BN   =  (D.f[DIR_0PM])[kbn]+c1o54;
+       f_TS   =  (D.f[DIR_0MP])[kts]+c1o54;
        f_ZERO =  (D.f[DIR_000])[kzero]+c8o27;
-       f_TNE   = (D.f[DIR_PPP ])[ktne]+c1o216;
-       f_TSW   = (D.f[DIR_MMP ])[ktsw]+c1o216;
-       f_TSE   = (D.f[DIR_PMP ])[ktse]+c1o216;
-       f_TNW   = (D.f[DIR_MPP ])[ktnw]+c1o216;
-       f_BNE   = (D.f[DIR_PPM ])[kbne]+c1o216;
-       f_BSW   = (D.f[DIR_MMM ])[kbsw]+c1o216;
-       f_BSE   = (D.f[DIR_PMM ])[kbse]+c1o216;
-       f_BNW   = (D.f[DIR_MPM ])[kbnw]+c1o216;
+       f_TNE   = (D.f[DIR_PPP])[ktne]+c1o216;
+       f_TSW   = (D.f[DIR_MMP])[ktsw]+c1o216;
+       f_TSE   = (D.f[DIR_PMP])[ktse]+c1o216;
+       f_TNW   = (D.f[DIR_MPP])[ktnw]+c1o216;
+       f_BNE   = (D.f[DIR_PPM])[kbne]+c1o216;
+       f_BSW   = (D.f[DIR_MMM])[kbsw]+c1o216;
+       f_BSE   = (D.f[DIR_PMM])[kbse]+c1o216;
+       f_BNW   = (D.f[DIR_MPM])[kbnw]+c1o216;
        ////////////////////////////////////////////////////////////////////////////////
 
        if( BC == GEO_FLUID || BC == GEO_VELO)
@@ -4060,7 +4060,7 @@ __global__ void LB_Kernel_Casc27(real omega,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
                                             real* DDStart,
-                                            int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -4089,63 +4089,63 @@ __global__ void LB_Kernel_Casc27(real omega,
       Distributions27 D;
       if (EvenOrOdd==true)
       {
-         D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-         D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-         D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-         D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-         D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-         D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-         D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-         D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-         D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+         D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+         D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -4208,33 +4208,33 @@ __global__ void LB_Kernel_Casc27(real omega,
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_ZERO, f_TNE,f_TNW,f_TSE,f_TSW, f_BNE,f_BNW,f_BSE,f_BSW;
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      f_E    =  (D.f[DIR_P00   ])[ke]+c2o27;
-      f_W    =  (D.f[DIR_M00   ])[kw]+c2o27;
-      f_N    =  (D.f[DIR_0P0   ])[kn]+c2o27;
-      f_S    =  (D.f[DIR_0M0   ])[ks]+c2o27;
-      f_T    =  (D.f[DIR_00P   ])[kt]+c2o27;
-      f_B    =  (D.f[DIR_00M   ])[kb]+c2o27;
-      f_NE   =  (D.f[DIR_PP0  ])[kne]+c1o54;
-      f_SW   =  (D.f[DIR_MM0  ])[ksw]+c1o54;
-      f_SE   =  (D.f[DIR_PM0  ])[kse]+c1o54;
-      f_NW   =  (D.f[DIR_MP0  ])[knw]+c1o54;
-      f_TE   =  (D.f[DIR_P0P  ])[kte]+c1o54;
-      f_BW   =  (D.f[DIR_M0M  ])[kbw]+c1o54;
-      f_BE   =  (D.f[DIR_P0M  ])[kbe]+c1o54;
-      f_TW   =  (D.f[DIR_M0P  ])[ktw]+c1o54;
-      f_TN   =  (D.f[DIR_0PP  ])[ktn]+c1o54;
-      f_BS   =  (D.f[DIR_0MM  ])[kbs]+c1o54;
-      f_BN   =  (D.f[DIR_0PM  ])[kbn]+c1o54;
-      f_TS   =  (D.f[DIR_0MP  ])[kts]+c1o54;
+      f_E    =  (D.f[DIR_P00])[ke]+c2o27;
+      f_W    =  (D.f[DIR_M00])[kw]+c2o27;
+      f_N    =  (D.f[DIR_0P0])[kn]+c2o27;
+      f_S    =  (D.f[DIR_0M0])[ks]+c2o27;
+      f_T    =  (D.f[DIR_00P])[kt]+c2o27;
+      f_B    =  (D.f[DIR_00M])[kb]+c2o27;
+      f_NE   =  (D.f[DIR_PP0])[kne]+c1o54;
+      f_SW   =  (D.f[DIR_MM0])[ksw]+c1o54;
+      f_SE   =  (D.f[DIR_PM0])[kse]+c1o54;
+      f_NW   =  (D.f[DIR_MP0])[knw]+c1o54;
+      f_TE   =  (D.f[DIR_P0P])[kte]+c1o54;
+      f_BW   =  (D.f[DIR_M0M])[kbw]+c1o54;
+      f_BE   =  (D.f[DIR_P0M])[kbe]+c1o54;
+      f_TW   =  (D.f[DIR_M0P])[ktw]+c1o54;
+      f_TN   =  (D.f[DIR_0PP])[ktn]+c1o54;
+      f_BS   =  (D.f[DIR_0MM])[kbs]+c1o54;
+      f_BN   =  (D.f[DIR_0PM])[kbn]+c1o54;
+      f_TS   =  (D.f[DIR_0MP])[kts]+c1o54;
       f_ZERO =  (D.f[DIR_000])[kzero]+c8o27;
-      f_TNE   = (D.f[DIR_PPP ])[ktne]+c1o216;
-      f_TSW   = (D.f[DIR_MMP ])[ktsw]+c1o216;
-      f_TSE   = (D.f[DIR_PMP ])[ktse]+c1o216;
-      f_TNW   = (D.f[DIR_MPP ])[ktnw]+c1o216;
-      f_BNE   = (D.f[DIR_PPM ])[kbne]+c1o216;
-      f_BSW   = (D.f[DIR_MMM ])[kbsw]+c1o216;
-      f_BSE   = (D.f[DIR_PMM ])[kbse]+c1o216;
-      f_BNW   = (D.f[DIR_MPM ])[kbnw]+c1o216;
+      f_TNE   = (D.f[DIR_PPP])[ktne]+c1o216;
+      f_TSW   = (D.f[DIR_MMP])[ktsw]+c1o216;
+      f_TSE   = (D.f[DIR_PMP])[ktse]+c1o216;
+      f_TNW   = (D.f[DIR_MPP])[ktnw]+c1o216;
+      f_BNE   = (D.f[DIR_PPM])[kbne]+c1o216;
+      f_BSW   = (D.f[DIR_MMM])[kbsw]+c1o216;
+      f_BSE   = (D.f[DIR_PMM])[kbse]+c1o216;
+      f_BNW   = (D.f[DIR_MPM])[kbnw]+c1o216;
       ////////////////////////////////////////////////////////////////////////////////
 
       if( BC == GEO_FLUID || BC == GEO_VELO)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index f5908517491e252eae5078ec5ede4d5ba452bda7..14e090ff3f02eaec87a9f709fc0e0ac8df711189 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -13,67 +13,57 @@
 
 #include "Calculation/PorousMedia.h"
 
-#include "lbm/constants/NumericConstants.h"
+#include "basics/constants/NumericConstants.h"
 
 
-void CudaMemoryManager::cudaAllocFull(int lev)
-{
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geo      ), parameter->getParH(lev)->mem_size_int  ));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->k        ), parameter->getParH(lev)->mem_size_int  ));
-}
-void CudaMemoryManager::cudaFreeFull(int lev)
-{
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->geo   ));
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->k     ));
-}
 void CudaMemoryManager::cudaCopyPrint(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX   , parameter->getParD(lev)->velocityX   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY   , parameter->getParD(lev)->velocityY   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho  , parameter->getParD(lev)->rho  , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure, parameter->getParD(lev)->pressure, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX   , parameter->getParD(lev)->velocityX   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY   , parameter->getParD(lev)->velocityY   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho         , parameter->getParD(lev)->rho         , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure    , parameter->getParD(lev)->pressure    , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
 
     if(parameter->getIsBodyForce())
     {
-        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceX_SP   , parameter->getParD(lev)->forceX_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceY_SP   , parameter->getParD(lev)->forceY_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceZ_SP   , parameter->getParD(lev)->forceZ_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceX_SP   , parameter->getParD(lev)->forceX_SP   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceY_SP   , parameter->getParD(lev)->forceY_SP   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceZ_SP   , parameter->getParD(lev)->forceZ_SP   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
     }
 
     if(parameter->getUseTurbulentViscosity())
     {
-        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->turbViscosity   , parameter->getParD(lev)->turbViscosity   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->turbViscosity   , parameter->getParD(lev)->turbViscosity   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
     }
 }
 void CudaMemoryManager::cudaCopyMedianPrint(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vx_SP_Med   , parameter->getParD(lev)->vx_SP_Med   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vy_SP_Med   , parameter->getParD(lev)->vy_SP_Med   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vz_SP_Med   , parameter->getParD(lev)->vz_SP_Med   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho_SP_Med  , parameter->getParD(lev)->rho_SP_Med  , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->press_SP_Med, parameter->getParD(lev)->press_SP_Med, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vx_SP_Med   , parameter->getParD(lev)->vx_SP_Med   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vy_SP_Med   , parameter->getParD(lev)->vy_SP_Med   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vz_SP_Med   , parameter->getParD(lev)->vz_SP_Med   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho_SP_Med  , parameter->getParD(lev)->rho_SP_Med  , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->press_SP_Med, parameter->getParD(lev)->press_SP_Med, parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
 }
 void CudaMemoryManager::cudaAllocCoord(int lev)
 {
 	//Host
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateX      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateY      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateZ      ), parameter->getParH(lev)->mem_size_real_SP  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateX      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateY      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coordinateZ      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
 	//Device (spinning ship + uppsala)
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateX      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateY      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateZ      ), parameter->getParH(lev)->mem_size_real_SP  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateX      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateY      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coordinateZ      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
 	//////////////////////////////////////////////////////////////////////////
-	double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP;
+	double tmp = 3. * (double)parameter->getParH(lev)->memSizeRealLBnodes;
 	setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyCoord(int lev)
 {
 	//copy host to device
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateX,  parameter->getParH(lev)->coordinateX,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateY,  parameter->getParH(lev)->coordinateY,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateZ,  parameter->getParH(lev)->coordinateZ,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateX,  parameter->getParH(lev)->coordinateX,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateY,  parameter->getParH(lev)->coordinateY,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->coordinateZ,  parameter->getParH(lev)->coordinateZ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeCoord(int lev)
 {
@@ -84,24 +74,24 @@ void CudaMemoryManager::cudaFreeCoord(int lev)
 void CudaMemoryManager::cudaAllocBodyForce(int lev)
 {
     //Host
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceX_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceY_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceZ_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceX_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceY_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->forceZ_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
 	//Device
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceX_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceY_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceZ_SP      ), parameter->getParH(lev)->mem_size_real_SP  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceX_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceY_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->forceZ_SP      ), parameter->getParH(lev)->memSizeRealLBnodes  ));
 	//////////////////////////////////////////////////////////////////////////
-	double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP;
+	double tmp = 3. * (double)parameter->getParH(lev)->memSizeRealLBnodes;
 	setMemsizeGPU(tmp, false);
 
 }
 void CudaMemoryManager::cudaCopyBodyForce(int lev)
 {
    	//copy host to device
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceX_SP,  parameter->getParH(lev)->forceX_SP,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceY_SP,  parameter->getParH(lev)->forceY_SP,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceZ_SP,  parameter->getParH(lev)->forceZ_SP,  parameter->getParH(lev)->mem_size_real_SP     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceX_SP,  parameter->getParH(lev)->forceX_SP,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceY_SP,  parameter->getParH(lev)->forceY_SP,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->forceZ_SP,  parameter->getParH(lev)->forceZ_SP,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
 
 }
 void CudaMemoryManager::cudaFreeBodyForce(int lev)
@@ -114,71 +104,71 @@ void CudaMemoryManager::cudaFreeBodyForce(int lev)
 //print
 void CudaMemoryManager::cudaCopyDataToHost(int lev)
 {
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX   , parameter->getParD(lev)->velocityX   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY   , parameter->getParD(lev)->velocityY   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho  , parameter->getParD(lev)->rho  , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
-	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure, parameter->getParD(lev)->pressure, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityX   , parameter->getParD(lev)->velocityX   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityY   , parameter->getParD(lev)->velocityY   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho         , parameter->getParD(lev)->rho         , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
+	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure    , parameter->getParD(lev)->pressure    , parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost));
 }
 //sparse
 void CudaMemoryManager::cudaAllocSP(int lev)
 {
 	//Host
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->typeOfGridNode           ), parameter->getParH(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborX    ), parameter->getParH(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborY    ), parameter->getParH(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborZ    ), parameter->getParH(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho          ), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityX           ), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityY           ), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityZ           ), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressure        ), parameter->getParH(lev)->mem_size_real_SP));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->typeOfGridNode), parameter->getParH(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborX     ), parameter->getParH(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborY     ), parameter->getParH(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborZ     ), parameter->getParH(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho           ), parameter->getParH(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityX     ), parameter->getParH(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityY     ), parameter->getParH(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityZ     ), parameter->getParH(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressure      ), parameter->getParH(lev)->memSizeRealLBnodes    ));
 	//Device
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->typeOfGridNode               ), parameter->getParD(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborX        ), parameter->getParD(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborY        ), parameter->getParD(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborZ        ), parameter->getParD(lev)->mem_size_int_SP    ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho              ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityX               ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityY               ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityZ               ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressure            ), parameter->getParD(lev)->mem_size_real_SP));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributions.f[0]           ), (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParD(lev)->mem_size_real_SP));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->typeOfGridNode    ), parameter->getParD(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborX         ), parameter->getParD(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborY         ), parameter->getParD(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborZ         ), parameter->getParD(lev)->memSizeLonglongLBnodes));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho               ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityX         ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityY         ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityZ         ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressure          ), parameter->getParD(lev)->memSizeRealLBnodes    ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributions.f[0]), (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParD(lev)->memSizeRealLBnodes));
 	//////////////////////////////////////////////////////////////////////////
-	double tmp = 4. * (double)parameter->getParH(lev)->mem_size_int_SP + 5. * (double)parameter->getParH(lev)->mem_size_real_SP + (double)parameter->getD3Qxx() * (double)parameter->getParH(lev)->mem_size_real_SP;
+	double tmp = 4. * (double)parameter->getParH(lev)->memSizeLonglongLBnodes + 5. * (double)parameter->getParH(lev)->memSizeRealLBnodes + (double)parameter->getD3Qxx() * (double)parameter->getParH(lev)->memSizeRealLBnodes;
 	setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopySP(int lev)
 {
 	//copy host to device
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->typeOfGridNode       ,  parameter->getParH(lev)->typeOfGridNode       ,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborX,  parameter->getParH(lev)->neighborX,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborY,  parameter->getParH(lev)->neighborY,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborZ,  parameter->getParH(lev)->neighborZ,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho      ,  parameter->getParH(lev)->rho      ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityX       ,  parameter->getParH(lev)->velocityX       ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityY       ,  parameter->getParH(lev)->velocityY       ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityZ       ,  parameter->getParH(lev)->velocityZ       ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->pressure    ,  parameter->getParH(lev)->pressure    ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->typeOfGridNode, parameter->getParH(lev)->typeOfGridNode,  parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborX     , parameter->getParH(lev)->neighborX     ,  parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborY     , parameter->getParH(lev)->neighborY     ,  parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborZ     , parameter->getParH(lev)->neighborZ     ,  parameter->getParH(lev)->memSizeLonglongLBnodes , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho           , parameter->getParH(lev)->rho           ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityX     , parameter->getParH(lev)->velocityX     ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityY     , parameter->getParH(lev)->velocityY     ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityZ     , parameter->getParH(lev)->velocityZ     ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->pressure      , parameter->getParH(lev)->pressure      ,  parameter->getParH(lev)->memSizeRealLBnodes     , cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeSP(int lev)
 {
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->typeOfGridNode       ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityX       ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityY       ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityZ       ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->rho      ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->pressure    ));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborX));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborY));
-	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborZ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->typeOfGridNode ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityX      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityY      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->velocityZ      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->rho            ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->pressure       ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborX      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborY      ));
+	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborZ      ));
 }
 void CudaMemoryManager::cudaAllocF3SP(int lev)
 {
     //Device
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->g6.g[0]), (unsigned long long)6*(unsigned long long)parameter->getParD(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->g6.g[0]), (unsigned long long)6*(unsigned long long)parameter->getParD(lev)->memSizeRealLBnodes));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = (double)6 * (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = (double)6 * (double)parameter->getParH(lev)->memSizeRealLBnodes;
     setMemsizeGPU(tmp, false);
 }
 
@@ -210,20 +200,20 @@ void CudaMemoryManager::cudaAllocVeloBC(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.q27[0]),  parameter->getD3Qxx()*mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.k),                  mem_size_inflow_Q_k ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vx),                 mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vy),                 mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vz),                 mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.deltaVz),            mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.RhoBC),              mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.k),                             mem_size_inflow_Q_k ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vx),                            mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vy),                            mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.Vz),                            mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.deltaVz),                       mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->velocityBC.RhoBC),                         mem_size_inflow_Q_q ));
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.q27[0]),      parameter->getD3Qxx()*mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.k),                      mem_size_inflow_Q_k ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vx),                     mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vy),                     mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vz),                     mem_size_inflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.deltaVz),                mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.k),                                 mem_size_inflow_Q_k ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vx),                                mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vy),                                mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.Vz),                                mem_size_inflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->velocityBC.deltaVz),                           mem_size_inflow_Q_q ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = (double)mem_size_inflow_Q_k + 4. * (double)mem_size_inflow_Q_q + (double)parameter->getD3Qxx() * (double)mem_size_inflow_Q_q;
@@ -235,11 +225,11 @@ void CudaMemoryManager::cudaCopyVeloBC(int lev)
 	unsigned int mem_size_inflow_Q_q = sizeof(real)*parameter->getParH(lev)->velocityBC.numberOfBCnodes;
 
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.q27[0],  parameter->getParH(lev)->velocityBC.q27[0], parameter->getD3Qxx()* mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.k,       parameter->getParH(lev)->velocityBC.k,                  mem_size_inflow_Q_k,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vx,      parameter->getParH(lev)->velocityBC.Vx,                 mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vy,      parameter->getParH(lev)->velocityBC.Vy,                 mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vz,      parameter->getParH(lev)->velocityBC.Vz,                 mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.deltaVz, parameter->getParH(lev)->velocityBC.deltaVz,            mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.k,       parameter->getParH(lev)->velocityBC.k,                             mem_size_inflow_Q_k,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vx,      parameter->getParH(lev)->velocityBC.Vx,                            mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vy,      parameter->getParH(lev)->velocityBC.Vy,                            mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.Vz,      parameter->getParH(lev)->velocityBC.Vz,                            mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->velocityBC.deltaVz, parameter->getParH(lev)->velocityBC.deltaVz,                       mem_size_inflow_Q_q,  cudaMemcpyHostToDevice));
 
 }
 
@@ -260,15 +250,15 @@ void CudaMemoryManager::cudaAllocOutflowBC(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.q27[0]), parameter->getD3Qxx()*mem_size_outflow_Q_q ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.k),                 mem_size_outflow_Q_k ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.kN),                mem_size_outflow_Q_k ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.RhoBC),             mem_size_outflow_Q_q ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.k),                            mem_size_outflow_Q_k ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.kN),                           mem_size_outflow_Q_k ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->outflowBC.RhoBC),                        mem_size_outflow_Q_q ));
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.q27[0]),     parameter->getD3Qxx()* mem_size_outflow_Q_q ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.k),                      mem_size_outflow_Q_k ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.kN),                     mem_size_outflow_Q_k ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.RhoBC),                  mem_size_outflow_Q_q ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.k),                                 mem_size_outflow_Q_k ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.kN),                                mem_size_outflow_Q_k ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->outflowBC.RhoBC),                             mem_size_outflow_Q_q ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = (double)mem_size_outflow_Q_q + 2. * (double)mem_size_outflow_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_outflow_Q_q;
@@ -280,9 +270,9 @@ void CudaMemoryManager::cudaCopyOutflowBC(int lev)
 	unsigned int mem_size_outflow_Q_q = sizeof(real)*parameter->getParH(lev)->outflowBC.numberOfBCnodes;
 
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.q27[0],  parameter->getParH(lev)->outflowBC.q27[0], parameter->getD3Qxx()* mem_size_outflow_Q_q,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.k,       parameter->getParH(lev)->outflowBC.k,                  mem_size_outflow_Q_k,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.kN,      parameter->getParH(lev)->outflowBC.kN,                 mem_size_outflow_Q_k,  cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.RhoBC,   parameter->getParH(lev)->outflowBC.RhoBC,              mem_size_outflow_Q_q,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.k,       parameter->getParH(lev)->outflowBC.k,                             mem_size_outflow_Q_k,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.kN,      parameter->getParH(lev)->outflowBC.kN,                            mem_size_outflow_Q_k,  cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->outflowBC.RhoBC,   parameter->getParH(lev)->outflowBC.RhoBC,                         mem_size_outflow_Q_q,  cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeOutflowBC(int lev)
 {
@@ -301,13 +291,13 @@ void CudaMemoryManager::cudaAllocNoSlipBC(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.k),                 mem_size_Q_k      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.qread),             mem_size_Q_q_read ));//Geller
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.valueQ),            mem_size_Q_value  ));//Geller
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.k),                            mem_size_Q_k      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.qread),                        mem_size_Q_q_read ));//Geller
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->noSlipBC.valueQ),                       mem_size_Q_value  ));//Geller
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.q27[0]),     parameter->getD3Qxx()* mem_size_Q_q     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.k),                      mem_size_Q_k     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->noSlipBC.k),                                 mem_size_Q_k     ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q;
@@ -319,7 +309,7 @@ void CudaMemoryManager::cudaCopyNoSlipBC(int lev)
 	unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->noSlipBC.numberOfBCnodes;
 
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.q27[0], parameter->getParH(lev)->noSlipBC.q27[0], parameter->getD3Qxx()* mem_size_Q_q,       cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.k,      parameter->getParH(lev)->noSlipBC.k,                  mem_size_Q_k,       cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.k,      parameter->getParH(lev)->noSlipBC.k,                             mem_size_Q_k,       cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeNoSlipBC(int lev)
 {
@@ -336,11 +326,11 @@ void CudaMemoryManager::cudaAllocGeomBC(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.k),                 mem_size_Q_k      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geometryBC.k),                            mem_size_Q_k      ));
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.q27[0]),     parameter->getD3Qxx()* mem_size_Q_q     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.k),                      mem_size_Q_k     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->geometryBC.k),                                 mem_size_Q_k     ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q;
@@ -352,7 +342,7 @@ void CudaMemoryManager::cudaCopyGeomBC(int lev)
 	unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->geometryBC.numberOfBCnodes;
 
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.q27[0], parameter->getParH(lev)->geometryBC.q27[0], parameter->getD3Qxx()* mem_size_Q_q,       cudaMemcpyHostToDevice));
-	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.k,      parameter->getParH(lev)->geometryBC.k,                  mem_size_Q_k,       cudaMemcpyHostToDevice));
+	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->geometryBC.k,      parameter->getParH(lev)->geometryBC.k,                             mem_size_Q_k,       cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeGeomBC(int lev)
 {
@@ -367,15 +357,15 @@ void CudaMemoryManager::cudaAllocPress(int lev)
 
 	//Host
 	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.k),                 mem_size_Q_k      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.kN),                mem_size_Q_k      ));
-	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.RhoBC),             mem_size_Q_q      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.k),                            mem_size_Q_k      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.kN),                           mem_size_Q_k      ));
+	checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->pressureBC.RhoBC),                        mem_size_Q_q      ));
 
 	//Device
 	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.q27[0]),     parameter->getD3Qxx()* mem_size_Q_q     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.k),                      mem_size_Q_k     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.kN),                     mem_size_Q_k     ));
-	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.RhoBC),                  mem_size_Q_q     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.k),                                 mem_size_Q_k     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.kN),                                mem_size_Q_k     ));
+	checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->pressureBC.RhoBC),                             mem_size_Q_q     ));
 
 	//////////////////////////////////////////////////////////////////////////
 	double tmp = 2. * (double)mem_size_Q_k + (double)mem_size_Q_q + (double)parameter->getD3Qxx()*(double)mem_size_Q_q;
@@ -432,9 +422,9 @@ void CudaMemoryManager::cudaAllocLevelForcing(int level)
 {
     real fx_t{ 1. }, fy_t{ 1. }, fz_t{ 1. };
     for (int i = 0; i < level; i++) {
-        fx_t *= vf::lbm::constant::c2o1;
-        fy_t *= vf::lbm::constant::c2o1;
-        fz_t *= vf::lbm::constant::c2o1;
+        fx_t *= vf::basics::constant::c2o1;
+        fy_t *= vf::basics::constant::c2o1;
+        fz_t *= vf::basics::constant::c2o1;
     }
 
     const unsigned int mem_size = sizeof(real) * 3;
@@ -889,17 +879,17 @@ void CudaMemoryManager::cudaFreeProcessNeighborF3Z(int lev, unsigned int process
 void CudaMemoryManager::cudaAllocNeighborWSB(int lev)
 {
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborInverse    ), parameter->getParH(lev)->mem_size_int_SP    ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborInverse    ), parameter->getParH(lev)->memSizeLonglongLBnodes    )); // page-locked host buffer for neighborInverse on level lev; NOTE(review): the size field changed from an int-based to a long-long-based name - confirm it matches the element type of neighborInverse
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborInverse        ), parameter->getParD(lev)->mem_size_int_SP    ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborInverse        ), parameter->getParD(lev)->memSizeLonglongLBnodes    )); // device counterpart; its byte count is read from the device-side parameter struct
     //////////////////////////////////////////////////////////////////////////
-    double tmp = (double)parameter->getParH(lev)->mem_size_int_SP;
+    double tmp = (double)parameter->getParH(lev)->memSizeLonglongLBnodes; // byte count of the single array allocated above (host-side value), passed to setMemsizeGPU
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyNeighborWSB(int lev)
 {
     //copy host to device
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborInverse,  parameter->getParH(lev)->neighborInverse,  parameter->getParH(lev)->mem_size_int_SP     , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->neighborInverse,  parameter->getParH(lev)->neighborInverse,  parameter->getParH(lev)->memSizeLonglongLBnodes     , cudaMemcpyHostToDevice)); // uploads neighborInverse for level lev; the byte count is the host-side value also used by cudaAllocNeighborWSB
 }
 void CudaMemoryManager::cudaFreeNeighborWSB(int lev)
 {
@@ -909,7 +899,7 @@ void CudaMemoryManager::cudaFreeNeighborWSB(int lev)
 void CudaMemoryManager::cudaAllocTurbulentViscosity(int lev)
 {
     //Host
-    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->turbViscosity), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->turbViscosity), parameter->getParH(lev)->memSizeRealLBnodes));
     //Debug
     // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gSij ), parameter->getParH(lev)->mem_size_real_SP));
     // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gSDij), parameter->getParH(lev)->mem_size_real_SP));
@@ -924,7 +914,7 @@ void CudaMemoryManager::cudaAllocTurbulentViscosity(int lev)
     // checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->gDzvz), parameter->getParH(lev)->mem_size_real_SP));
 
     //Device
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->turbViscosity), parameter->getParD(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->turbViscosity), parameter->getParD(lev)->memSizeRealLBnodes));
     //Debug
     // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gSij ), parameter->getParD(lev)->mem_size_real_SP));
     // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gSDij), parameter->getParD(lev)->mem_size_real_SP));
@@ -939,13 +929,13 @@ void CudaMemoryManager::cudaAllocTurbulentViscosity(int lev)
     // checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->gDzvz), parameter->getParD(lev)->mem_size_real_SP));
     // //////////////////////////////////////////////////////////////////////////
     // double tmp = (double)parameter->getParH(lev)->mem_size_real_SP * 12.0;
-    double tmp = (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = (double)parameter->getParH(lev)->memSizeRealLBnodes;
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyTurbulentViscosityHD(int lev)
 {
     //copy host to device
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->turbViscosity, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->turbViscosity, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice));
     //Debug
     // checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->gSij , parameter->getParH(lev)->gSij , parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
     // checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->gSDij, parameter->getParH(lev)->gSDij, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
@@ -962,7 +952,7 @@ void CudaMemoryManager::cudaCopyTurbulentViscosityHD(int lev)
 void CudaMemoryManager::cudaCopyTurbulentViscosityDH(int lev)
 {
     //copy device to host
-    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->turbViscosity, parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
+    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->turbViscosity, parameter->getParD(lev)->turbViscosity, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost));
     //Debug
     // checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->gSij , parameter->getParD(lev)->gSij , parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
     // checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->gSDij, parameter->getParD(lev)->gSDij, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
@@ -1064,29 +1054,29 @@ void CudaMemoryManager::cudaFreeTurbulenceIntensity(int lev)
 void CudaMemoryManager::cudaAllocMedianSP(int lev)
 {
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med      ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med    ), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med      ), parameter->getParH(lev)->memSizeRealLBnodes)); // host side: five page-locked arrays (rho, vx, vy, vz, press), each memSizeRealLBnodes bytes
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med    ), parameter->getParH(lev)->memSizeRealLBnodes));
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho_SP_Med          ), parameter->getParD(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vx_SP_Med           ), parameter->getParD(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vy_SP_Med           ), parameter->getParD(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vz_SP_Med           ), parameter->getParD(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->press_SP_Med        ), parameter->getParD(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->rho_SP_Med          ), parameter->getParD(lev)->memSizeRealLBnodes)); // device side: the same five arrays, sized from the device-side parameter struct
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vx_SP_Med           ), parameter->getParD(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vy_SP_Med           ), parameter->getParD(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vz_SP_Med           ), parameter->getParD(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->press_SP_Med        ), parameter->getParD(lev)->memSizeRealLBnodes));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = 5. * (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = 5. * (double)parameter->getParH(lev)->memSizeRealLBnodes; // 5 x the host-side byte count, one per array allocated above; passed to setMemsizeGPU
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyMedianSP(int lev)
 {
     //copy host to device
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho_SP_Med  ,  parameter->getParH(lev)->rho_SP_Med  ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vx_SP_Med   ,  parameter->getParH(lev)->vx_SP_Med   ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vy_SP_Med   ,  parameter->getParH(lev)->vy_SP_Med   ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vz_SP_Med   ,  parameter->getParH(lev)->vz_SP_Med   ,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->press_SP_Med,  parameter->getParH(lev)->press_SP_Med,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->rho_SP_Med  ,  parameter->getParH(lev)->rho_SP_Med  ,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); // each of the five median arrays is uploaded in full; the byte count is read from the host-side struct
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vx_SP_Med   ,  parameter->getParH(lev)->vx_SP_Med   ,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vy_SP_Med   ,  parameter->getParH(lev)->vy_SP_Med   ,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vz_SP_Med   ,  parameter->getParH(lev)->vz_SP_Med   ,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->press_SP_Med,  parameter->getParH(lev)->press_SP_Med,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice));
 }
 void CudaMemoryManager::cudaFreeMedianSP(int lev)
 {
@@ -1099,11 +1089,11 @@ void CudaMemoryManager::cudaFreeMedianSP(int lev)
 void CudaMemoryManager::cudaAllocMedianOut(int lev)
 {
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out      ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out       ), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out    ), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out      ), parameter->getParH(lev)->memSizeRealLBnodes)); // host-only: five page-locked *_Med_Out arrays; this function allocates no device memory and does not call setMemsizeGPU
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out       ), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out    ), parameter->getParH(lev)->memSizeRealLBnodes));
 }
 void CudaMemoryManager::cudaFreeMedianOut(int lev)
 {
@@ -1116,116 +1106,131 @@ void CudaMemoryManager::cudaFreeMedianOut(int lev)
 //Interface CF
 void CudaMemoryManager::cudaAllocInterfaceCF(int lev)
 {
+    uint mem_size_kCF = sizeof(uint) * parameter->getParH(lev)->coarseToFine.numberOfCells; // bytes per index array: one uint per coarse-to-fine cell, computed locally from the host-side cell count
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->intCF.ICellCFC), parameter->getParH(lev)->mem_size_kCF  ));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->intCF.ICellCFF), parameter->getParH(lev)->mem_size_kCF  ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coarseToFine.coarseCellIndices), mem_size_kCF  )); // page-locked host arrays for the coarse and fine cell indices
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->coarseToFine.fineCellIndices), mem_size_kCF  ));
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->intCF.ICellCFC), parameter->getParD(lev)->mem_size_kCF  ));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->intCF.ICellCFF), parameter->getParD(lev)->mem_size_kCF  ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coarseToFine.coarseCellIndices), mem_size_kCF  )); // device arrays use the same byte count as the host arrays
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->coarseToFine.fineCellIndices), mem_size_kCF  ));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = 2. * (double)parameter->getParH(lev)->mem_size_kCF;
+    double tmp = 2. * (double)mem_size_kCF; // two index arrays (coarse + fine); passed to setMemsizeGPU
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyInterfaceCF(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->intCF.ICellCFC, parameter->getParH(lev)->intCF.ICellCFC, parameter->getParH(lev)->mem_size_kCF, cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->intCF.ICellCFF, parameter->getParH(lev)->intCF.ICellCFF, parameter->getParH(lev)->mem_size_kCF, cudaMemcpyHostToDevice));
+    uint mem_size_kCF = sizeof(uint) * parameter->getParH(lev)->coarseToFine.numberOfCells; // same expression as in cudaAllocInterfaceCF, so the copy covers exactly the allocated bytes
+
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->coarseToFine.coarseCellIndices, parameter->getParH(lev)->coarseToFine.coarseCellIndices, mem_size_kCF, cudaMemcpyHostToDevice)); // host -> device upload of the coarse cell indices
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->coarseToFine.fineCellIndices, parameter->getParH(lev)->coarseToFine.fineCellIndices, mem_size_kCF, cudaMemcpyHostToDevice)); // host -> device upload of the fine cell indices
 }
 void CudaMemoryManager::cudaFreeInterfaceCF(int lev)
 {
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->intCF.ICellCFC));
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->intCF.ICellCFF));
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->coarseToFine.coarseCellIndices)); // releases the page-locked host array only; NOTE(review): the device arrays from cudaAllocInterfaceCF are not freed in this function - confirm that is intended
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->coarseToFine.fineCellIndices));
 }
 //Interface FC
 void CudaMemoryManager::cudaAllocInterfaceFC(int lev)
 {
+    uint mem_size_kFC = sizeof(uint) * parameter->getParH(lev)->fineToCoarse.numberOfCells; // bytes per index array: one uint per fine-to-coarse cell, computed locally from the host-side cell count
+
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->intFC.ICellFCF), parameter->getParH(lev)->mem_size_kFC  ));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->intFC.ICellFCC), parameter->getParH(lev)->mem_size_kFC  ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->fineToCoarse.fineCellIndices), mem_size_kFC  )); // page-locked host arrays for the fine and coarse cell indices
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->fineToCoarse.coarseCellIndices), mem_size_kFC  ));
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->intFC.ICellFCF), parameter->getParD(lev)->mem_size_kFC  ));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->intFC.ICellFCC), parameter->getParD(lev)->mem_size_kFC  ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->fineToCoarse.fineCellIndices), mem_size_kFC  )); // device arrays use the same byte count as the host arrays
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->fineToCoarse.coarseCellIndices), mem_size_kFC  ));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = 2. * (double)parameter->getParH(lev)->mem_size_kFC;
+    double tmp = 2. * (double)mem_size_kFC; // two index arrays (fine + coarse); passed to setMemsizeGPU
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyInterfaceFC(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->intFC.ICellFCF, parameter->getParH(lev)->intFC.ICellFCF, parameter->getParH(lev)->mem_size_kFC, cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->intFC.ICellFCC, parameter->getParH(lev)->intFC.ICellFCC, parameter->getParH(lev)->mem_size_kFC, cudaMemcpyHostToDevice));
+    uint mem_size_kFC = sizeof(uint) * parameter->getParH(lev)->fineToCoarse.numberOfCells; // same expression as in cudaAllocInterfaceFC, so the copy covers exactly the allocated bytes
+
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->fineToCoarse.fineCellIndices, parameter->getParH(lev)->fineToCoarse.fineCellIndices, mem_size_kFC, cudaMemcpyHostToDevice)); // host -> device upload of the fine cell indices
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->fineToCoarse.coarseCellIndices, parameter->getParH(lev)->fineToCoarse.coarseCellIndices, mem_size_kFC, cudaMemcpyHostToDevice)); // host -> device upload of the coarse cell indices
 }
 void CudaMemoryManager::cudaCheckInterfaceFCBulk(int lev)
 {
     // only use for testing!
-    size_t memsize = sizeof(uint) * parameter->getParH(lev)->intFCBulk.kFC;
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->intFCBulk.ICellFCC, parameter->getParH(lev)->intFCBulk.ICellFCC, memsize, cudaMemcpyDeviceToDevice));
-    for (uint i = 0; i < parameter->getParH(lev)->intFCBulk.kFC; i++)
-        printf("%d %d\n", i, parameter->getParH(lev)->intFCBulk.ICellFCC[i]);
+    size_t memsize = sizeof(uint) * parameter->getParH(lev)->fineToCoarseBulk.numberOfCells;
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->fineToCoarseBulk.coarseCellIndices, parameter->getParH(lev)->fineToCoarseBulk.coarseCellIndices, memsize, cudaMemcpyDeviceToDevice));
+    for (uint i = 0; i < parameter->getParH(lev)->fineToCoarseBulk.numberOfCells; i++)
+        printf("%d %d\n", i, parameter->getParH(lev)->fineToCoarseBulk.coarseCellIndices[i]);
 }
 void CudaMemoryManager::cudaFreeInterfaceFC(int lev)
 {
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->intFC.ICellFCF));
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->intFC.ICellFCC));
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->fineToCoarse.fineCellIndices)); // releases the page-locked host array only; NOTE(review): the device arrays from cudaAllocInterfaceFC are not freed in this function - confirm that is intended
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->fineToCoarse.coarseCellIndices));
 }
 //Interface Offset CF
 void CudaMemoryManager::cudaAllocInterfaceOffCF(int lev)
 {
+    uint mem_size_kCF_off = sizeof(real) * parameter->getParH(lev)->coarseToFine.numberOfCells; // bytes per offset array: one real per coarse-to-fine cell; the count is read from coarseToFine, not from neighborCoarseToFine
+
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->offCF.xOffCF),   parameter->getParH(lev)->mem_size_kCF_off  ));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->offCF.yOffCF),   parameter->getParH(lev)->mem_size_kCF_off  ));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->offCF.zOffCF),   parameter->getParH(lev)->mem_size_kCF_off  ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborCoarseToFine.x), mem_size_kCF_off  )); // page-locked host arrays for the x, y and z components
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborCoarseToFine.y), mem_size_kCF_off  ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborCoarseToFine.z), mem_size_kCF_off  ));
     getLastCudaError("Allocate host memory");
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->offCF.xOffCF),   parameter->getParD(lev)->mem_size_kCF_off  ));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->offCF.yOffCF),   parameter->getParD(lev)->mem_size_kCF_off  ));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->offCF.zOffCF),   parameter->getParD(lev)->mem_size_kCF_off  ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborCoarseToFine.x), mem_size_kCF_off  )); // device arrays use the same byte count as the host arrays
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborCoarseToFine.y), mem_size_kCF_off  ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborCoarseToFine.z), mem_size_kCF_off  ));
     getLastCudaError("Allocate device memory");
     //////////////////////////////////////////////////////////////////////////
-    double tmp = 3. * (double)parameter->getParH(lev)->mem_size_kCF_off;
+    double tmp = 3. * (double)mem_size_kCF_off; // three arrays (x, y, z); passed to setMemsizeGPU
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyInterfaceOffCF(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->offCF.xOffCF,   parameter->getParH(lev)->offCF.xOffCF,   parameter->getParH(lev)->mem_size_kCF_off, cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->offCF.yOffCF,   parameter->getParH(lev)->offCF.yOffCF,   parameter->getParH(lev)->mem_size_kCF_off, cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->offCF.zOffCF,   parameter->getParH(lev)->offCF.zOffCF,   parameter->getParH(lev)->mem_size_kCF_off, cudaMemcpyHostToDevice));
+    size_t mem_size_kCF_off = sizeof(real) * parameter->getParH(lev)->coarseToFine.numberOfCells; // bytes per offset array; size_t avoids truncating the product to 32 bit
+
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->neighborCoarseToFine.x, parameter->getParH(lev)->neighborCoarseToFine.x, mem_size_kCF_off, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->neighborCoarseToFine.y, parameter->getParH(lev)->neighborCoarseToFine.y, mem_size_kCF_off, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->neighborCoarseToFine.z, parameter->getParH(lev)->neighborCoarseToFine.z, mem_size_kCF_off, cudaMemcpyHostToDevice));
     getLastCudaError("Copy host memory to device");
 }
 void CudaMemoryManager::cudaFreeInterfaceOffCF(int lev)
 {
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->offCF.xOffCF));
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->offCF.yOffCF));
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->offCF.zOffCF));
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborCoarseToFine.x)); // host-side x offsets
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborCoarseToFine.y)); // host-side y offsets
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborCoarseToFine.z)); // host-side z offsets; no device-side free is issued here
 }
 //Interface Offset FC
 void CudaMemoryManager::cudaAllocInterfaceOffFC(int lev)
 {
+    size_t mem_size_kFC_off = sizeof(real) * parameter->getParH(lev)->fineToCoarse.numberOfCells; // bytes per offset array; size_t avoids truncating the product to 32 bit
+
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->offFC.xOffFC),   parameter->getParH(lev)->mem_size_kFC_off  ));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->offFC.yOffFC),   parameter->getParH(lev)->mem_size_kFC_off  ));
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->offFC.zOffFC),   parameter->getParH(lev)->mem_size_kFC_off  ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborFineToCoarse.x), mem_size_kFC_off  ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborFineToCoarse.y), mem_size_kFC_off  ));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->neighborFineToCoarse.z), mem_size_kFC_off  ));
     getLastCudaError("Allocate host memory");
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->offFC.xOffFC),   parameter->getParD(lev)->mem_size_kFC_off  ));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->offFC.yOffFC),   parameter->getParD(lev)->mem_size_kFC_off  ));
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->offFC.zOffFC),   parameter->getParD(lev)->mem_size_kFC_off  ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborFineToCoarse.x), mem_size_kFC_off  ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborFineToCoarse.y), mem_size_kFC_off  ));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->neighborFineToCoarse.z), mem_size_kFC_off  ));
     getLastCudaError("Allocate device memory");
     //////////////////////////////////////////////////////////////////////////
-    double tmp = 3. * (double)parameter->getParH(lev)->mem_size_kFC_off;
+    double tmp = 3. * (double)mem_size_kFC_off; // three device arrays (x, y, z) are reported to setMemsizeGPU
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyInterfaceOffFC(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->offFC.xOffFC,   parameter->getParH(lev)->offFC.xOffFC,   parameter->getParH(lev)->mem_size_kFC_off, cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->offFC.yOffFC,   parameter->getParH(lev)->offFC.yOffFC,   parameter->getParH(lev)->mem_size_kFC_off, cudaMemcpyHostToDevice));
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->offFC.zOffFC,   parameter->getParH(lev)->offFC.zOffFC,   parameter->getParH(lev)->mem_size_kFC_off, cudaMemcpyHostToDevice));
+    size_t mem_size_kFC_off = sizeof(real) * parameter->getParH(lev)->fineToCoarse.numberOfCells; // bytes per offset array; size_t avoids truncating the product to 32 bit
+
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->neighborFineToCoarse.x, parameter->getParH(lev)->neighborFineToCoarse.x, mem_size_kFC_off, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->neighborFineToCoarse.y, parameter->getParH(lev)->neighborFineToCoarse.y, mem_size_kFC_off, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->neighborFineToCoarse.z, parameter->getParH(lev)->neighborFineToCoarse.z, mem_size_kFC_off, cudaMemcpyHostToDevice));
     getLastCudaError("Copy host memory to device");
 }
 void CudaMemoryManager::cudaFreeInterfaceOffFC(int lev)
 {
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->offFC.xOffFC));
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->offFC.yOffFC));
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->offFC.zOffFC));
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborFineToCoarse.x)); // host-side x offsets
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborFineToCoarse.y)); // host-side y offsets
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->neighborFineToCoarse.z)); // host-side z offsets; no device-side free is issued here
 }
 
 //Inlet
@@ -2042,15 +2047,15 @@ void CudaMemoryManager::cudaFreeMeasurePointsIndex(int lev)
 }
 void CudaMemoryManager::cudaAllocFsForCheckPointAndRestart(int lev)
 {
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->distributions.f[0] ),           (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->distributions.f[0] ),           (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes)); // one host buffer of getD3Qxx() * memSizeRealLBnodes bytes; product formed in 64 bit
 }
 void CudaMemoryManager::cudaCopyFsForRestart(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->distributions.f[0],  parameter->getParH(lev)->distributions.f[0],     (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->distributions.f[0],  parameter->getParH(lev)->distributions.f[0],     (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyHostToDevice)); // host -> device, getD3Qxx() * memSizeRealLBnodes bytes
 }
 void CudaMemoryManager::cudaCopyFsForCheckPoint(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->distributions.f[0],  parameter->getParD(lev)->distributions.f[0],     (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->distributions.f[0],  parameter->getParD(lev)->distributions.f[0],     (unsigned long long)parameter->getD3Qxx()*(unsigned long long)parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); // device -> host, getD3Qxx() * memSizeRealLBnodes bytes
 }
 void CudaMemoryManager::cudaFreeFsForCheckPointAndRestart(int lev)
 {
@@ -2529,24 +2534,24 @@ void CudaMemoryManager::cudaFreePorousMedia(PorousMedia* pm, int lev)
 void CudaMemoryManager::cudaAllocConcentration(int lev)
 {
     //Host
-    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->Conc), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->concentration), parameter->getParH(lev)->memSizeRealLBnodes)); // size taken from the host parameter set
     //Device
-    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->Conc), parameter->getParD(lev)->mem_size_real_SP));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->concentration), parameter->getParD(lev)->memSizeRealLBnodes)); // size taken from the device parameter set
     //////////////////////////////////////////////////////////////////////////
-    double tmp = (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = (double)parameter->getParH(lev)->memSizeRealLBnodes; // one device array is reported to setMemsizeGPU
     setMemsizeGPU(tmp, false);
 }
 void CudaMemoryManager::cudaCopyConcentrationDeviceToHost(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->Conc, parameter->getParD(lev)->Conc,  parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->concentration, parameter->getParD(lev)->concentration,  parameter->getParH(lev)->memSizeRealLBnodes , cudaMemcpyDeviceToHost)); // device -> host, memSizeRealLBnodes bytes
 }
 void CudaMemoryManager::cudaCopyConcentrationHostToDevice(int lev)
 {
-    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->Conc, parameter->getParH(lev)->Conc, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->concentration, parameter->getParH(lev)->concentration, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice)); // host -> device, memSizeRealLBnodes bytes
 }
 void CudaMemoryManager::cudaFreeConcentration(int lev)
 {
-    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->Conc));
+    checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->concentration)); // host-side array only; no device-side free is issued here
 }
 //////////////////////////////////////////////////////////////////////////
 void CudaMemoryManager::cudaAllocTempFs(int lev)
@@ -2554,14 +2559,14 @@ void CudaMemoryManager::cudaAllocTempFs(int lev)
     //Device
     if (parameter->getDiffMod() == 7)
     {
-        checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD7.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->mem_size_real_SP));
+        checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD7.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->memSizeRealLBnodes)); // 7 * memSizeRealLBnodes bytes in one device buffer
     }
     else if (parameter->getDiffMod() == 27)
     {
-        checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD27.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->mem_size_real_SP));
+        checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->distributionsAD.f[0]), parameter->getDiffMod()*parameter->getParH(lev)->memSizeRealLBnodes)); // 27 * memSizeRealLBnodes bytes in one device buffer
     }
     //////////////////////////////////////////////////////////////////////////
-    double tmp = (double)(parameter->getDiffMod() * parameter->getParH(lev)->mem_size_real_SP);
+    double tmp = (double)(parameter->getDiffMod() * parameter->getParH(lev)->memSizeRealLBnodes); // reported even when getDiffMod() is neither 7 nor 27 and nothing was allocated
     setMemsizeGPU(tmp, false);
 }
 //////////////////////////////////////////////////////////////////////////
@@ -2756,12 +2761,12 @@ void CudaMemoryManager::cudaFreeConcFile(int lev)
 void CudaMemoryManager::cudaAllocMedianOutAD(int lev)
 {
 	//Host
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out),   parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out),    parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out),    parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out),    parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out), parameter->getParH(lev)->mem_size_real_SP));
-	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->Conc_Med_Out),     parameter->getParH(lev)->mem_size_real_SP));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->rho_SP_Med_Out),   parameter->getParH(lev)->memSizeRealLBnodes)); // each of the six host output arrays gets memSizeRealLBnodes bytes
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vx_SP_Med_Out),    parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vy_SP_Med_Out),    parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->vz_SP_Med_Out),    parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->press_SP_Med_Out), parameter->getParH(lev)->memSizeRealLBnodes));
+	checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->Conc_Med_Out),     parameter->getParH(lev)->memSizeRealLBnodes)); // host only: no device allocation and no setMemsizeGPU call in this function
 }
 void CudaMemoryManager::cudaFreeMedianOutAD(int lev)
 {
@@ -3015,31 +3020,31 @@ void CudaMemoryManager::cudaFreeProcessNeighborADZ(int lev, unsigned int process
 void CudaMemoryManager::cudaAlloc2ndOrderDerivitivesIsoTest(int lev)
 {
     //Host
-    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dxxUx), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dyyUy), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dzzUz), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dxxUx), parameter->getParH(lev)->memSizeRealLBnodes)); // memSizeRealLBnodes bytes per array
+    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dyyUy), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors(cudaMallocHost((void**) &(parameter->getParH(lev)->dzzUz), parameter->getParH(lev)->memSizeRealLBnodes));
     //Device (spinning ship)
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dxxUx), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dyyUy), parameter->getParH(lev)->mem_size_real_SP));
-    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dzzUz), parameter->getParH(lev)->mem_size_real_SP));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dxxUx), parameter->getParH(lev)->memSizeRealLBnodes)); // device arrays are sized from the host parameter set
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dyyUy), parameter->getParH(lev)->memSizeRealLBnodes));
+    checkCudaErrors(cudaMalloc((void**) &(parameter->getParD(lev)->dzzUz), parameter->getParH(lev)->memSizeRealLBnodes));
     //////////////////////////////////////////////////////////////////////////
-    double tmp = 3. * (double)parameter->getParH(lev)->mem_size_real_SP;
+    double tmp = 3. * (double)parameter->getParH(lev)->memSizeRealLBnodes; // three device arrays are reported to setMemsizeGPU
     setMemsizeGPU(tmp, false);
     //printf("Coord = %f MB",tmp/1000000.);
 }
 void CudaMemoryManager::cudaCopy2ndOrderDerivitivesIsoTestDH(int lev)
 {
     //copy device to host
-    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dxxUx, parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
-    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dyyUy, parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
-    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dzzUz, parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyDeviceToHost));
+    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dxxUx, parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost)); // memSizeRealLBnodes bytes per array
+    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dyyUy, parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost));
+    checkCudaErrors(cudaMemcpy(parameter->getParH(lev)->dzzUz, parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyDeviceToHost));
 }
 void CudaMemoryManager::cudaCopy2ndOrderDerivitivesIsoTestHD(int lev)
 {
     //copy host to device
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->dxxUx, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->dyyUy, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
-    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->dzzUz, parameter->getParH(lev)->mem_size_real_SP, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dxxUx, parameter->getParH(lev)->dxxUx, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice)); // memSizeRealLBnodes bytes per array
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dyyUy, parameter->getParH(lev)->dyyUy, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice));
+    checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->dzzUz, parameter->getParH(lev)->dzzUz, parameter->getParH(lev)->memSizeRealLBnodes, cudaMemcpyHostToDevice));
 
 }
 void CudaMemoryManager::cudaFree2ndOrderDerivitivesIsoTest(int lev)
@@ -3168,33 +3173,33 @@ void CudaMemoryManager::cudaFreeBladeOrientations(ActuatorFarm* actuatorFarm)
 
 void CudaMemoryManager::cudaAllocBladeCoords(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsXH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) ); // host arrays: one real per grid node
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsYH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeCoordsZH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );    
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) ); // device arrays, current timestep
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
     
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) ); // device arrays, previous timestep
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeCoordsZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
 
-    setMemsizeGPU(6.f*actuatorFarm->getNumberOfNodes(), false);
+    setMemsizeGPU(6.*sizeof(real)*actuatorFarm->getNumberOfGridNodes(), false); // six device arrays, reported in bytes like the other allocators (sizeof(real) was missing)
 }
 
 void CudaMemoryManager::cudaCopyBladeCoordsHtoD(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsXDCurrentTimestep, actuatorFarm->bladeCoordsXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsYDCurrentTimestep, actuatorFarm->bladeCoordsYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsZDCurrentTimestep, actuatorFarm->bladeCoordsZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsXDCurrentTimestep, actuatorFarm->bladeCoordsXH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) ); // only the current-timestep device arrays are written
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsYDCurrentTimestep, actuatorFarm->bladeCoordsYH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsZDCurrentTimestep, actuatorFarm->bladeCoordsZH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) );
 }
 
 void CudaMemoryManager::cudaCopyBladeCoordsDtoH(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsXH, actuatorFarm->bladeCoordsXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsYH, actuatorFarm->bladeCoordsYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsZH, actuatorFarm->bladeCoordsZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsXH, actuatorFarm->bladeCoordsXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) ); // only the current-timestep device arrays are read back
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsYH, actuatorFarm->bladeCoordsYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeCoordsZH, actuatorFarm->bladeCoordsZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) );
 }
 
 void CudaMemoryManager::cudaFreeBladeCoords(ActuatorFarm* actuatorFarm)
@@ -3214,16 +3219,16 @@ void CudaMemoryManager::cudaFreeBladeCoords(ActuatorFarm* actuatorFarm)
 
 void CudaMemoryManager::cudaAllocBladeIndices(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeIndicesH, sizeof(uint)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeIndicesH, sizeof(uint)*actuatorFarm->getNumberOfGridNodes()) ); // host array: one uint per grid node
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeIndicesD, sizeof(uint)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeIndicesD, sizeof(uint)*actuatorFarm->getNumberOfGridNodes()) ); // device array of the same size
 
-    setMemsizeGPU(sizeof(uint)*actuatorFarm->getNumberOfNodes(), false);
+    setMemsizeGPU(sizeof(uint)*actuatorFarm->getNumberOfGridNodes(), false); // the single device array is reported in bytes
 }
 
 void CudaMemoryManager::cudaCopyBladeIndicesHtoD(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeIndicesD, actuatorFarm->bladeIndicesH, sizeof(uint)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeIndicesD, actuatorFarm->bladeIndicesH, sizeof(uint)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) ); // host -> device, one uint per grid node
 }
 
 void CudaMemoryManager::cudaFreeBladeIndices(ActuatorFarm* actuatorFarm)
@@ -3235,33 +3240,33 @@ void CudaMemoryManager::cudaFreeBladeIndices(ActuatorFarm* actuatorFarm)
 
 void CudaMemoryManager::cudaAllocBladeVelocities(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesXH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) ); // host arrays: one real per grid node
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesYH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeVelocitiesZH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) ); // device arrays, current timestep
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) ); // device arrays, previous timestep
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeVelocitiesZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
 
-    setMemsizeGPU(3.*sizeof(real)*actuatorFarm->getNumberOfNodes(), false);
+    setMemsizeGPU(6.*sizeof(real)*actuatorFarm->getNumberOfGridNodes(), false); // six device arrays are allocated above (was counted as three)
 }
 
 void CudaMemoryManager::cudaCopyBladeVelocitiesHtoD(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesXDCurrentTimestep, actuatorFarm->bladeVelocitiesXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesYDCurrentTimestep, actuatorFarm->bladeVelocitiesYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesZDCurrentTimestep, actuatorFarm->bladeVelocitiesZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesXDCurrentTimestep, actuatorFarm->bladeVelocitiesXH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) ); // only the current-timestep device arrays are written
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesYDCurrentTimestep, actuatorFarm->bladeVelocitiesYH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesZDCurrentTimestep, actuatorFarm->bladeVelocitiesZH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) );
 }
 
 void CudaMemoryManager::cudaCopyBladeVelocitiesDtoH(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesXH, actuatorFarm->bladeVelocitiesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesYH, actuatorFarm->bladeVelocitiesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesZH, actuatorFarm->bladeVelocitiesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesXH, actuatorFarm->bladeVelocitiesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) ); // only the current-timestep device arrays are read back
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesYH, actuatorFarm->bladeVelocitiesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeVelocitiesZH, actuatorFarm->bladeVelocitiesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) );
 }
 
 void CudaMemoryManager::cudaFreeBladeVelocities(ActuatorFarm* actuatorFarm)
@@ -3281,33 +3286,33 @@ void CudaMemoryManager::cudaFreeBladeVelocities(ActuatorFarm* actuatorFarm)
 
 void CudaMemoryManager::cudaAllocBladeForces(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesXH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesYH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesZH, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesXH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesYH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMallocHost((void**) &actuatorFarm->bladeForcesZH, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
 
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
-    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesXDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesYDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
+    checkCudaErrors( cudaMalloc((void**) &actuatorFarm->bladeForcesZDPreviousTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes()) );
 
-    setMemsizeGPU(3.*sizeof(real)*actuatorFarm->getNumberOfNodes(), false);
+    setMemsizeGPU(3.*sizeof(real)*actuatorFarm->getNumberOfGridNodes(), false);
 }
 
 void CudaMemoryManager::cudaCopyBladeForcesHtoD(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesXDCurrentTimestep, actuatorFarm->bladeForcesXH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesYDCurrentTimestep, actuatorFarm->bladeForcesYH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesZDCurrentTimestep, actuatorFarm->bladeForcesZH, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesXDCurrentTimestep, actuatorFarm->bladeForcesXH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesYDCurrentTimestep, actuatorFarm->bladeForcesYH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesZDCurrentTimestep, actuatorFarm->bladeForcesZH, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyHostToDevice) );
 }
 
 void CudaMemoryManager::cudaCopyBladeForcesDtoH(ActuatorFarm* actuatorFarm)
 {
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesXH, actuatorFarm->bladeForcesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesYH, actuatorFarm->bladeForcesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
-    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesZH, actuatorFarm->bladeForcesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesXH, actuatorFarm->bladeForcesXDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesYH, actuatorFarm->bladeForcesYDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) );
+    checkCudaErrors( cudaMemcpy(actuatorFarm->bladeForcesZH, actuatorFarm->bladeForcesZDCurrentTimestep, sizeof(real)*actuatorFarm->getNumberOfGridNodes(), cudaMemcpyDeviceToHost) );
 }
 
 void CudaMemoryManager::cudaFreeBladeForces(ActuatorFarm* actuatorFarm)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
index 67347d005b1abb6ae6b6badf8f187a11b2bde15a..e2f2e8658b6ef7a9453546454dd8e1f643574e17 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
@@ -32,8 +32,8 @@ public:
     void setMemsizeGPU(double admem, bool reset);
     double getMemsizeGPU();
 
-    void cudaAllocFull(int lev);
-    void cudaFreeFull(int lev);
+    //void cudaAllocFull(int lev); //DEPRECATED: related to full matrix
+    //void cudaFreeFull(int lev);  //DEPRECATED: related to full matrix
 
     void cudaCopyPrint(int lev);
     void cudaCopyMedianPrint(int lev);
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
index bbce8181d814fc8b9dbb086764becb73a86c0eda..59b24df1061af16e79ad35eeceb949e6326407fd 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
@@ -7,11 +7,11 @@
 //////////////////////////////////////////////////////////////////////////
 /* Device code */
 #include "LBM/LB.h" 
-#include "lbm/constants/NumericConstants.h"
+#include "basics/constants/NumericConstants.h"
 #include "lbm/constants/D3Q27.h"
 
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -21,7 +21,7 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 																unsigned int* neighborY,
 																unsigned int* neighborZ,
 																real* DDStart,
-																int size_Mat,
+																unsigned long long numberOfLBnodes,
 																int level,
 																real* forces,
 																bool EvenOrOdd)
@@ -37,7 +37,7 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -48,63 +48,63 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -137,33 +137,33 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -975,7 +975,7 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 														real* dxxUx,
 														real* dyyUy,
 														real* dzzUz,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -989,7 +989,7 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -1000,63 +1000,63 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1089,33 +1089,33 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -2016,7 +2016,7 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 													real* coordY,
 													real* coordZ,
 													real* DDStart,
-													int size_Mat,
+													unsigned long long numberOfLBnodes,
 													bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -2030,7 +2030,7 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -2041,63 +2041,63 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -2158,33 +2158,33 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//Ship
 			real coord0X = 281.125f;//7.5f;
@@ -3238,7 +3238,7 @@ __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -3252,7 +3252,7 @@ __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -3263,63 +3263,63 @@ __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -3380,33 +3380,33 @@ __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -4510,7 +4510,7 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
 														real* DDStart,
-														int size_Mat,
+														unsigned long long numberOfLBnodes,
 														bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -4524,7 +4524,7 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<size_Mat)
+	if(k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -4535,63 +4535,63 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -4624,33 +4624,33 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 			unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real f_E     = (D.f[DIR_P00   ])[ke   ];// +  c2over27 ;
-			real f_W     = (D.f[DIR_M00   ])[kw   ];// +  c2over27 ;
-			real f_N     = (D.f[DIR_0P0   ])[kn   ];// +  c2over27 ;
-			real f_S     = (D.f[DIR_0M0   ])[ks   ];// +  c2over27 ;
-			real f_T     = (D.f[DIR_00P   ])[kt   ];// +  c2over27 ;
-			real f_B     = (D.f[DIR_00M   ])[kb   ];// +  c2over27 ;
-			real f_NE    = (D.f[DIR_PP0  ])[kne  ];// +  c1over54 ;
-			real f_SW    = (D.f[DIR_MM0  ])[ksw  ];// +  c1over54 ;
-			real f_SE    = (D.f[DIR_PM0  ])[kse  ];// +  c1over54 ;
-			real f_NW    = (D.f[DIR_MP0  ])[knw  ];// +  c1over54 ;
-			real f_TE    = (D.f[DIR_P0P  ])[kte  ];// +  c1over54 ;
-			real f_BW    = (D.f[DIR_M0M  ])[kbw  ];// +  c1over54 ;
-			real f_BE    = (D.f[DIR_P0M  ])[kbe  ];// +  c1over54 ;
-			real f_TW    = (D.f[DIR_M0P  ])[ktw  ];// +  c1over54 ;
-			real f_TN    = (D.f[DIR_0PP  ])[ktn  ];// +  c1over54 ;
-			real f_BS    = (D.f[DIR_0MM  ])[kbs  ];// +  c1over54 ;
-			real f_BN    = (D.f[DIR_0PM  ])[kbn  ];// +  c1over54 ;
-			real f_TS    = (D.f[DIR_0MP  ])[kts  ];// +  c1over54 ;
+			real f_E     = (D.f[DIR_P00])[ke   ];// +  c2over27 ;
+			real f_W     = (D.f[DIR_M00])[kw   ];// +  c2over27 ;
+			real f_N     = (D.f[DIR_0P0])[kn   ];// +  c2over27 ;
+			real f_S     = (D.f[DIR_0M0])[ks   ];// +  c2over27 ;
+			real f_T     = (D.f[DIR_00P])[kt   ];// +  c2over27 ;
+			real f_B     = (D.f[DIR_00M])[kb   ];// +  c2over27 ;
+			real f_NE    = (D.f[DIR_PP0])[kne  ];// +  c1over54 ;
+			real f_SW    = (D.f[DIR_MM0])[ksw  ];// +  c1over54 ;
+			real f_SE    = (D.f[DIR_PM0])[kse  ];// +  c1over54 ;
+			real f_NW    = (D.f[DIR_MP0])[knw  ];// +  c1over54 ;
+			real f_TE    = (D.f[DIR_P0P])[kte  ];// +  c1over54 ;
+			real f_BW    = (D.f[DIR_M0M])[kbw  ];// +  c1over54 ;
+			real f_BE    = (D.f[DIR_P0M])[kbe  ];// +  c1over54 ;
+			real f_TW    = (D.f[DIR_M0P])[ktw  ];// +  c1over54 ;
+			real f_TN    = (D.f[DIR_0PP])[ktn  ];// +  c1over54 ;
+			real f_BS    = (D.f[DIR_0MM])[kbs  ];// +  c1over54 ;
+			real f_BN    = (D.f[DIR_0PM])[kbn  ];// +  c1over54 ;
+			real f_TS    = (D.f[DIR_0MP])[kts  ];// +  c1over54 ;
 			real f_R     = (D.f[DIR_000])[kzero];// +  c8over27 ;
-			real f_TNE   = (D.f[DIR_PPP ])[ktne ];// +  c1over216;
-			real f_TSW   = (D.f[DIR_MMP ])[ktsw ];// +  c1over216;
-			real f_TSE   = (D.f[DIR_PMP ])[ktse ];// +  c1over216;
-			real f_TNW   = (D.f[DIR_MPP ])[ktnw ];// +  c1over216;
-			real f_BNE   = (D.f[DIR_PPM ])[kbne ];// +  c1over216;
-			real f_BSW   = (D.f[DIR_MMM ])[kbsw ];// +  c1over216;
-			real f_BSE   = (D.f[DIR_PMM ])[kbse ];// +  c1over216;
-			real f_BNW   = (D.f[DIR_MPM ])[kbnw ];// +  c1over216;
+			real f_TNE   = (D.f[DIR_PPP])[ktne ];// +  c1over216;
+			real f_TSW   = (D.f[DIR_MMP])[ktsw ];// +  c1over216;
+			real f_TSE   = (D.f[DIR_PMP])[ktse ];// +  c1over216;
+			real f_TNW   = (D.f[DIR_MPP])[ktnw ];// +  c1over216;
+			real f_BNE   = (D.f[DIR_PPM])[kbne ];// +  c1over216;
+			real f_BSW   = (D.f[DIR_MMM])[kbsw ];// +  c1over216;
+			real f_BSE   = (D.f[DIR_PMM])[kbse ];// +  c1over216;
+			real f_BNW   = (D.f[DIR_MPM])[kbnw ];// +  c1over216;
 			////////////////////////////////////////////////////////////////////////////////////
 			real fx = c0o1;
 			real fy = c0o1;
@@ -5451,7 +5451,7 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DDStart,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	bool EvenOrOdd)
@@ -5467,7 +5467,7 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k<size_Mat)
+	if (k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -5478,63 +5478,63 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -5568,33 +5568,33 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k   ];
-			real mfabb = (D.f[DIR_M00   ])[kw  ];
-			real mfbcb = (D.f[DIR_0P0   ])[k   ];
-			real mfbab = (D.f[DIR_0M0   ])[ks  ];
-			real mfbbc = (D.f[DIR_00P   ])[k   ];
-			real mfbba = (D.f[DIR_00M   ])[kb  ];
-			real mfccb = (D.f[DIR_PP0  ])[k   ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw ];
-			real mfcab = (D.f[DIR_PM0  ])[ks  ];
-			real mfacb = (D.f[DIR_MP0  ])[kw  ];
-			real mfcbc = (D.f[DIR_P0P  ])[k   ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw ];
-			real mfcba = (D.f[DIR_P0M  ])[kb  ];
-			real mfabc = (D.f[DIR_M0P  ])[kw  ];
-			real mfbcc = (D.f[DIR_0PP  ])[k   ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs ];
-			real mfbca = (D.f[DIR_0PM  ])[kb  ];
-			real mfbac = (D.f[DIR_0MP  ])[ks  ];
+			real mfcbb = (D.f[DIR_P00])[k   ];
+			real mfabb = (D.f[DIR_M00])[kw  ];
+			real mfbcb = (D.f[DIR_0P0])[k   ];
+			real mfbab = (D.f[DIR_0M0])[ks  ];
+			real mfbbc = (D.f[DIR_00P])[k   ];
+			real mfbba = (D.f[DIR_00M])[kb  ];
+			real mfccb = (D.f[DIR_PP0])[k   ];
+			real mfaab = (D.f[DIR_MM0])[ksw ];
+			real mfcab = (D.f[DIR_PM0])[ks  ];
+			real mfacb = (D.f[DIR_MP0])[kw  ];
+			real mfcbc = (D.f[DIR_P0P])[k   ];
+			real mfaba = (D.f[DIR_M0M])[kbw ];
+			real mfcba = (D.f[DIR_P0M])[kb  ];
+			real mfabc = (D.f[DIR_M0P])[kw  ];
+			real mfbcc = (D.f[DIR_0PP])[k   ];
+			real mfbaa = (D.f[DIR_0MM])[kbs ];
+			real mfbca = (D.f[DIR_0PM])[kb  ];
+			real mfbac = (D.f[DIR_0MP])[ks  ];
 			real mfbbb = (D.f[DIR_000])[k   ];
-			real mfccc = (D.f[DIR_PPP ])[k   ];
-			real mfaac = (D.f[DIR_MMP ])[ksw ];
-			real mfcac = (D.f[DIR_PMP ])[ks  ];
-			real mfacc = (D.f[DIR_MPP ])[kw  ];
-			real mfcca = (D.f[DIR_PPM ])[kb  ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs ];
-			real mfaca = (D.f[DIR_MPM ])[kbw ];
+			real mfccc = (D.f[DIR_PPP])[k   ];
+			real mfaac = (D.f[DIR_MMP])[ksw ];
+			real mfcac = (D.f[DIR_PMP])[ks  ];
+			real mfacc = (D.f[DIR_MPP])[kw  ];
+			real mfcca = (D.f[DIR_PPM])[kb  ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs ];
+			real mfaca = (D.f[DIR_MPM])[kbw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -6349,33 +6349,33 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 					((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 			mfbbb += drho - drhoPost;
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[DIR_P00   ])[k   ] = mfabb;                                                                   
-			(D.f[DIR_M00   ])[kw  ] = mfcbb;                                                                 
-			(D.f[DIR_0P0   ])[k   ] = mfbab;
-			(D.f[DIR_0M0   ])[ks  ] = mfbcb;
-			(D.f[DIR_00P   ])[k   ] = mfbba;
-			(D.f[DIR_00M   ])[kb  ] = mfbbc;
-			(D.f[DIR_PP0  ])[k   ] = mfaab;
-			(D.f[DIR_MM0  ])[ksw ] = mfccb;
-			(D.f[DIR_PM0  ])[ks  ] = mfacb;
-			(D.f[DIR_MP0  ])[kw  ] = mfcab;
-			(D.f[DIR_P0P  ])[k   ] = mfaba;
-			(D.f[DIR_M0M  ])[kbw ] = mfcbc;
-			(D.f[DIR_P0M  ])[kb  ] = mfabc;
-			(D.f[DIR_M0P  ])[kw  ] = mfcba;
-			(D.f[DIR_0PP  ])[k   ] = mfbaa;
-			(D.f[DIR_0MM  ])[kbs ] = mfbcc;
-			(D.f[DIR_0PM  ])[kb  ] = mfbac;
-			(D.f[DIR_0MP  ])[ks  ] = mfbca;
+			(D.f[DIR_P00])[k   ] = mfabb;                                                                   
+			(D.f[DIR_M00])[kw  ] = mfcbb;                                                                 
+			(D.f[DIR_0P0])[k   ] = mfbab;
+			(D.f[DIR_0M0])[ks  ] = mfbcb;
+			(D.f[DIR_00P])[k   ] = mfbba;
+			(D.f[DIR_00M])[kb  ] = mfbbc;
+			(D.f[DIR_PP0])[k   ] = mfaab;
+			(D.f[DIR_MM0])[ksw ] = mfccb;
+			(D.f[DIR_PM0])[ks  ] = mfacb;
+			(D.f[DIR_MP0])[kw  ] = mfcab;
+			(D.f[DIR_P0P])[k   ] = mfaba;
+			(D.f[DIR_M0M])[kbw ] = mfcbc;
+			(D.f[DIR_P0M])[kb  ] = mfabc;
+			(D.f[DIR_M0P])[kw  ] = mfcba;
+			(D.f[DIR_0PP])[k   ] = mfbaa;
+			(D.f[DIR_0MM])[kbs ] = mfbcc;
+			(D.f[DIR_0PM])[kb  ] = mfbac;
+			(D.f[DIR_0MP])[ks  ] = mfbca;
 			(D.f[DIR_000])[k   ] = mfbbb;
-			(D.f[DIR_PPP ])[k   ] = mfaaa;
-			(D.f[DIR_PMP ])[ks  ] = mfaca;
-			(D.f[DIR_PPM ])[kb  ] = mfaac;
-			(D.f[DIR_PMM ])[kbs ] = mfacc;
-			(D.f[DIR_MPP ])[kw  ] = mfcaa;
-			(D.f[DIR_MMP ])[ksw ] = mfcca;
-			(D.f[DIR_MPM ])[kbw ] = mfcac;
-			(D.f[DIR_MMM ])[kbsw] = mfccc;
+			(D.f[DIR_PPP])[k   ] = mfaaa;
+			(D.f[DIR_PMP])[ks  ] = mfaca;
+			(D.f[DIR_PPM])[kb  ] = mfaac;
+			(D.f[DIR_PMM])[kbs ] = mfacc;
+			(D.f[DIR_MPP])[kw  ] = mfcaa;
+			(D.f[DIR_MMP])[ksw ] = mfcca;
+			(D.f[DIR_MPM])[kbw ] = mfcac;
+			(D.f[DIR_MMM])[kbsw] = mfccc;
 		}
 	}
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
index 97c1aff4d26cb85deaf1dd0d145245f28affc2e3..2c482c3b0fc368c52cca1e74246c75210131b326 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
@@ -33,9 +33,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 #include "math.h"
@@ -51,7 +51,7 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd)
@@ -67,7 +67,7 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
     const unsigned k = nx*(ny*z + y) + x;
     //////////////////////////////////////////////////////////////////////////
 
-    if (k<size_Mat)
+    if (k<numberOfLBnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         unsigned int BC;
@@ -78,63 +78,63 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
             else
             {
-                D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
 
             ////////////////////////////////////////////////////////////////////////////////
@@ -170,33 +170,33 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
 
 
             //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
             real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
                                                ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -966,7 +966,7 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd)
@@ -982,7 +982,7 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
     const unsigned k = nx*(ny*z + y) + x;
     //////////////////////////////////////////////////////////////////////////
 
-    if (k<size_Mat)
+    if (k<numberOfLBnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         unsigned int BC;
@@ -993,63 +993,63 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
             else
             {
-                D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
 
             ////////////////////////////////////////////////////////////////////////////////
@@ -1085,33 +1085,33 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 
 
             //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
             real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
                                                ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1762,7 +1762,7 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd)
@@ -1778,7 +1778,7 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
     const unsigned k = nx*(ny*z + y) + x;
     //////////////////////////////////////////////////////////////////////////
 
-    if (k<size_Mat)
+    if (k<numberOfLBnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         unsigned int BC;
@@ -1789,63 +1789,63 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
             else
             {
-                D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-                D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-                D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-                D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-                D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-                D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-                D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-                D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-                D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-                D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-                D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-                D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-                D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-                D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-                D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-                D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-                D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-                D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-                D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-                D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-                D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-                D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-                D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-                D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-                D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-                D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+                D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+                D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
             }
             ////////////////////////////////////////////////////////////////////////////////
             //index
@@ -1857,33 +1857,33 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
             unsigned int kbs = neighborZ[ks];
             unsigned int kbsw = neighborZ[ksw];
             ////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[DIR_P00   ])[k   ];
-            real mfabb = (D.f[DIR_M00   ])[kw  ];
-            real mfbcb = (D.f[DIR_0P0   ])[k   ];
-            real mfbab = (D.f[DIR_0M0   ])[ks  ];
-            real mfbbc = (D.f[DIR_00P   ])[k   ];
-            real mfbba = (D.f[DIR_00M   ])[kb  ];
-            real mfccb = (D.f[DIR_PP0  ])[k   ];
-            real mfaab = (D.f[DIR_MM0  ])[ksw ];
-            real mfcab = (D.f[DIR_PM0  ])[ks  ];
-            real mfacb = (D.f[DIR_MP0  ])[kw  ];
-            real mfcbc = (D.f[DIR_P0P  ])[k   ];
-            real mfaba = (D.f[DIR_M0M  ])[kbw ];
-            real mfcba = (D.f[DIR_P0M  ])[kb  ];
-            real mfabc = (D.f[DIR_M0P  ])[kw  ];
-            real mfbcc = (D.f[DIR_0PP  ])[k   ];
-            real mfbaa = (D.f[DIR_0MM  ])[kbs ];
-            real mfbca = (D.f[DIR_0PM  ])[kb  ];
-            real mfbac = (D.f[DIR_0MP  ])[ks  ];
+            real mfcbb = (D.f[DIR_P00])[k   ];
+            real mfabb = (D.f[DIR_M00])[kw  ];
+            real mfbcb = (D.f[DIR_0P0])[k   ];
+            real mfbab = (D.f[DIR_0M0])[ks  ];
+            real mfbbc = (D.f[DIR_00P])[k   ];
+            real mfbba = (D.f[DIR_00M])[kb  ];
+            real mfccb = (D.f[DIR_PP0])[k   ];
+            real mfaab = (D.f[DIR_MM0])[ksw ];
+            real mfcab = (D.f[DIR_PM0])[ks  ];
+            real mfacb = (D.f[DIR_MP0])[kw  ];
+            real mfcbc = (D.f[DIR_P0P])[k   ];
+            real mfaba = (D.f[DIR_M0M])[kbw ];
+            real mfcba = (D.f[DIR_P0M])[kb  ];
+            real mfabc = (D.f[DIR_M0P])[kw  ];
+            real mfbcc = (D.f[DIR_0PP])[k   ];
+            real mfbaa = (D.f[DIR_0MM])[kbs ];
+            real mfbca = (D.f[DIR_0PM])[kb  ];
+            real mfbac = (D.f[DIR_0MP])[ks  ];
             real mfbbb = (D.f[DIR_000])[k   ];
-            real mfccc = (D.f[DIR_PPP ])[k   ];
-            real mfaac = (D.f[DIR_MMP ])[ksw ];
-            real mfcac = (D.f[DIR_PMP ])[ks  ];
-            real mfacc = (D.f[DIR_MPP ])[kw  ];
-            real mfcca = (D.f[DIR_PPM ])[kb  ];
-            real mfaaa = (D.f[DIR_MMM ])[kbsw];
-            real mfcaa = (D.f[DIR_PMM ])[kbs ];
-            real mfaca = (D.f[DIR_MPM ])[kbw ];
+            real mfccc = (D.f[DIR_PPP])[k   ];
+            real mfaac = (D.f[DIR_MMP])[ksw ];
+            real mfcac = (D.f[DIR_PMP])[ks  ];
+            real mfacc = (D.f[DIR_MPP])[kw  ];
+            real mfcca = (D.f[DIR_PPM])[kb  ];
+            real mfaaa = (D.f[DIR_MMM])[kbsw];
+            real mfcaa = (D.f[DIR_PMM])[kbs ];
+            real mfaca = (D.f[DIR_MPM])[kbw ];
             ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -2204,33 +2204,33 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
                     ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
             mfbbb += drho - drhoPost;
             ////////////////////////////////////////////////////////////////////////////////////
-            (D.f[DIR_P00   ])[k   ] = mfabb;                                                                   
-            (D.f[DIR_M00   ])[kw  ] = mfcbb;                                                                 
-            (D.f[DIR_0P0   ])[k   ] = mfbab;
-            (D.f[DIR_0M0   ])[ks  ] = mfbcb;
-            (D.f[DIR_00P   ])[k   ] = mfbba;
-            (D.f[DIR_00M   ])[kb  ] = mfbbc;
-            (D.f[DIR_PP0  ])[k   ] = mfaab;
-            (D.f[DIR_MM0  ])[ksw ] = mfccb;
-            (D.f[DIR_PM0  ])[ks  ] = mfacb;
-            (D.f[DIR_MP0  ])[kw  ] = mfcab;
-            (D.f[DIR_P0P  ])[k   ] = mfaba;
-            (D.f[DIR_M0M  ])[kbw ] = mfcbc;
-            (D.f[DIR_P0M  ])[kb  ] = mfabc;
-            (D.f[DIR_M0P  ])[kw  ] = mfcba;
-            (D.f[DIR_0PP  ])[k   ] = mfbaa;
-            (D.f[DIR_0MM  ])[kbs ] = mfbcc;
-            (D.f[DIR_0PM  ])[kb  ] = mfbac;
-            (D.f[DIR_0MP  ])[ks  ] = mfbca;
+            (D.f[DIR_P00])[k   ] = mfabb;                                                                   
+            (D.f[DIR_M00])[kw  ] = mfcbb;                                                                 
+            (D.f[DIR_0P0])[k   ] = mfbab;
+            (D.f[DIR_0M0])[ks  ] = mfbcb;
+            (D.f[DIR_00P])[k   ] = mfbba;
+            (D.f[DIR_00M])[kb  ] = mfbbc;
+            (D.f[DIR_PP0])[k   ] = mfaab;
+            (D.f[DIR_MM0])[ksw ] = mfccb;
+            (D.f[DIR_PM0])[ks  ] = mfacb;
+            (D.f[DIR_MP0])[kw  ] = mfcab;
+            (D.f[DIR_P0P])[k   ] = mfaba;
+            (D.f[DIR_M0M])[kbw ] = mfcbc;
+            (D.f[DIR_P0M])[kb  ] = mfabc;
+            (D.f[DIR_M0P])[kw  ] = mfcba;
+            (D.f[DIR_0PP])[k   ] = mfbaa;
+            (D.f[DIR_0MM])[kbs ] = mfbcc;
+            (D.f[DIR_0PM])[kb  ] = mfbac;
+            (D.f[DIR_0MP])[ks  ] = mfbca;
             (D.f[DIR_000])[k   ] = mfbbb;
-            (D.f[DIR_PPP ])[k   ] = mfaaa;
-            (D.f[DIR_PMP ])[ks  ] = mfaca;
-            (D.f[DIR_PPM ])[kb  ] = mfaac;
-            (D.f[DIR_PMM ])[kbs ] = mfacc;
-            (D.f[DIR_MPP ])[kw  ] = mfcaa;
-            (D.f[DIR_MMP ])[ksw ] = mfcca;
-            (D.f[DIR_MPM ])[kbw ] = mfcac;
-            (D.f[DIR_MMM ])[kbsw] = mfccc;
+            (D.f[DIR_PPP])[k   ] = mfaaa;
+            (D.f[DIR_PMP])[ks  ] = mfaca;
+            (D.f[DIR_PPM])[kb  ] = mfaac;
+            (D.f[DIR_PMM])[kbs ] = mfacc;
+            (D.f[DIR_MPP])[kw  ] = mfcaa;
+            (D.f[DIR_MMP])[ksw ] = mfcca;
+            (D.f[DIR_MPM])[kbw ] = mfcac;
+            (D.f[DIR_MMM])[kbsw] = mfccc;
         }
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
index 7adfd40da157d825d83c63b084bf1f855ea6dca2..9e0275e7be38b8b56cf71cfa0b8299dc1b49106c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
@@ -8,9 +8,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 #include "math.h"
@@ -27,7 +27,7 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 															 real* vzOut,
 															 real* DDStart,
 															 real* G6,
-															 int size_Mat,
+															 unsigned long long numberOfLBnodes,
 															 int level,
 															 real* forces,
 															 bool EvenOrOdd)
@@ -43,7 +43,7 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k < size_Mat)
+	if (k < numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -54,83 +54,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes];
+				G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes];
+				G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes];
+				G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes];
+				G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes];
+				G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes];
 			}
 			else
 			{
-				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes];
+				G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes];
+				G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes];
+				G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes];
+				G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes];
+				G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1026,83 +1026,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			Distributions27 D;
 //			if (EvenOrOdd == true)
 //			{
-//				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-//				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-//				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-//				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-//				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-//				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-//				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-//				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-//				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-//				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-//				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-//				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-//				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-//				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-//				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-//				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-//				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-//				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-//				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-//				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-//				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-//				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-//				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-//				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-//				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-//				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 //			}
 //			else
 //			{
-//				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-//				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-//				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-//				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-//				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-//				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-//				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-//				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-//				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-//				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-//				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-//				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-//				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-//				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-//				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-//				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-//				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-//				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-//				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-//				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-//				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-//				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-//				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-//				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-//				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-//				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 //			}
 //
 //			Distributions6 G;
 //			if (EvenOrOdd == true)
 //			{
-//				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-//				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-//				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-//				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-//				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-//				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_P00 * size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_M00 * size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00P * size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00M * size_Mat];
 //			}
 //			else
 //			{
-//				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-//				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-//				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-//				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-//				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-//				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_P00 * size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_M00 * size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00P * size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00M * size_Mat];
 //			}
 //
 //			////////////////////////////////////////////////////////////////////////////////
@@ -2006,83 +2006,83 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			Distributions27 D;
 //			if (EvenOrOdd == true)
 //			{
-//				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-//				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-//				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-//				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-//				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-//				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-//				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-//				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-//				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-//				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-//				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-//				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-//				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-//				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-//				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-//				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-//				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-//				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-//				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-//				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-//				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-//				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-//				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-//				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-//				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-//				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 //			}
 //			else
 //			{
-//				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-//				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-//				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-//				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-//				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-//				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-//				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-//				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-//				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-//				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-//				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-//				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-//				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-//				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-//				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-//				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-//				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-//				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-//				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-//				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-//				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-//				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-//				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-//				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-//				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-//				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 //			}
 //
 //			Distributions6 G;
 //			if (EvenOrOdd == true)
 //			{
-//				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-//				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-//				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-//				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-//				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-//				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_P00 * size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_M00 * size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00P * size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00M * size_Mat];
 //			}
 //			else
 //			{
-//				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-//				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-//				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-//				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-//				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-//				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_P00 * size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_M00 * size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00P * size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00M * size_Mat];
 //			}
 //
 //			////////////////////////////////////////////////////////////////////////////////
@@ -2153,33 +2153,33 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			real dyyuy = c1o2 * (-mgbcb + mgbab);
 //			real dzzuz = c1o2 * (-mgbbc + mgbba);
 //			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-//			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-//			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-//			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-//			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-//			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-//			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-//			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-//			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-//			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-//			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-//			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-//			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-//			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-//			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-//			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-//			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-//			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+//			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+//			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+//			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+//			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+//			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+//			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+//			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+//			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+//			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+//			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+//			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+//			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+//			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+//			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+//			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+//			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+//			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+//			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 //			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-//			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-//			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-//			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-//			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-//			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-//			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-//			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-//			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+//			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+//			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+//			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+//			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+//			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+//			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+//			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+//			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 //			////////////////////////////////////////////////////////////////////////////////////
 //			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 //				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
index 5146242fed374a919b6dcc02774db1d8ce4f864a..d1fc15a6b7a8f73083b41b926ce58916bdf61b59 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -17,69 +17,69 @@ __global__ void DragLiftPost27(  real* DD,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat, 
+											unsigned long long numberOfLBnodes, 
 											bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -100,24 +100,24 @@ __global__ void DragLiftPost27(  real* DD,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
 		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -160,32 +160,32 @@ __global__ void DragLiftPost27(  real* DD,
 		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
                 f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		f_W    = (D.f[DIR_P00   ])[ke   ];
-		f_E    = (D.f[DIR_M00   ])[kw   ];
-		f_S    = (D.f[DIR_0P0   ])[kn   ];
-		f_N    = (D.f[DIR_0M0   ])[ks   ];
-		f_B    = (D.f[DIR_00P   ])[kt   ];
-		f_T    = (D.f[DIR_00M   ])[kb   ];
-		f_SW   = (D.f[DIR_PP0  ])[kne  ];
-		f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-		f_NW   = (D.f[DIR_PM0  ])[kse  ];
-		f_SE   = (D.f[DIR_MP0  ])[knw  ];
-		f_BW   = (D.f[DIR_P0P  ])[kte  ];
-		f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-		f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-		f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-		f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-		f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-		f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-		f_BN   = (D.f[DIR_0MP  ])[kts  ];
-		f_BSW  = (D.f[DIR_PPP ])[ktne ];
-		f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-		f_BNW  = (D.f[DIR_PMP ])[ktse ];
-		f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-		f_TSW  = (D.f[DIR_PPM ])[kbne ];
-		f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-		f_TNW  = (D.f[DIR_PMM ])[kbse ];
-		f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+		f_W    = (D.f[DIR_P00])[ke   ];
+		f_E    = (D.f[DIR_M00])[kw   ];
+		f_S    = (D.f[DIR_0P0])[kn   ];
+		f_N    = (D.f[DIR_0M0])[ks   ];
+		f_B    = (D.f[DIR_00P])[kt   ];
+		f_T    = (D.f[DIR_00M])[kb   ];
+		f_SW   = (D.f[DIR_PP0])[kne  ];
+		f_NE   = (D.f[DIR_MM0])[ksw  ];
+		f_NW   = (D.f[DIR_PM0])[kse  ];
+		f_SE   = (D.f[DIR_MP0])[knw  ];
+		f_BW   = (D.f[DIR_P0P])[kte  ];
+		f_TE   = (D.f[DIR_M0M])[kbw  ];
+		f_TW   = (D.f[DIR_P0M])[kbe  ];
+		f_BE   = (D.f[DIR_M0P])[ktw  ];
+		f_BS   = (D.f[DIR_0PP])[ktn  ];
+		f_TN   = (D.f[DIR_0MM])[kbs  ];
+		f_TS   = (D.f[DIR_0PM])[kbn  ];
+		f_BN   = (D.f[DIR_0MP])[kts  ];
+		f_BSW  = (D.f[DIR_PPP])[ktne ];
+		f_BNE  = (D.f[DIR_MMP])[ktsw ];
+		f_BNW  = (D.f[DIR_PMP])[ktse ];
+		f_BSE  = (D.f[DIR_MPP])[ktnw ];
+		f_TSW  = (D.f[DIR_PPM])[kbne ];
+		f_TNE  = (D.f[DIR_MMM])[kbsw ];
+		f_TNW  = (D.f[DIR_PMM])[kbse ];
+		f_TSE  = (D.f[DIR_MPM])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////
 		double	OnE   = c0o1, OnW   = c0o1, OnN   = c0o1, OnS   = c0o1, OnT = c0o1, OnB = c0o1, 
 				OnNE  = c0o1, OnSW  = c0o1, OnSE  = c0o1, OnNW  = c0o1, 
@@ -282,69 +282,69 @@ __global__ void DragLiftPre27(   real* DD,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat, 
+											unsigned long long numberOfLBnodes, 
 											bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -365,24 +365,24 @@ __global__ void DragLiftPre27(   real* DD,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
 		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -425,32 +425,32 @@ __global__ void DragLiftPre27(   real* DD,
 		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
                 f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		f_E   = (D.f[DIR_P00   ])[ke   ];
-		f_W   = (D.f[DIR_M00   ])[kw   ];
-		f_N   = (D.f[DIR_0P0   ])[kn   ];
-		f_S   = (D.f[DIR_0M0   ])[ks   ];
-		f_T   = (D.f[DIR_00P   ])[kt   ];
-		f_B   = (D.f[DIR_00M   ])[kb   ];
-		f_NE  = (D.f[DIR_PP0  ])[kne  ];
-		f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-		f_SE  = (D.f[DIR_PM0  ])[kse  ];
-		f_NW  = (D.f[DIR_MP0  ])[knw  ];
-		f_TE  = (D.f[DIR_P0P  ])[kte  ];
-		f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-		f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-		f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-		f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-		f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-		f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-		f_TS  = (D.f[DIR_0MP  ])[kts  ];
-		f_TNE = (D.f[DIR_PPP ])[ktne ];
-		f_TSW = (D.f[DIR_MMP ])[ktsw ];
-		f_TSE = (D.f[DIR_PMP ])[ktse ];
-		f_TNW = (D.f[DIR_MPP ])[ktnw ];
-		f_BNE = (D.f[DIR_PPM ])[kbne ];
-		f_BSW = (D.f[DIR_MMM ])[kbsw ];
-		f_BSE = (D.f[DIR_PMM ])[kbse ];
-		f_BNW = (D.f[DIR_MPM ])[kbnw ];
+		f_E   = (D.f[DIR_P00])[ke   ];
+		f_W   = (D.f[DIR_M00])[kw   ];
+		f_N   = (D.f[DIR_0P0])[kn   ];
+		f_S   = (D.f[DIR_0M0])[ks   ];
+		f_T   = (D.f[DIR_00P])[kt   ];
+		f_B   = (D.f[DIR_00M])[kb   ];
+		f_NE  = (D.f[DIR_PP0])[kne  ];
+		f_SW  = (D.f[DIR_MM0])[ksw  ];
+		f_SE  = (D.f[DIR_PM0])[kse  ];
+		f_NW  = (D.f[DIR_MP0])[knw  ];
+		f_TE  = (D.f[DIR_P0P])[kte  ];
+		f_BW  = (D.f[DIR_M0M])[kbw  ];
+		f_BE  = (D.f[DIR_P0M])[kbe  ];
+		f_TW  = (D.f[DIR_M0P])[ktw  ];
+		f_TN  = (D.f[DIR_0PP])[ktn  ];
+		f_BS  = (D.f[DIR_0MM])[kbs  ];
+		f_BN  = (D.f[DIR_0PM])[kbn  ];
+		f_TS  = (D.f[DIR_0MP])[kts  ];
+		f_TNE = (D.f[DIR_PPP])[ktne ];
+		f_TSW = (D.f[DIR_MMP])[ktsw ];
+		f_TSE = (D.f[DIR_PMP])[ktse ];
+		f_TNW = (D.f[DIR_MPP])[ktnw ];
+		f_BNE = (D.f[DIR_PPM])[kbne ];
+		f_BSW = (D.f[DIR_MMM])[kbsw ];
+		f_BSE = (D.f[DIR_PMM])[kbse ];
+		f_BNW = (D.f[DIR_MPM])[kbnw ];
 		 ////////////////////////////////////////////////////////////////////////////////
 		double	OnE   = c0o1, OnW   = c0o1, OnN   = c0o1, OnS   = c0o1, OnT = c0o1, OnB = c0o1, 
 				OnNE  = c0o1, OnSW  = c0o1, OnSE  = c0o1, OnNW  = c0o1, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
index acd62b46c5666fc5f621c3772438e42b7ebef5c6..e447062d292908c02800c4559cc4444476290629 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
@@ -13,16 +13,16 @@
 
 #include <iomanip>
 
-//#include "Core/Logger/Logger.h"
+#include "cuda/CudaGrid.h"
 
 #include "Parameter/Parameter.h"
 // includes, kernels
 #include "GPU/GPU_Kernels.cuh"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
-__global__                 void enstrophyKernel  ( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint size_Mat );
+__global__                 void enstrophyKernel  ( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, unsigned long long numberOfLBnodes );
 
 __host__ __device__ inline void enstrophyFunction( real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint index );
 
@@ -32,55 +32,40 @@ bool EnstrophyAnalyzer::run(uint iter)
 {
     if( iter % this->analyzeIter != 0 ) return false;
 
-	int lev = 0;
-	int size_Mat = this->para->getParD(lev)->numberOfNodes;
-	
-	thrust::device_vector<real> enstrophy( size_Mat, c0o1 );
-    thrust::device_vector<uint> isFluid  ( size_Mat, 0);
-
-	unsigned int numberOfThreads = 128;
-    int Grid = (size_Mat / numberOfThreads)+1;
-    int Grid1, Grid2;
-    if (Grid>512)
-    {
-       Grid1 = 512;
-       Grid2 = (Grid/Grid1)+1;
-    } 
-    else
-    {
-       Grid1 = 1;
-       Grid2 = Grid;
-    }
-    dim3 grid(Grid1, Grid2);
-    dim3 threads(numberOfThreads, 1, 1 );
-
-    LBCalcMacCompSP27<<< grid, threads >>> (para->getParD(lev)->velocityX,
-										    para->getParD(lev)->velocityY,
-										    para->getParD(lev)->velocityZ,
-										    para->getParD(lev)->rho,
-										    para->getParD(lev)->pressure,
-										    para->getParD(lev)->typeOfGridNode,
-										    para->getParD(lev)->neighborX,
-										    para->getParD(lev)->neighborY,
-										    para->getParD(lev)->neighborZ,
-										    para->getParD(lev)->numberOfNodes,
-										    para->getParD(lev)->distributions.f[0],
-										    para->getParD(lev)->isEvenTimestep); 
-	//cudaDeviceSynchronize();
-	getLastCudaError("LBCalcMacSP27 execution failed"); 
-
-	enstrophyKernel <<< grid, threads >>> ( para->getParD(lev)->velocityX,
-											para->getParD(lev)->velocityY, 
-											para->getParD(lev)->velocityZ, 
-											para->getParD(lev)->rho, 
-											para->getParD(lev)->neighborX,
-											para->getParD(lev)->neighborY,
-											para->getParD(lev)->neighborZ,
-											para->getParD(lev)->neighborInverse,
-											para->getParD(lev)->typeOfGridNode,
-											enstrophy.data().get(), 
-                                            isFluid.data().get(),
-											size_Mat);
+    int lev = 0;
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(lev)->numberofthreads, para->getParD(lev)->numberOfNodes);
+
+    thrust::device_vector<real> enstrophy( this->para->getParD(lev)->numberOfNodes, c0o1);
+    thrust::device_vector<uint> isFluid  ( this->para->getParD(lev)->numberOfNodes, 0);
+
+    LBCalcMacCompSP27<<< grid.grid, grid.threads >>>(
+        para->getParD(lev)->velocityX,
+        para->getParD(lev)->velocityY,
+        para->getParD(lev)->velocityZ,
+        para->getParD(lev)->rho,
+        para->getParD(lev)->pressure,
+        para->getParD(lev)->typeOfGridNode,
+        para->getParD(lev)->neighborX,
+        para->getParD(lev)->neighborY,
+        para->getParD(lev)->neighborZ,
+        para->getParD(lev)->numberOfNodes,
+        para->getParD(lev)->distributions.f[0],
+        para->getParD(lev)->isEvenTimestep); 
+    getLastCudaError("LBCalcMacCompSP27 execution failed");
+
+    enstrophyKernel<<< grid.grid, grid.threads >>>(
+        para->getParD(lev)->velocityX,
+        para->getParD(lev)->velocityY, 
+        para->getParD(lev)->velocityZ, 
+        para->getParD(lev)->rho, 
+        para->getParD(lev)->neighborX,
+        para->getParD(lev)->neighborY,
+        para->getParD(lev)->neighborZ,
+        para->getParD(lev)->neighborInverse,
+        para->getParD(lev)->typeOfGridNode,
+        enstrophy.data().get(), 
+        isFluid.data().get(),
+        para->getParD(lev)->numberOfNodes);
 	cudaDeviceSynchronize(); 
 	getLastCudaError("enstrophyKernel execution failed");
 
@@ -97,7 +82,7 @@ bool EnstrophyAnalyzer::run(uint iter)
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-__global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, uint size_Mat)
+__global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* enstrophy, uint* isFluid, unsigned long long numberOfLBnodes)
 {
     //////////////////////////////////////////////////////////////////////////
     const uint x = threadIdx.x;  // Globaler x-Index 
@@ -113,7 +98,7 @@ __global__ void enstrophyKernel(real* veloX, real* veloY, real* veloZ, real* rho
 
     //if( index % 34 == 0 || index % 34 == 33 ) return;
 
-    if( index >= size_Mat) return;
+    if( index >= numberOfLBnodes) return; // bounds check in 64 bit: index is promoted to unsigned long long; a (uint) cast on the count would truncate it
 
 	unsigned int BC;
 	BC = geo[index];
@@ -340,7 +325,6 @@ EnstrophyAnalyzer::EnstrophyAnalyzer(SPtr<Parameter> para, uint analyzeIter)
 
 void EnstrophyAnalyzer::writeToFile( std::string filename )
 {
-    //*logging::out << logging::Logger::INFO_INTERMEDIATE << "EnstrophyAnalyzer::writeToFile( " << filename << " )" << "\n";
 	std::cout << "EnstrophyAnalyzer::writeToFile( " << filename << " )" << "\n";
 
     std::ofstream file;
@@ -353,7 +337,6 @@ void EnstrophyAnalyzer::writeToFile( std::string filename )
     file.close();
 
 	std::cout << "done!\n";
-	//*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
 }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.h b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.h
index 27915f1acdfc9bfecc291442727647dc4abda206..ab5c52e77cd4c028e044bfd6ec4b25b974f80ee2 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.h
@@ -7,7 +7,7 @@
 
 
 #include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "VirtualFluids_GPU_export.h"
 
 class Parameter;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
index 5470da46342c85e57370227313c8c82674a17e6e..288ec7ff26bae5b7415e08f4d39aa8cd2ffa4a9b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -14,7 +14,7 @@ __global__ void getSendFsPost27(real* DD,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat, 
+                                           unsigned long long numberOfLBnodes, 
                                            bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -65,150 +65,150 @@ __global__ void getSendFsPost27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
-      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
-      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
-      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
-      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
-      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
-      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
-      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
-      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
-      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
-      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
-      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
-      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
-      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
-      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
-      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
-      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
-      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
-      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
-      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
-      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
-      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
-      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
-      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
-      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
-      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
-      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
+      Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax];
+      Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax];
+      Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax];
+      Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax];
+      Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax];
+      Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax];
+      Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax];
+      Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax];
+      Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax];
+      Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax];
+      Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax];
+      Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax];
+      Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax];
+      Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax];
+      Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax];
+      Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax];
+      Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax];
+      Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax];
+      Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax];
+      Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax];
+      Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax];
+      Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax];
+      Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax];
+      Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax];
+      Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax];
+      Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy to buffer
-      //(Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_P00   ])[ke   ];
-      //(Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_M00   ])[kw   ];
-      //(Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
-      //(Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
-      //(Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00P   ])[kt   ];
-      //(Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00M   ])[kb   ];
-      //(Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
-      //(Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
-      //(Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
-      //(Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
-      //(Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_P0P  ])[kte  ];
-      //(Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
-      //(Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
-      //(Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
-      //(Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
-      //(Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
-      //(Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
-      //(Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0MP  ])[kts  ];
+      //(Dbuff.f[DIR_P00])[k] = (D.f[DIR_P00])[ke   ];
+      //(Dbuff.f[DIR_M00])[k] = (D.f[DIR_M00])[kw   ];
+      //(Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0P0])[kn   ];
+      //(Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0M0])[ks   ];
+      //(Dbuff.f[DIR_00P])[k] = (D.f[DIR_00P])[kt   ];
+      //(Dbuff.f[DIR_00M])[k] = (D.f[DIR_00M])[kb   ];
+      //(Dbuff.f[DIR_PP0])[k] = (D.f[DIR_PP0])[kne  ];
+      //(Dbuff.f[DIR_MM0])[k] = (D.f[DIR_MM0])[ksw  ];
+      //(Dbuff.f[DIR_PM0])[k] = (D.f[DIR_PM0])[kse  ];
+      //(Dbuff.f[DIR_MP0])[k] = (D.f[DIR_MP0])[knw  ];
+      //(Dbuff.f[DIR_P0P])[k] = (D.f[DIR_P0P])[kte  ];
+      //(Dbuff.f[DIR_M0M])[k] = (D.f[DIR_M0M])[kbw  ];
+      //(Dbuff.f[DIR_P0M])[k] = (D.f[DIR_P0M])[kbe  ];
+      //(Dbuff.f[DIR_M0P])[k] = (D.f[DIR_M0P])[ktw  ];
+      //(Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0PP])[ktn  ];
+      //(Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0MM])[kbs  ];
+      //(Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0PM])[kbn  ];
+      //(Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0MP])[kts  ];
       //(Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
-      //(Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_PPP ])[ktne ];
-      //(Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_MMP ])[ktsw ];
-      //(Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_PMP ])[ktse ];
-      //(Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_MPP ])[ktnw ];
-      //(Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_PPM ])[kbne ];
-      //(Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_MMM ])[kbsw ];
-      //(Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_PMM ])[kbse ];
-      //(Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_MPM ])[kbnw ];
-      (Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_M00   ])[kw   ];
-      (Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_P00   ])[ke   ];
-      (Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
-      (Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
-      (Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00M   ])[kb   ];
-      (Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00P   ])[kt   ];
-      (Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
-      (Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
-      (Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
-      (Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
-      (Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
-      (Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_P0P  ])[kte  ];
-      (Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
-      (Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
-      (Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
-      (Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
-      (Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0MP  ])[kts  ];
-      (Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
+      //(Dbuff.f[DIR_PPP])[k] = (D.f[DIR_PPP])[ktne ];
+      //(Dbuff.f[DIR_MMP])[k] = (D.f[DIR_MMP])[ktsw ];
+      //(Dbuff.f[DIR_PMP])[k] = (D.f[DIR_PMP])[ktse ];
+      //(Dbuff.f[DIR_MPP])[k] = (D.f[DIR_MPP])[ktnw ];
+      //(Dbuff.f[DIR_PPM])[k] = (D.f[DIR_PPM])[kbne ];
+      //(Dbuff.f[DIR_MMM])[k] = (D.f[DIR_MMM])[kbsw ];
+      //(Dbuff.f[DIR_PMM])[k] = (D.f[DIR_PMM])[kbse ];
+      //(Dbuff.f[DIR_MPM])[k] = (D.f[DIR_MPM])[kbnw ];
+      (Dbuff.f[DIR_P00])[k] = (D.f[DIR_M00])[kw   ];
+      (Dbuff.f[DIR_M00])[k] = (D.f[DIR_P00])[ke   ];
+      (Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0M0])[ks   ];
+      (Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0P0])[kn   ];
+      (Dbuff.f[DIR_00P])[k] = (D.f[DIR_00M])[kb   ];
+      (Dbuff.f[DIR_00M])[k] = (D.f[DIR_00P])[kt   ];
+      (Dbuff.f[DIR_PP0])[k] = (D.f[DIR_MM0])[ksw  ];
+      (Dbuff.f[DIR_MM0])[k] = (D.f[DIR_PP0])[kne  ];
+      (Dbuff.f[DIR_PM0])[k] = (D.f[DIR_MP0])[knw  ];
+      (Dbuff.f[DIR_MP0])[k] = (D.f[DIR_PM0])[kse  ];
+      (Dbuff.f[DIR_P0P])[k] = (D.f[DIR_M0M])[kbw  ];
+      (Dbuff.f[DIR_M0M])[k] = (D.f[DIR_P0P])[kte  ];
+      (Dbuff.f[DIR_P0M])[k] = (D.f[DIR_M0P])[ktw  ];
+      (Dbuff.f[DIR_M0P])[k] = (D.f[DIR_P0M])[kbe  ];
+      (Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0MM])[kbs  ];
+      (Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0PP])[ktn  ];
+      (Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0MP])[kts  ];
+      (Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0PM])[kbn  ];
       (Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
-      (Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_MMM ])[kbsw ];
-      (Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_PPM ])[kbne ];
-      (Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_MPM ])[kbnw ];
-      (Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_PMM ])[kbse ];
-      (Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_MMP ])[ktsw ];
-      (Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_PPP ])[ktne ];
-      (Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_MPP ])[ktnw ];
-      (Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_PMP ])[ktse ];
+      (Dbuff.f[DIR_PPP])[k] = (D.f[DIR_MMM])[kbsw ];
+      (Dbuff.f[DIR_MMP])[k] = (D.f[DIR_PPM])[kbne ];
+      (Dbuff.f[DIR_PMP])[k] = (D.f[DIR_MPM])[kbnw ];
+      (Dbuff.f[DIR_MPP])[k] = (D.f[DIR_PMM])[kbse ];
+      (Dbuff.f[DIR_PPM])[k] = (D.f[DIR_MMP])[ktsw ];
+      (Dbuff.f[DIR_MMM])[k] = (D.f[DIR_PPP])[ktne ];
+      (Dbuff.f[DIR_PMM])[k] = (D.f[DIR_MPP])[ktnw ];
+      (Dbuff.f[DIR_MPM])[k] = (D.f[DIR_PMP])[ktse ];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -250,7 +250,7 @@ __global__ void setRecvFsPost27(real* DD,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat, 
+                                           unsigned long long numberOfLBnodes, 
                                            bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -301,150 +301,150 @@ __global__ void setRecvFsPost27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
-      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
-      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
-      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
-      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
-      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
-      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
-      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
-      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
-      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
-      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
-      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
-      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
-      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
-      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
-      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
-      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
-      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
-      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
-      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
-      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
-      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
-      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
-      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
-      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
-      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
-      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
+      Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax];
+      Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax];
+      Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax];
+      Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax];
+      Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax];
+      Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax];
+      Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax];
+      Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax];
+      Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax];
+      Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax];
+      Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax];
+      Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax];
+      Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax];
+      Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax];
+      Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax];
+      Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax];
+      Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax];
+      Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax];
+      Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax];
+      Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax];
+      Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax];
+      Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax];
+      Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax];
+      Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax];
+      Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax];
+      Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy from buffer
-      //(D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_P00   ])[k];
-      //(D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_M00   ])[k];
-      //(D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0P0   ])[k];
-      //(D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0M0   ])[k];
-      //(D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00P   ])[k];
-      //(D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00M   ])[k];
-      //(D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_PP0  ])[k];
-      //(D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_MM0  ])[k];
-      //(D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_PM0  ])[k];
-      //(D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_MP0  ])[k];
-      //(D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_P0P  ])[k];
-      //(D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_M0M  ])[k];
-      //(D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_P0M  ])[k];
-      //(D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_M0P  ])[k];
-      //(D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0PP  ])[k];
-      //(D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0MM  ])[k];
-      //(D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0PM  ])[k];
-      //(D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0MP  ])[k];
+      //(D.f[DIR_P00])[ke   ] = (Dbuff.f[DIR_P00])[k];
+      //(D.f[DIR_M00])[kw   ] = (Dbuff.f[DIR_M00])[k];
+      //(D.f[DIR_0P0])[kn   ] = (Dbuff.f[DIR_0P0])[k];
+      //(D.f[DIR_0M0])[ks   ] = (Dbuff.f[DIR_0M0])[k];
+      //(D.f[DIR_00P])[kt   ] = (Dbuff.f[DIR_00P])[k];
+      //(D.f[DIR_00M])[kb   ] = (Dbuff.f[DIR_00M])[k];
+      //(D.f[DIR_PP0])[kne  ] = (Dbuff.f[DIR_PP0])[k];
+      //(D.f[DIR_MM0])[ksw  ] = (Dbuff.f[DIR_MM0])[k];
+      //(D.f[DIR_PM0])[kse  ] = (Dbuff.f[DIR_PM0])[k];
+      //(D.f[DIR_MP0])[knw  ] = (Dbuff.f[DIR_MP0])[k];
+      //(D.f[DIR_P0P])[kte  ] = (Dbuff.f[DIR_P0P])[k];
+      //(D.f[DIR_M0M])[kbw  ] = (Dbuff.f[DIR_M0M])[k];
+      //(D.f[DIR_P0M])[kbe  ] = (Dbuff.f[DIR_P0M])[k];
+      //(D.f[DIR_M0P])[ktw  ] = (Dbuff.f[DIR_M0P])[k];
+      //(D.f[DIR_0PP])[ktn  ] = (Dbuff.f[DIR_0PP])[k];
+      //(D.f[DIR_0MM])[kbs  ] = (Dbuff.f[DIR_0MM])[k];
+      //(D.f[DIR_0PM])[kbn  ] = (Dbuff.f[DIR_0PM])[k];
+      //(D.f[DIR_0MP])[kts  ] = (Dbuff.f[DIR_0MP])[k];
       //(D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
-      //(D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_PPP ])[k];
-      //(D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_MMP ])[k];
-      //(D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_PMP ])[k];
-      //(D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_MPP ])[k];
-      //(D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_PPM ])[k];
-      //(D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_MMM ])[k];
-      //(D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_PMM ])[k];
-      //(D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_MPM ])[k];
-      (D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_P00   ])[k];
-      (D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_M00   ])[k];
-      (D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0P0   ])[k];
-      (D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0M0   ])[k];
-      (D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00P   ])[k];
-      (D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00M   ])[k];
-      (D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_PP0  ])[k];
-      (D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_MM0  ])[k];
-      (D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_PM0  ])[k];
-      (D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_MP0  ])[k];
-      (D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_P0P  ])[k];
-      (D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_M0M  ])[k];
-      (D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_P0M  ])[k];
-      (D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_M0P  ])[k];
-      (D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0PP  ])[k];
-      (D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0MM  ])[k];
-      (D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0PM  ])[k];
-      (D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0MP  ])[k];
+      //(D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_PPP])[k];
+      //(D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_MMP])[k];
+      //(D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_PMP])[k];
+      //(D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_MPP])[k];
+      //(D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_PPM])[k];
+      //(D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_MMM])[k];
+      //(D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_PMM])[k];
+      //(D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_MPM])[k];
+      (D.f[DIR_M00])[kw   ] = (Dbuff.f[DIR_P00])[k];
+      (D.f[DIR_P00])[ke   ] = (Dbuff.f[DIR_M00])[k];
+      (D.f[DIR_0M0])[ks   ] = (Dbuff.f[DIR_0P0])[k];
+      (D.f[DIR_0P0])[kn   ] = (Dbuff.f[DIR_0M0])[k];
+      (D.f[DIR_00M])[kb   ] = (Dbuff.f[DIR_00P])[k];
+      (D.f[DIR_00P])[kt   ] = (Dbuff.f[DIR_00M])[k];
+      (D.f[DIR_MM0])[ksw  ] = (Dbuff.f[DIR_PP0])[k];
+      (D.f[DIR_PP0])[kne  ] = (Dbuff.f[DIR_MM0])[k];
+      (D.f[DIR_MP0])[knw  ] = (Dbuff.f[DIR_PM0])[k];
+      (D.f[DIR_PM0])[kse  ] = (Dbuff.f[DIR_MP0])[k];
+      (D.f[DIR_M0M])[kbw  ] = (Dbuff.f[DIR_P0P])[k];
+      (D.f[DIR_P0P])[kte  ] = (Dbuff.f[DIR_M0M])[k];
+      (D.f[DIR_M0P])[ktw  ] = (Dbuff.f[DIR_P0M])[k];
+      (D.f[DIR_P0M])[kbe  ] = (Dbuff.f[DIR_M0P])[k];
+      (D.f[DIR_0MM])[kbs  ] = (Dbuff.f[DIR_0PP])[k];
+      (D.f[DIR_0PP])[ktn  ] = (Dbuff.f[DIR_0MM])[k];
+      (D.f[DIR_0MP])[kts  ] = (Dbuff.f[DIR_0PM])[k];
+      (D.f[DIR_0PM])[kbn  ] = (Dbuff.f[DIR_0MP])[k];
       (D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
-      (D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_PPP ])[k];
-      (D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_MMP ])[k];
-      (D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_PMP ])[k];
-      (D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_MPP ])[k];
-      (D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_PPM ])[k];
-      (D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_MMM ])[k];
-      (D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_PMM ])[k];
-      (D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_MPM ])[k];
+      (D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_PPP])[k];
+      (D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_MMP])[k];
+      (D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_PMP])[k];
+      (D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_MPP])[k];
+      (D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_PPM])[k];
+      (D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_MMM])[k];
+      (D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_PMM])[k];
+      (D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_MPM])[k];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -485,7 +485,7 @@ __global__ void getSendFsPre27(real* DD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat, 
+                                          unsigned long long numberOfLBnodes, 
                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -536,123 +536,123 @@ __global__ void getSendFsPre27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
-      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
-      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
-      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
-      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
-      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
-      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
-      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
-      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
-      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
-      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
-      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
-      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
-      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
-      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
-      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
-      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
-      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
-      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
-      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
-      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
-      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
-      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
-      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
-      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
-      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
-      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
+      Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax];
+      Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax];
+      Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax];
+      Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax];
+      Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax];
+      Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax];
+      Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax];
+      Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax];
+      Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax];
+      Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax];
+      Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax];
+      Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax];
+      Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax];
+      Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax];
+      Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax];
+      Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax];
+      Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax];
+      Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax];
+      Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax];
+      Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax];
+      Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax];
+      Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax];
+      Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax];
+      Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax];
+      Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax];
+      Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy to buffer
-      (Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_P00   ])[ke   ];
-      (Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_M00   ])[kw   ];
-      (Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
-      (Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
-      (Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00P   ])[kt   ];
-      (Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00M   ])[kb   ];
-      (Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
-      (Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
-      (Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
-      (Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
-      (Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_P0P  ])[kte  ];
-      (Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
-      (Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
-      (Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
-      (Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
-      (Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
-      (Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
-      (Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0MP  ])[kts  ];
+      (Dbuff.f[DIR_P00])[k] = (D.f[DIR_P00])[ke   ];
+      (Dbuff.f[DIR_M00])[k] = (D.f[DIR_M00])[kw   ];
+      (Dbuff.f[DIR_0P0])[k] = (D.f[DIR_0P0])[kn   ];
+      (Dbuff.f[DIR_0M0])[k] = (D.f[DIR_0M0])[ks   ];
+      (Dbuff.f[DIR_00P])[k] = (D.f[DIR_00P])[kt   ];
+      (Dbuff.f[DIR_00M])[k] = (D.f[DIR_00M])[kb   ];
+      (Dbuff.f[DIR_PP0])[k] = (D.f[DIR_PP0])[kne  ];
+      (Dbuff.f[DIR_MM0])[k] = (D.f[DIR_MM0])[ksw  ];
+      (Dbuff.f[DIR_PM0])[k] = (D.f[DIR_PM0])[kse  ];
+      (Dbuff.f[DIR_MP0])[k] = (D.f[DIR_MP0])[knw  ];
+      (Dbuff.f[DIR_P0P])[k] = (D.f[DIR_P0P])[kte  ];
+      (Dbuff.f[DIR_M0M])[k] = (D.f[DIR_M0M])[kbw  ];
+      (Dbuff.f[DIR_P0M])[k] = (D.f[DIR_P0M])[kbe  ];
+      (Dbuff.f[DIR_M0P])[k] = (D.f[DIR_M0P])[ktw  ];
+      (Dbuff.f[DIR_0PP])[k] = (D.f[DIR_0PP])[ktn  ];
+      (Dbuff.f[DIR_0MM])[k] = (D.f[DIR_0MM])[kbs  ];
+      (Dbuff.f[DIR_0PM])[k] = (D.f[DIR_0PM])[kbn  ];
+      (Dbuff.f[DIR_0MP])[k] = (D.f[DIR_0MP])[kts  ];
       (Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
-      (Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_PPP ])[ktne ];
-      (Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_MMP ])[ktsw ];
-      (Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_PMP ])[ktse ];
-      (Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_MPP ])[ktnw ];
-      (Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_PPM ])[kbne ];
-      (Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_MMM ])[kbsw ];
-      (Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_PMM ])[kbse ];
-      (Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_MPM ])[kbnw ];
+      (Dbuff.f[DIR_PPP])[k] = (D.f[DIR_PPP])[ktne ];
+      (Dbuff.f[DIR_MMP])[k] = (D.f[DIR_MMP])[ktsw ];
+      (Dbuff.f[DIR_PMP])[k] = (D.f[DIR_PMP])[ktse ];
+      (Dbuff.f[DIR_MPP])[k] = (D.f[DIR_MPP])[ktnw ];
+      (Dbuff.f[DIR_PPM])[k] = (D.f[DIR_PPM])[kbne ];
+      (Dbuff.f[DIR_MMM])[k] = (D.f[DIR_MMM])[kbsw ];
+      (Dbuff.f[DIR_PMM])[k] = (D.f[DIR_PMM])[kbse ];
+      (Dbuff.f[DIR_MPM])[k] = (D.f[DIR_MPM])[kbnw ];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -694,7 +694,7 @@ __global__ void setRecvFsPre27(real* DD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat, 
+                                          unsigned long long numberOfLBnodes, 
                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -745,123 +745,123 @@ __global__ void setRecvFsPre27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
-      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
-      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
-      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
-      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
-      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
-      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
-      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
-      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
-      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
-      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
-      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
-      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
-      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
-      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
-      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
-      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
-      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
-      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
-      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
-      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
-      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
-      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
-      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
-      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
-      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
-      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
+      Dbuff.f[DIR_P00] = &bufferFs[DIR_P00 * buffmax];
+      Dbuff.f[DIR_M00] = &bufferFs[DIR_M00 * buffmax];
+      Dbuff.f[DIR_0P0] = &bufferFs[DIR_0P0 * buffmax];
+      Dbuff.f[DIR_0M0] = &bufferFs[DIR_0M0 * buffmax];
+      Dbuff.f[DIR_00P] = &bufferFs[DIR_00P * buffmax];
+      Dbuff.f[DIR_00M] = &bufferFs[DIR_00M * buffmax];
+      Dbuff.f[DIR_PP0] = &bufferFs[DIR_PP0 * buffmax];
+      Dbuff.f[DIR_MM0] = &bufferFs[DIR_MM0 * buffmax];
+      Dbuff.f[DIR_PM0] = &bufferFs[DIR_PM0 * buffmax];
+      Dbuff.f[DIR_MP0] = &bufferFs[DIR_MP0 * buffmax];
+      Dbuff.f[DIR_P0P] = &bufferFs[DIR_P0P * buffmax];
+      Dbuff.f[DIR_M0M] = &bufferFs[DIR_M0M * buffmax];
+      Dbuff.f[DIR_P0M] = &bufferFs[DIR_P0M * buffmax];
+      Dbuff.f[DIR_M0P] = &bufferFs[DIR_M0P * buffmax];
+      Dbuff.f[DIR_0PP] = &bufferFs[DIR_0PP * buffmax];
+      Dbuff.f[DIR_0MM] = &bufferFs[DIR_0MM * buffmax];
+      Dbuff.f[DIR_0PM] = &bufferFs[DIR_0PM * buffmax];
+      Dbuff.f[DIR_0MP] = &bufferFs[DIR_0MP * buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000 * buffmax];
+      Dbuff.f[DIR_PPP] = &bufferFs[DIR_PPP * buffmax];
+      Dbuff.f[DIR_MMP] = &bufferFs[DIR_MMP * buffmax];
+      Dbuff.f[DIR_PMP] = &bufferFs[DIR_PMP * buffmax];
+      Dbuff.f[DIR_MPP] = &bufferFs[DIR_MPP * buffmax];
+      Dbuff.f[DIR_PPM] = &bufferFs[DIR_PPM * buffmax];
+      Dbuff.f[DIR_MMM] = &bufferFs[DIR_MMM * buffmax];
+      Dbuff.f[DIR_PMM] = &bufferFs[DIR_PMM * buffmax];
+      Dbuff.f[DIR_MPM] = &bufferFs[DIR_MPM * buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy from buffer
-      (D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_P00   ])[k];
-      (D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_M00   ])[k];
-      (D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0P0   ])[k];
-      (D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0M0   ])[k];
-      (D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00P   ])[k];
-      (D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00M   ])[k];
-      (D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_PP0  ])[k];
-      (D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_MM0  ])[k];
-      (D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_PM0  ])[k];
-      (D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_MP0  ])[k];
-      (D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_P0P  ])[k];
-      (D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_M0M  ])[k];
-      (D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_P0M  ])[k];
-      (D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_M0P  ])[k];
-      (D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0PP  ])[k];
-      (D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0MM  ])[k];
-      (D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0PM  ])[k];
-      (D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0MP  ])[k];
+      (D.f[DIR_P00])[ke   ] = (Dbuff.f[DIR_P00])[k];
+      (D.f[DIR_M00])[kw   ] = (Dbuff.f[DIR_M00])[k];
+      (D.f[DIR_0P0])[kn   ] = (Dbuff.f[DIR_0P0])[k];
+      (D.f[DIR_0M0])[ks   ] = (Dbuff.f[DIR_0M0])[k];
+      (D.f[DIR_00P])[kt   ] = (Dbuff.f[DIR_00P])[k];
+      (D.f[DIR_00M])[kb   ] = (Dbuff.f[DIR_00M])[k];
+      (D.f[DIR_PP0])[kne  ] = (Dbuff.f[DIR_PP0])[k];
+      (D.f[DIR_MM0])[ksw  ] = (Dbuff.f[DIR_MM0])[k];
+      (D.f[DIR_PM0])[kse  ] = (Dbuff.f[DIR_PM0])[k];
+      (D.f[DIR_MP0])[knw  ] = (Dbuff.f[DIR_MP0])[k];
+      (D.f[DIR_P0P])[kte  ] = (Dbuff.f[DIR_P0P])[k];
+      (D.f[DIR_M0M])[kbw  ] = (Dbuff.f[DIR_M0M])[k];
+      (D.f[DIR_P0M])[kbe  ] = (Dbuff.f[DIR_P0M])[k];
+      (D.f[DIR_M0P])[ktw  ] = (Dbuff.f[DIR_M0P])[k];
+      (D.f[DIR_0PP])[ktn  ] = (Dbuff.f[DIR_0PP])[k];
+      (D.f[DIR_0MM])[kbs  ] = (Dbuff.f[DIR_0MM])[k];
+      (D.f[DIR_0PM])[kbn  ] = (Dbuff.f[DIR_0PM])[k];
+      (D.f[DIR_0MP])[kts  ] = (Dbuff.f[DIR_0MP])[k];
       (D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
-      (D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_PPP ])[k];
-      (D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_MMP ])[k];
-      (D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_PMP ])[k];
-      (D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_MPP ])[k];
-      (D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_PPM ])[k];
-      (D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_MMM ])[k];
-      (D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_PMM ])[k];
-      (D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_MPM ])[k];
+      (D.f[DIR_PPP])[ktne ] = (Dbuff.f[DIR_PPP])[k];
+      (D.f[DIR_MMP])[ktsw ] = (Dbuff.f[DIR_MMP])[k];
+      (D.f[DIR_PMP])[ktse ] = (Dbuff.f[DIR_PMP])[k];
+      (D.f[DIR_MPP])[ktnw ] = (Dbuff.f[DIR_MPP])[k];
+      (D.f[DIR_PPM])[kbne ] = (Dbuff.f[DIR_PPM])[k];
+      (D.f[DIR_MMM])[kbsw ] = (Dbuff.f[DIR_MMM])[k];
+      (D.f[DIR_PMM])[kbse ] = (Dbuff.f[DIR_PMM])[k];
+      (D.f[DIR_MPM])[kbnw ] = (Dbuff.f[DIR_MPM])[k];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -903,7 +903,7 @@ __global__ void getSendGsF3(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -931,31 +931,31 @@ __global__ void getSendGsF3(
 		Distributions6 G;
 		if (isEvenTimestep)
 		{
-			G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-			G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-			G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-			G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-			G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-			G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+			G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes];
+			G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes];
+			G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes];
+			G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes];
+			G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes];
+			G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes];
 		}
 		else
 		{
-			G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-			G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-			G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-			G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-			G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-			G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+			G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes];
+			G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes];
+			G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes];
+			G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes];
+			G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes];
+			G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes];
 		}
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//set Pointer for Buffer Gs
 		Distributions6 Dbuff;
-		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00   *buffmax];
-		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00   *buffmax];
-		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0   *buffmax];
-		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0   *buffmax];
-		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P   *buffmax];
-		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M   *buffmax];
+		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00 * buffmax];
+		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00 * buffmax];
+		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0 * buffmax];
+		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0 * buffmax];
+		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P * buffmax];
+		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M * buffmax];
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//write Gs to buffer
 		(Dbuff.g[DIR_P00])[k] = (G.g[DIR_M00])[kw];
@@ -1006,7 +1006,7 @@ __global__ void setRecvGsF3(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -1034,31 +1034,31 @@ __global__ void setRecvGsF3(
 		Distributions6 G;
 		if (isEvenTimestep)
 		{
-			G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-			G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-			G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-			G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-			G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-			G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+			G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodes];
+			G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodes];
+			G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodes];
+			G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodes];
+			G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodes];
+			G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodes];
 		}
 		else
 		{
-			G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-			G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-			G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-			G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-			G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-			G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+			G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodes];
+			G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodes];
+			G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodes];
+			G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodes];
+			G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodes];
+			G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodes];
 		}
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//set Pointer for Buffer Gs
 		Distributions6 Dbuff;
-		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00   *buffmax];
-		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00   *buffmax];
-		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0   *buffmax];
-		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0   *buffmax];
-		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P   *buffmax];
-		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M   *buffmax];
+		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00 * buffmax];
+		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00 * buffmax];
+		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0 * buffmax];
+		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0 * buffmax];
+		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P * buffmax];
+		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M * buffmax];
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//write buffer to Gs
 		(G.g[DIR_M00])[kw] = (Dbuff.g[DIR_P00])[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index ceb70fb123c52c282200137a00522ff2b9905f86..78e6bf8ecf0588eaf6e216916505ec0cbab6c215 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -29,7 +29,7 @@ void KernelCas27(unsigned int grid_nx,
                             unsigned int* neighborY,
                             unsigned int* neighborZ,
                             real* DD,
-                            int size_Mat,
+                            unsigned long long numberOfLBnodes,
                             bool EvenOrOdd);
 
 void KernelCasSP27(unsigned int numberOfThreads, 
@@ -39,7 +39,7 @@ void KernelCasSP27(unsigned int numberOfThreads,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
                               real* DD,
-                              int size_Mat,
+                              unsigned long long numberOfLBnodes,
                               bool EvenOrOdd);
 
 void KernelCasSPMS27(unsigned int numberOfThreads, 
@@ -49,7 +49,7 @@ void KernelCasSPMS27(unsigned int numberOfThreads,
                                 unsigned int* neighborY,
                                 unsigned int* neighborZ,
                                 real* DD,
-                                int size_Mat,
+                                unsigned long long numberOfLBnodes,
                                 bool EvenOrOdd);
 
 void KernelCasSPMSOHM27( unsigned int numberOfThreads, 
@@ -59,7 +59,7 @@ void KernelCasSPMSOHM27( unsigned int numberOfThreads,
                                    unsigned int* neighborY,
                                    unsigned int* neighborZ,
                                    real* DD,
-                                   int size_Mat,
+                                   unsigned long long numberOfLBnodes,
                                    bool EvenOrOdd);
 
 void KernelKumCompSRTSP27(
@@ -70,7 +70,7 @@ void KernelKumCompSRTSP27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DDStart,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	bool EvenOrOdd);
@@ -82,7 +82,7 @@ void KernelCumulantD3Q27All4(unsigned int numberOfThreads,
 									    unsigned int* neighborY,
 									    unsigned int* neighborZ,
 									    real* DD,
-									    int size_Mat,
+									    unsigned long long numberOfLBnodes,
 									    int level,
 									    real* forces,
 									    bool EvenOrOdd);
@@ -94,7 +94,7 @@ void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
 											real* DD,
-											int size_Mat,
+											unsigned long long numberOfLBnodes,
 											int size_Array,
 											int level,
 											real* forces,
@@ -112,7 +112,7 @@ void KernelKum1hSP27(    unsigned int numberOfThreads,
 									real* coordY,
 									real* coordZ,
 									real* DDStart,
-									int size_Mat,
+									unsigned long long numberOfLBnodes,
 									bool EvenOrOdd);
 
 void KernelCascadeSP27(unsigned int numberOfThreads, 
@@ -122,7 +122,7 @@ void KernelCascadeSP27(unsigned int numberOfThreads,
 								  unsigned int* neighborY,
 								  unsigned int* neighborZ,
 								  real* DD,
-								  int size_Mat,
+								  unsigned long long numberOfLBnodes,
 								  bool EvenOrOdd);
 
 void KernelKumNewSP27(   unsigned int numberOfThreads, 
@@ -132,7 +132,7 @@ void KernelKumNewSP27(   unsigned int numberOfThreads,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
 									real* DD,
-									int size_Mat,
+									unsigned long long numberOfLBnodes,
 									bool EvenOrOdd);
 
 
@@ -144,7 +144,7 @@ void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int size_Array,
 	int level,
 	real* forces,
@@ -158,7 +158,7 @@ void CumulantOnePreconditionedChimCompSP27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int size_Array,
 	int level,
 	real* forces,
@@ -172,7 +172,7 @@ void CumulantOneChimCompSP27(
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int size_Array,
 	int level,
 	real* forces,
@@ -189,7 +189,7 @@ void KernelKumIsoTestSP27(unsigned int numberOfThreads,
 									 real* dxxUx,
 									 real* dyyUy,
 									 real* dzzUz,
-									 int size_Mat,
+									 unsigned long long numberOfLBnodes,
 									 bool EvenOrOdd);
 
 void KernelKumCompSP27(  unsigned int numberOfThreads, 
@@ -199,7 +199,7 @@ void KernelKumCompSP27(  unsigned int numberOfThreads,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
 									real* DD,
-									int size_Mat,
+									unsigned long long numberOfLBnodes,
 									bool EvenOrOdd);
 
 void KernelWaleBySoniMalavCumAA2016CompSP27(
@@ -215,7 +215,7 @@ void KernelWaleBySoniMalavCumAA2016CompSP27(
 	real* veloZ,
 	real* DD,
 	real* turbulentViscosity,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int size_Array,
 	int level,
 	real* forces,
@@ -227,7 +227,7 @@ void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
 									   unsigned int* neighborY,
 									   unsigned int* neighborZ,
 									   real* DD,
-									   int size_Mat,
+									   unsigned long long numberOfLBnodes,
 									   int level,
 									   real* forces,
 									   real porosity,
@@ -245,7 +245,7 @@ void KernelADincomp7(   unsigned int numberOfThreads,
 								   unsigned int* neighborZ,
 								   real* DD,
 								   real* DD7,
-								   int size_Mat,
+								   unsigned long long numberOfLBnodes,
 								   bool EvenOrOdd);
 
 void KernelADincomp27(   unsigned int numberOfThreads, 
@@ -256,7 +256,7 @@ void KernelADincomp27(   unsigned int numberOfThreads,
 									unsigned int* neighborZ,
 									real* DD,
 									real* DD7,
-									int size_Mat,
+									unsigned long long numberOfLBnodes,
 									bool EvenOrOdd);
 
 void Init27(int myid,
@@ -267,7 +267,7 @@ void Init27(int myid,
                        unsigned int* neighborY,
                        unsigned int* neighborZ,
                        real* vParab,
-                       unsigned int size_Mat,
+                       unsigned long long numberOfLBnodes,
                        unsigned int grid_nx, 
                        unsigned int grid_ny, 
                        unsigned int grid_nz, 
@@ -285,7 +285,7 @@ void InitNonEqPartSP27(unsigned int numberOfThreads,
                                   real* ux,
                                   real* uy,
                                   real* uz,
-                                  unsigned int size_Mat,
+                                  unsigned long long numberOfLBnodes,
                                   real* DD,
                                   real omega,
                                   bool EvenOrOdd);
@@ -300,7 +300,7 @@ void InitThS7(  unsigned int numberOfThreads,
                            real* ux,
                            real* uy,
                            real* uz,
-                           unsigned int size_Mat,
+                           unsigned long long numberOfLBnodes,
                            real* DD7,
                            bool EvenOrOdd);
 
@@ -313,7 +313,7 @@ void InitADDev27( unsigned int numberOfThreads,
                            real* ux,
                            real* uy,
                            real* uz,
-                           unsigned int size_Mat,
+                           unsigned long long numberOfLBnodes,
                            real* DD27,
                            bool EvenOrOdd);
 
@@ -330,7 +330,7 @@ void PostProcessorF3_2018Fehlberg(
 	real* vzOut,
 	real* DDStart,
 	real* G6,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	bool EvenOrOdd);
@@ -343,7 +343,7 @@ void CalcMac27( real* vxD,
                           unsigned int* neighborX,
                           unsigned int* neighborY,
                           unsigned int* neighborZ,
-                          unsigned int size_Mat,
+                          unsigned long long numberOfLBnodes,
                           unsigned int grid_nx, 
                           unsigned int grid_ny, 
                           unsigned int grid_nz, 
@@ -359,7 +359,7 @@ void CalcMacSP27(real* vxD,
                             unsigned int* neighborX,
                             unsigned int* neighborY,
                             unsigned int* neighborZ,
-                            unsigned int size_Mat,
+                            unsigned long long numberOfLBnodes,
                             unsigned int numberOfThreads, 
                             real* DD,
                             bool isEvenTimestep);
@@ -373,7 +373,7 @@ void CalcMacCompSP27(real* vxD,
 								unsigned int* neighborX,
 								unsigned int* neighborY,
 								unsigned int* neighborZ,
-								unsigned int size_Mat,
+								unsigned long long numberOfLBnodes,
 								unsigned int numberOfThreads, 
 								real* DD,
 								bool isEvenTimestep);
@@ -383,7 +383,7 @@ void CalcMacThS7(  real* Conc,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat,
+                              unsigned long long numberOfLBnodes,
                               unsigned int numberOfThreads, 
                               real* DD7,
                               bool isEvenTimestep);
@@ -395,7 +395,7 @@ void PlaneConcThS7(real* Conc,
 							  unsigned int* neighborX,
 							  unsigned int* neighborY,
 							  unsigned int* neighborZ,
-							  unsigned int size_Mat,
+							  unsigned long long numberOfLBnodes,
 							  unsigned int numberOfThreads, 
 							  real* DD7,
 							  bool isEvenTimestep);
@@ -407,7 +407,7 @@ void PlaneConcThS27(real* Conc,
 							   unsigned int* neighborX,
 							   unsigned int* neighborY,
 							   unsigned int* neighborZ,
-							   unsigned int size_Mat,
+							   unsigned long long numberOfLBnodes,
 							   unsigned int numberOfThreads, 
 							   real* DD27,
 							   bool isEvenTimestep);
@@ -418,7 +418,7 @@ void CalcConcentration27( unsigned int numberOfThreads,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int size_Mat,
+                                     unsigned long long numberOfLBnodes,
                                      real* DD27,
                                      bool isEvenTimestep);
 
@@ -431,7 +431,7 @@ void CalcMedSP27(  real* vxD,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat,
+                              unsigned long long numberOfLBnodes,
                               unsigned int numberOfThreads, 
                               real* DD,
                               bool isEvenTimestep);
@@ -445,7 +445,7 @@ void CalcMedCompSP27(real* vxD,
 								unsigned int* neighborX,
 								unsigned int* neighborY,
 								unsigned int* neighborZ,
-								unsigned int size_Mat,
+								unsigned long long numberOfLBnodes,
 								unsigned int numberOfThreads, 
 								real* DD,
 								bool isEvenTimestep);
@@ -461,7 +461,7 @@ void CalcMedCompAD27(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	unsigned int numberOfThreads,
 	real* DD,
 	real* DD_AD,
@@ -477,7 +477,7 @@ void CalcMacMedSP27(  real* vxD,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
                                  unsigned int tdiff,
-                                 unsigned int size_Mat,
+                                 unsigned long long numberOfLBnodes,
                                  unsigned int numberOfThreads, 
                                  bool isEvenTimestep);
 
@@ -487,7 +487,7 @@ void ResetMedianValuesSP27(
 	real* vzD,
 	real* rhoD,
 	real* pressD,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	unsigned int numberOfThreads,
 	bool isEvenTimestep);
 
@@ -498,7 +498,7 @@ void ResetMedianValuesAD27(
 	real* rhoD,
 	real* pressD,
 	real* concD,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	unsigned int numberOfThreads,
 	bool isEvenTimestep);
 
@@ -511,7 +511,7 @@ void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 										 unsigned int* neighborX,
 										 unsigned int* neighborY,
 										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
+										 unsigned long long numberOfLBnodes,
 										 unsigned int numberOfThreads, 
 										 real* DD,
 										 bool isEvenTimestep);
@@ -525,7 +525,7 @@ void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 									   unsigned int* neighborX,
 									   unsigned int* neighborY,
 									   unsigned int* neighborZ,
-									   unsigned int size_Mat,
+									   unsigned long long numberOfLBnodes,
 									   unsigned int numberOfThreads, 
 									   real* DD,
 									   bool isEvenTimestep);
@@ -541,7 +541,7 @@ void Calc3rdMomentsIncompSP27(real* CUMbbb,
 										 unsigned int* neighborX,
 										 unsigned int* neighborY,
 										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
+										 unsigned long long numberOfLBnodes,
 										 unsigned int numberOfThreads, 
 										 real* DD,
 										 bool isEvenTimestep);
@@ -557,7 +557,7 @@ void Calc3rdMomentsCompSP27(real* CUMbbb,
 									   unsigned int* neighborX,
 									   unsigned int* neighborY,
 									   unsigned int* neighborZ,
-									   unsigned int size_Mat,
+									   unsigned long long numberOfLBnodes,
 									   unsigned int numberOfThreads, 
 									   real* DD,
 									   bool isEvenTimestep);
@@ -576,7 +576,7 @@ void CalcHigherMomentsIncompSP27(real* CUMcbb,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat,
+											unsigned long long numberOfLBnodes,
 											unsigned int numberOfThreads, 
 											real* DD,
 											bool isEvenTimestep);
@@ -595,7 +595,7 @@ void CalcHigherMomentsCompSP27(real* CUMcbb,
 										  unsigned int* neighborX,
 										  unsigned int* neighborY,
 										  unsigned int* neighborZ,
-										  unsigned int size_Mat,
+										  unsigned long long numberOfLBnodes,
 										  unsigned int numberOfThreads, 
 										  real* DD,
 										  bool isEvenTimestep);
@@ -612,7 +612,7 @@ void LBCalcMeasurePoints27(real* vxMP,
                                       unsigned int* neighborX,
                                       unsigned int* neighborY,
                                       unsigned int* neighborZ,
-                                      unsigned int size_Mat,
+                                      unsigned long long numberOfLBnodes,
                                       real* DD,
                                       unsigned int numberOfThreads, 
                                       bool isEvenTimestep);
@@ -627,7 +627,7 @@ void BcPress27(int nx,
                           unsigned int* neighborY,
                           unsigned int* neighborZ,
                           real* DD, 
-                          unsigned int size_Mat, 
+                          unsigned long long numberOfLBnodes, 
                           bool isEvenTimestep);
 
 void BcVel27(int nx, 
@@ -641,7 +641,7 @@ void BcVel27(int nx,
                         unsigned int* neighborY,
                         unsigned int* neighborZ,
                         real* DD, 
-                        unsigned int size_Mat, 
+                        unsigned long long numberOfLBnodes, 
                         bool isEvenTimestep, 
                         real u0x, 
                         real om);
@@ -661,7 +661,7 @@ void QDevCompThinWalls27(unsigned int numberOfThreads,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
 									unsigned int* neighborWSB,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -675,7 +675,7 @@ void QDevIncompHighNu27(  unsigned int numberOfThreads,
 									 unsigned int* neighborX,
 									 unsigned int* neighborY,
 									 unsigned int* neighborZ,
-									 unsigned int size_Mat, 
+									 unsigned long long numberOfLBnodes, 
 									 bool isEvenTimestep);
 
 void QDevCompHighNu27(unsigned int numberOfThreads,
@@ -687,7 +687,7 @@ void QDevCompHighNu27(unsigned int numberOfThreads,
 								 unsigned int* neighborX,
 								 unsigned int* neighborY,
 								 unsigned int* neighborZ,
-								 unsigned int size_Mat, 
+								 unsigned long long numberOfLBnodes, 
 								 bool isEvenTimestep);
 
 void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -704,7 +704,7 @@ void QVelDeviceCouette27(unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QVelDevice1h27( unsigned int numberOfThreads,
@@ -726,7 +726,7 @@ void QVelDevice1h27( unsigned int numberOfThreads,
 								real* coordX,
 								real* coordY,
 								real* coordZ,
-								unsigned int size_Mat, 
+								unsigned long long numberOfLBnodes, 
 								bool isEvenTimestep);
 
 void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -743,7 +743,7 @@ void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
 									  unsigned int* neighborX,
 									  unsigned int* neighborY,
 									  unsigned int* neighborZ,
-									  unsigned int size_Mat, 
+									  unsigned long long numberOfLBnodes, 
 									  bool isEvenTimestep);
 
 void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -762,7 +762,7 @@ void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           unsigned int* neighborY,
 							           unsigned int* neighborZ,
 									   unsigned int* neighborWSB,
-							           unsigned int size_Mat, 
+							           unsigned long long numberOfLBnodes, 
 							           bool isEvenTimestep);
 
 void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -779,7 +779,7 @@ void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
 										unsigned int* neighborX,
 										unsigned int* neighborY,
 										unsigned int* neighborZ,
-										unsigned int size_Mat, 
+										unsigned long long numberOfLBnodes, 
 										bool isEvenTimestep);
 
 void QVelDevCompHighNu27(unsigned int numberOfThreads,
@@ -794,7 +794,7 @@ void QVelDevCompHighNu27(unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QVeloDevEQ27(unsigned int numberOfThreads,
@@ -808,24 +808,9 @@ void QVeloDevEQ27(unsigned int numberOfThreads,
 							 unsigned int* neighborX,
 							 unsigned int* neighborY,
 							 unsigned int* neighborZ,
-							 unsigned int size_Mat, 
+							 unsigned long long numberOfLBnodes, 
 							 bool isEvenTimestep);
 
-void QVeloStreetDevEQ27(
-	uint  numberOfThreads,
-	real* veloXfraction,
-	real* veloYfraction,
-	int*  naschVelo,
-	real* DD,
-	int*  naschIndex,
-	int   numberOfStreetNodes,
-	real  velocityRatio,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint  size_Mat,
-	bool  isEvenTimestep);
-
 void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
 void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -848,7 +833,7 @@ void QSlipGeomDevComp27( unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QSlipNormDevComp27(unsigned int numberOfThreads,
@@ -863,7 +848,7 @@ void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep);
 
 void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
@@ -883,7 +868,7 @@ void QPressDevFixBackflow27(unsigned int numberOfThreads,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat, 
+                                       unsigned long long numberOfLBnodes, 
                                        bool isEvenTimestep);
 
 void QPressDevDirDepBot27(unsigned int numberOfThreads,
@@ -895,7 +880,7 @@ void QPressDevDirDepBot27(unsigned int numberOfThreads,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int size_Mat, 
+                                     unsigned long long numberOfLBnodes, 
                                      bool isEvenTimestep);
 
 void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -914,7 +899,7 @@ void QPressDevOld27(unsigned int numberOfThreads,
                                unsigned int* neighborX,
                                unsigned int* neighborY,
                                unsigned int* neighborZ,
-                               unsigned int size_Mat, 
+                               unsigned long long numberOfLBnodes, 
                                bool isEvenTimestep);
 
 void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -930,7 +915,7 @@ void QPressDevZero27(unsigned int numberOfThreads,
                                 unsigned int* neighborX,
                                 unsigned int* neighborY,
                                 unsigned int* neighborZ,
-                                unsigned int size_Mat, 
+                                unsigned long long numberOfLBnodes, 
                                 bool isEvenTimestep);
 
 void QPressDevFake27(   unsigned int numberOfThreads,
@@ -943,7 +928,7 @@ void QPressDevFake27(   unsigned int numberOfThreads,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep);
 
 void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
@@ -958,7 +943,7 @@ void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QPressDevAntiBB27(  unsigned int numberOfThreads,
@@ -974,7 +959,7 @@ void QPressDevAntiBB27(  unsigned int numberOfThreads,
 								  unsigned int* neighborX,
 								  unsigned int* neighborY,
 								  unsigned int* neighborZ,
-								  unsigned int size_Mat, 
+								  unsigned long long numberOfLBnodes, 
 								  bool isEvenTimestep);
 
 void PressSchlaffer27(unsigned int numberOfThreads,
@@ -991,7 +976,7 @@ void PressSchlaffer27(unsigned int numberOfThreads,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
-                                 unsigned int size_Mat, 
+                                 unsigned long long numberOfLBnodes, 
                                  bool isEvenTimestep);
 
 void VelSchlaffer27(  unsigned int numberOfThreads,
@@ -1006,7 +991,7 @@ void VelSchlaffer27(  unsigned int numberOfThreads,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
-                                 unsigned int size_Mat, 
+                                 unsigned long long numberOfLBnodes, 
                                  bool isEvenTimestep);
 
 void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio);
@@ -1029,7 +1014,7 @@ void QADDev7(unsigned int numberOfThreads,
                         unsigned int* neighborX,
                         unsigned int* neighborY,
                         unsigned int* neighborZ,
-                        unsigned int size_Mat, 
+                        unsigned long long numberOfLBnodes, 
                         bool isEvenTimestep);
 
 //////////////////////////////////////////////////////////////////////////
@@ -1043,7 +1028,7 @@ void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 	uint* neighborZ,
 	real* distributions,
 	real* distributionsAD,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	real* forces,
 	bool isEvenTimestep);
 
@@ -1063,7 +1048,7 @@ void ADSlipVelDevComp(
 	uint * neighborX,
 	uint * neighborY,
 	uint * neighborZ,
-	uint size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep);
 	
 void QADDirichletDev27( unsigned int numberOfThreads,
@@ -1078,7 +1063,7 @@ void QADDirichletDev27( unsigned int numberOfThreads,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep);
 
 void QADBBDev27(  unsigned int numberOfThreads,
@@ -1093,7 +1078,7 @@ void QADBBDev27(  unsigned int numberOfThreads,
 							 unsigned int* neighborX,
 							 unsigned int* neighborY,
 							 unsigned int* neighborZ,
-							 unsigned int size_Mat, 
+							 unsigned long long numberOfLBnodes, 
 							 bool isEvenTimestep);
 
 void QADVelDev7(unsigned int numberOfThreads,
@@ -1109,7 +1094,7 @@ void QADVelDev7(unsigned int numberOfThreads,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
                            unsigned int* neighborZ,
-                           unsigned int size_Mat, 
+                           unsigned long long numberOfLBnodes, 
                            bool isEvenTimestep);
 
 
@@ -1126,7 +1111,7 @@ void QADVelDev27(  unsigned int numberOfThreads,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat, 
+                              unsigned long long numberOfLBnodes, 
                               bool isEvenTimestep);
 
 void QADPressDev7( unsigned int numberOfThreads,
@@ -1142,7 +1127,7 @@ void QADPressDev7( unsigned int numberOfThreads,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat, 
+                              unsigned long long numberOfLBnodes, 
                               bool isEvenTimestep);
 
 void QADPressDev27(unsigned int numberOfThreads,
@@ -1158,7 +1143,7 @@ void QADPressDev27(unsigned int numberOfThreads,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
-                              unsigned int size_Mat, 
+                              unsigned long long numberOfLBnodes, 
                               bool isEvenTimestep);
 
 void QADPressNEQNeighborDev27(
@@ -1171,7 +1156,7 @@ void QADPressNEQNeighborDev27(
 											unsigned int* neighborX,
 											unsigned int* neighborY,
 											unsigned int* neighborZ,
-											unsigned int size_Mat,
+											unsigned long long numberOfLBnodes,
 											bool isEvenTimestep
 										);
 
@@ -1187,7 +1172,7 @@ void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QNoSlipADincompDev27(unsigned int numberOfThreads,
@@ -1202,7 +1187,7 @@ void QNoSlipADincompDev27(unsigned int numberOfThreads,
 									 unsigned int* neighborX,
 									 unsigned int* neighborY,
 									 unsigned int* neighborZ,
-									 unsigned int size_Mat, 
+									 unsigned long long numberOfLBnodes, 
 									 bool isEvenTimestep);
 
 void QADVeloIncompDev7( unsigned int numberOfThreads,
@@ -1218,7 +1203,7 @@ void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep);
 
 
@@ -1235,7 +1220,7 @@ void QADVeloIncompDev27( unsigned int numberOfThreads,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
 									unsigned int* neighborZ,
-									unsigned int size_Mat, 
+									unsigned long long numberOfLBnodes, 
 									bool isEvenTimestep);
 
 void QADPressIncompDev7(  unsigned int numberOfThreads,
@@ -1251,7 +1236,7 @@ void QADPressIncompDev7(  unsigned int numberOfThreads,
 									 unsigned int* neighborX,
 									 unsigned int* neighborY,
 									 unsigned int* neighborZ,
-									 unsigned int size_Mat, 
+									 unsigned long long numberOfLBnodes, 
 									 bool isEvenTimestep);
 
 void QADPressIncompDev27(  unsigned int numberOfThreads,
@@ -1267,7 +1252,7 @@ void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  unsigned int* neighborX,
 									  unsigned int* neighborY,
 									  unsigned int* neighborZ,
-									  unsigned int size_Mat, 
+									  unsigned long long numberOfLBnodes, 
 									  bool isEvenTimestep);
 
 void PropVelo(   unsigned int numberOfThreads,
@@ -1280,7 +1265,7 @@ void PropVelo(   unsigned int numberOfThreads,
 							real* uz,
 							int* k_Q, 
 							unsigned int size_Prop,
-							unsigned int size_Mat,
+							unsigned long long numberOfLBnodes,
 							unsigned int* bcMatD,
 							real* DD,
 							bool EvenOrOdd);
@@ -1293,8 +1278,8 @@ void ScaleCF27( real* DC,
                            unsigned int* neighborFX,
                            unsigned int* neighborFY,
                            unsigned int* neighborFZ,
-                           unsigned int size_MatC, 
-                           unsigned int size_MatF, 
+                           unsigned long long numberOfLBnodesC, 
+                           unsigned long long numberOfLBnodesF, 
                            bool isEvenTimestep,
                            unsigned int* posCSWB, 
                            unsigned int* posFSWB, 
@@ -1316,8 +1301,8 @@ void ScaleFC27( real* DC,
                            unsigned int* neighborFX,
                            unsigned int* neighborFY,
                            unsigned int* neighborFZ,
-                           unsigned int size_MatC, 
-                           unsigned int size_MatF, 
+                           unsigned long long numberOfLBnodesC, 
+                           unsigned long long numberOfLBnodesF, 
                            bool isEvenTimestep,
                            unsigned int* posC, 
                            unsigned int* posFSWB, 
@@ -1339,8 +1324,8 @@ void ScaleCFEff27(real* DC,
                              unsigned int* neighborFX,
                              unsigned int* neighborFY,
                              unsigned int* neighborFZ,
-                             unsigned int size_MatC, 
-                             unsigned int size_MatF, 
+                             unsigned long long numberOfLBnodesC, 
+                             unsigned long long numberOfLBnodesF, 
                              bool isEvenTimestep,
                              unsigned int* posCSWB, 
                              unsigned int* posFSWB, 
@@ -1353,7 +1338,7 @@ void ScaleCFEff27(real* DC,
                              unsigned int nxF, 
                              unsigned int nyF,
                              unsigned int numberOfThreads,
-                             OffCF offCF);
+                             ICellNeigh neighborCoarseToFine);
 
 void ScaleFCEff27(real* DC, 
                              real* DF, 
@@ -1363,8 +1348,8 @@ void ScaleFCEff27(real* DC,
                              unsigned int* neighborFX,
                              unsigned int* neighborFY,
                              unsigned int* neighborFZ,
-                             unsigned int size_MatC, 
-                             unsigned int size_MatF, 
+                             unsigned long long numberOfLBnodesC, 
+                             unsigned long long numberOfLBnodesF, 
                              bool isEvenTimestep,
                              unsigned int* posC, 
                              unsigned int* posFSWB, 
@@ -1377,7 +1362,7 @@ void ScaleFCEff27(real* DC,
                              unsigned int nxF, 
                              unsigned int nyF,
                              unsigned int numberOfThreads,
-                             OffFC offFC);
+                             ICellNeigh neighborFineToCoarse);
 
 void ScaleCFLast27(real* DC, 
                               real* DF, 
@@ -1387,8 +1372,8 @@ void ScaleCFLast27(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -1401,7 +1386,7 @@ void ScaleCFLast27(real* DC,
                               unsigned int nxF, 
                               unsigned int nyF,
                               unsigned int numberOfThreads,
-                              OffCF offCF);
+                              ICellNeigh neighborCoarseToFine);
 
 void ScaleFCLast27(real* DC, 
                               real* DF, 
@@ -1411,8 +1396,8 @@ void ScaleFCLast27(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posC, 
                               unsigned int* posFSWB, 
@@ -1425,7 +1410,7 @@ void ScaleFCLast27(real* DC,
                               unsigned int nxF, 
                               unsigned int nyF,
                               unsigned int numberOfThreads,
-                              OffFC offFC);
+                              ICellNeigh neighborFineToCoarse);
 
 void ScaleCFpress27(real* DC, 
                               real* DF, 
@@ -1435,8 +1420,8 @@ void ScaleCFpress27(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -1449,7 +1434,7 @@ void ScaleCFpress27(real* DC,
                               unsigned int nxF, 
                               unsigned int nyF,
                               unsigned int numberOfThreads,
-                              OffCF offCF);
+                              ICellNeigh neighborCoarseToFine);
 
 void ScaleFCpress27(  real* DC, 
                                  real* DF, 
@@ -1459,8 +1444,8 @@ void ScaleFCpress27(  real* DC,
                                  unsigned int* neighborFX,
                                  unsigned int* neighborFY,
                                  unsigned int* neighborFZ,
-                                 unsigned int size_MatC, 
-                                 unsigned int size_MatF, 
+                                 unsigned long long numberOfLBnodesC, 
+                                 unsigned long long numberOfLBnodesF, 
                                  bool isEvenTimestep,
                                  unsigned int* posC, 
                                  unsigned int* posFSWB, 
@@ -1473,7 +1458,7 @@ void ScaleFCpress27(  real* DC,
                                  unsigned int nxF, 
                                  unsigned int nyF,
                                  unsigned int numberOfThreads,
-                                 OffFC offFC);
+                                 ICellNeigh neighborFineToCoarse);
 
 void ScaleCF_Fix_27(real* DC, 
                               real* DF, 
@@ -1483,8 +1468,8 @@ void ScaleCF_Fix_27(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -1497,7 +1482,7 @@ void ScaleCF_Fix_27(real* DC,
                               unsigned int nxF, 
                               unsigned int nyF,
                               unsigned int numberOfThreads,
-                              OffCF offCF);
+                              ICellNeigh neighborCoarseToFine);
 
 void ScaleCF_Fix_comp_27(   real* DC, 
 									   real* DF, 
@@ -1507,8 +1492,8 @@ void ScaleCF_Fix_comp_27(   real* DC,
 									   unsigned int* neighborFX,
 									   unsigned int* neighborFY,
 									   unsigned int* neighborFZ,
-									   unsigned int size_MatC, 
-									   unsigned int size_MatF, 
+									   unsigned long long numberOfLBnodesC, 
+									   unsigned long long numberOfLBnodesF, 
 									   bool isEvenTimestep,
 									   unsigned int* posCSWB, 
 									   unsigned int* posFSWB, 
@@ -1521,7 +1506,7 @@ void ScaleCF_Fix_comp_27(   real* DC,
 									   unsigned int nxF, 
 									   unsigned int nyF,
 									   unsigned int numberOfThreads,
-									   OffCF offCF);
+									   ICellNeigh neighborCoarseToFine);
 
 void ScaleCF_0817_comp_27(  real* DC, 
 									   real* DF, 
@@ -1531,8 +1516,8 @@ void ScaleCF_0817_comp_27(  real* DC,
 									   unsigned int* neighborFX,
 									   unsigned int* neighborFY,
 									   unsigned int* neighborFZ,
-									   unsigned int size_MatC, 
-									   unsigned int size_MatF, 
+									   unsigned long long numberOfLBnodesC, 
+									   unsigned long long numberOfLBnodesF, 
 									   bool isEvenTimestep,
 									   unsigned int* posCSWB, 
 									   unsigned int* posFSWB, 
@@ -1545,7 +1530,7 @@ void ScaleCF_0817_comp_27(  real* DC,
 									   unsigned int nxF, 
 									   unsigned int nyF,
 									   unsigned int numberOfThreads,
-									   OffCF offCF,
+									   ICellNeigh neighborCoarseToFine,
 									   CUstream_st* stream);
 
 void ScaleCF_comp_D3Q27F3_2018(	real* DC,
@@ -1557,8 +1542,8 @@ void ScaleCF_comp_D3Q27F3_2018(	real* DC,
 											unsigned int* neighborFX,
 											unsigned int* neighborFY,
 											unsigned int* neighborFZ,
-											unsigned int size_MatC, 
-											unsigned int size_MatF, 
+											unsigned long long numberOfLBnodesC, 
+											unsigned long long numberOfLBnodesF, 
 											bool isEvenTimestep,
 											unsigned int* posCSWB, 
 											unsigned int* posFSWB, 
@@ -1571,7 +1556,7 @@ void ScaleCF_comp_D3Q27F3_2018(	real* DC,
 											unsigned int nxF, 
 											unsigned int nyF,
 											unsigned int numberOfThreads,
-											OffCF offCF);
+											ICellNeigh neighborCoarseToFine);
 
 void ScaleCF_comp_D3Q27F3(real* DC,
 									 real* DF,
@@ -1582,8 +1567,8 @@ void ScaleCF_comp_D3Q27F3(real* DC,
 									 unsigned int* neighborFX,
 									 unsigned int* neighborFY,
 									 unsigned int* neighborFZ,
-									 unsigned int size_MatC, 
-									 unsigned int size_MatF, 
+									 unsigned long long numberOfLBnodesC, 
+									 unsigned long long numberOfLBnodesF, 
 									 bool isEvenTimestep,
 									 unsigned int* posCSWB, 
 									 unsigned int* posFSWB, 
@@ -1596,7 +1581,7 @@ void ScaleCF_comp_D3Q27F3(real* DC,
 									 unsigned int nxF, 
 									 unsigned int nyF,
 									 unsigned int numberOfThreads,
-									 OffCF offCF,
+									 ICellNeigh neighborCoarseToFine,
 									 CUstream_st *stream);
 
 void ScaleCF_staggered_time_comp_27( real* DC, 
@@ -1607,8 +1592,8 @@ void ScaleCF_staggered_time_comp_27( real* DC,
 												unsigned int* neighborFX,
 												unsigned int* neighborFY,
 												unsigned int* neighborFZ,
-												unsigned int size_MatC, 
-												unsigned int size_MatF, 
+												unsigned long long numberOfLBnodesC, 
+												unsigned long long numberOfLBnodesF, 
 												bool isEvenTimestep,
 												unsigned int* posCSWB, 
 												unsigned int* posFSWB, 
@@ -1621,10 +1606,11 @@ void ScaleCF_staggered_time_comp_27( real* DC,
 												unsigned int nxF, 
 												unsigned int nyF,
 												unsigned int numberOfThreads,
-												OffCF offCF);
+												ICellNeigh neighborCoarseToFine);
+
+void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * interpolationCellsCoarseToFine, ICellNeigh &neighborCoarseToFine, CUstream_st *stream);
 
-void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF &offsetCF, CUstream_st *stream);
-void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF &offsetCF, CUstream_st *stream);
+template<bool hasTurbulentViscosity> void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * interpolationCellsCoarseToFine, ICellNeigh &neighborCoarseToFine, CUstream_st *stream);
 
 void ScaleCF_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
@@ -1634,8 +1620,8 @@ void ScaleCF_RhoSq_3rdMom_comp_27( real* DC,
 											  unsigned int* neighborFX,
 											  unsigned int* neighborFY,
 											  unsigned int* neighborFZ,
-											  unsigned int size_MatC, 
-											  unsigned int size_MatF, 
+											  unsigned long long numberOfLBnodesC, 
+											  unsigned long long numberOfLBnodesF, 
 											  bool isEvenTimestep,
 											  unsigned int* posCSWB, 
 											  unsigned int* posFSWB, 
@@ -1648,7 +1634,7 @@ void ScaleCF_RhoSq_3rdMom_comp_27( real* DC,
 											  unsigned int nxF, 
 											  unsigned int nyF,
 											  unsigned int numberOfThreads,
-											  OffCF offCF,
+											  ICellNeigh neighborCoarseToFine,
 											  CUstream_st *stream);
 
 void ScaleCF_AA2016_comp_27( real* DC, 
@@ -1659,8 +1645,8 @@ void ScaleCF_AA2016_comp_27( real* DC,
 										unsigned int* neighborFX,
 										unsigned int* neighborFY,
 										unsigned int* neighborFZ,
-										unsigned int size_MatC, 
-										unsigned int size_MatF, 
+										unsigned long long numberOfLBnodesC, 
+										unsigned long long numberOfLBnodesF, 
 										bool isEvenTimestep,
 										unsigned int* posCSWB, 
 										unsigned int* posFSWB, 
@@ -1673,7 +1659,7 @@ void ScaleCF_AA2016_comp_27( real* DC,
 										unsigned int nxF, 
 										unsigned int nyF,
 										unsigned int numberOfThreads,
-										OffCF offCF,
+										ICellNeigh neighborCoarseToFine,
 										CUstream_st *stream);
 
 void ScaleCF_NSPress_27(real* DC, 
@@ -1684,8 +1670,8 @@ void ScaleCF_NSPress_27(real* DC,
 								  unsigned int* neighborFX,
 								  unsigned int* neighborFY,
 								  unsigned int* neighborFZ,
-								  unsigned int size_MatC, 
-								  unsigned int size_MatF, 
+								  unsigned long long numberOfLBnodesC, 
+								  unsigned long long numberOfLBnodesF, 
 								  bool isEvenTimestep,
 								  unsigned int* posCSWB, 
 								  unsigned int* posFSWB, 
@@ -1698,7 +1684,7 @@ void ScaleCF_NSPress_27(real* DC,
 								  unsigned int nxF, 
 								  unsigned int nyF,
 								  unsigned int numberOfThreads,
-								  OffCF offCF);
+								  ICellNeigh neighborCoarseToFine);
 
 void ScaleFC_Fix_27(  real* DC, 
                                  real* DF, 
@@ -1708,8 +1694,8 @@ void ScaleFC_Fix_27(  real* DC,
                                  unsigned int* neighborFX,
                                  unsigned int* neighborFY,
                                  unsigned int* neighborFZ,
-                                 unsigned int size_MatC, 
-                                 unsigned int size_MatF, 
+                                 unsigned long long numberOfLBnodesC, 
+                                 unsigned long long numberOfLBnodesF, 
                                  bool isEvenTimestep,
                                  unsigned int* posC, 
                                  unsigned int* posFSWB, 
@@ -1722,7 +1708,7 @@ void ScaleFC_Fix_27(  real* DC,
                                  unsigned int nxF, 
                                  unsigned int nyF,
                                  unsigned int numberOfThreads,
-                                 OffFC offFC);
+                                 ICellNeigh neighborFineToCoarse);
 
 void ScaleFC_Fix_comp_27(   real* DC, 
 									   real* DF, 
@@ -1732,8 +1718,8 @@ void ScaleFC_Fix_comp_27(   real* DC,
 									   unsigned int* neighborFX,
 									   unsigned int* neighborFY,
 									   unsigned int* neighborFZ,
-									   unsigned int size_MatC, 
-									   unsigned int size_MatF, 
+									   unsigned long long numberOfLBnodesC, 
+									   unsigned long long numberOfLBnodesF, 
 									   bool isEvenTimestep,
 									   unsigned int* posC, 
 									   unsigned int* posFSWB, 
@@ -1746,7 +1732,7 @@ void ScaleFC_Fix_comp_27(   real* DC,
 									   unsigned int nxF, 
 									   unsigned int nyF,
 									   unsigned int numberOfThreads,
-									   OffFC offFC);
+									   ICellNeigh neighborFineToCoarse);
 
 void ScaleFC_0817_comp_27(  real* DC, 
 									   real* DF, 
@@ -1756,8 +1742,8 @@ void ScaleFC_0817_comp_27(  real* DC,
 									   unsigned int* neighborFX,
 									   unsigned int* neighborFY,
 									   unsigned int* neighborFZ,
-									   unsigned int size_MatC, 
-									   unsigned int size_MatF, 
+									   unsigned long long numberOfLBnodesC, 
+									   unsigned long long numberOfLBnodesF, 
 									   bool isEvenTimestep,
 									   unsigned int* posC, 
 									   unsigned int* posFSWB, 
@@ -1770,7 +1756,7 @@ void ScaleFC_0817_comp_27(  real* DC,
 									   unsigned int nxF, 
 									   unsigned int nyF,
 									   unsigned int numberOfThreads,
-									   OffFC offFC,
+									   ICellNeigh neighborFineToCoarse,
 									   CUstream_st *stream);
 
 void ScaleFC_comp_D3Q27F3_2018(real* DC,
@@ -1782,8 +1768,8 @@ void ScaleFC_comp_D3Q27F3_2018(real* DC,
 										  unsigned int* neighborFX,
 										  unsigned int* neighborFY,
 										  unsigned int* neighborFZ,
-										  unsigned int size_MatC, 
-										  unsigned int size_MatF, 
+										  unsigned long long numberOfLBnodesC, 
+										  unsigned long long numberOfLBnodesF, 
 										  bool isEvenTimestep,
 										  unsigned int* posC, 
 										  unsigned int* posFSWB, 
@@ -1796,7 +1782,7 @@ void ScaleFC_comp_D3Q27F3_2018(real* DC,
 										  unsigned int nxF, 
 										  unsigned int nyF,
 										  unsigned int numberOfThreads,
-										  OffFC offFC);
+										  ICellNeigh neighborFineToCoarse);
 
 void ScaleFC_comp_D3Q27F3( real* DC,
 									  real* DF,
@@ -1807,8 +1793,8 @@ void ScaleFC_comp_D3Q27F3( real* DC,
 									  unsigned int* neighborFX,
 									  unsigned int* neighborFY,
 									  unsigned int* neighborFZ,
-									  unsigned int size_MatC, 
-									  unsigned int size_MatF, 
+									  unsigned long long numberOfLBnodesC, 
+									  unsigned long long numberOfLBnodesF, 
 									  bool isEvenTimestep,
 									  unsigned int* posC, 
 									  unsigned int* posFSWB, 
@@ -1821,7 +1807,7 @@ void ScaleFC_comp_D3Q27F3( real* DC,
 									  unsigned int nxF, 
 									  unsigned int nyF,
 									  unsigned int numberOfThreads,
-									  OffFC offFC,
+									  ICellNeigh neighborFineToCoarse,
 									  CUstream_st *stream);
 
 void ScaleFC_staggered_time_comp_27( real* DC, 
@@ -1832,8 +1818,8 @@ void ScaleFC_staggered_time_comp_27( real* DC,
 												unsigned int* neighborFX,
 												unsigned int* neighborFY,
 												unsigned int* neighborFZ,
-												unsigned int size_MatC, 
-												unsigned int size_MatF, 
+												unsigned long long numberOfLBnodesC, 
+												unsigned long long numberOfLBnodesF, 
 												bool isEvenTimestep,
 												unsigned int* posC, 
 												unsigned int* posFSWB, 
@@ -1846,10 +1832,11 @@ void ScaleFC_staggered_time_comp_27( real* DC,
 												unsigned int nxF, 
 												unsigned int nyF,
 												unsigned int numberOfThreads,
-												OffFC offFC);
+												ICellNeigh neighborFineToCoarse);
+
+void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * interpolationCellsFineToCoarse, ICellNeigh& neighborFineToCoarse, CUstream_st *stream);
 
-void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC& offsetFC, CUstream_st *stream);
-void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC& offsetFC, CUstream_st *stream);
+template<bool hasTurbulentViscosity> void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * icellFC, ICellNeigh& neighborFineToCoarse, CUstream_st *stream);
 
 void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
@@ -1859,8 +1846,8 @@ void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
 											  unsigned int* neighborFX,
 											  unsigned int* neighborFY,
 											  unsigned int* neighborFZ,
-											  unsigned int size_MatC, 
-											  unsigned int size_MatF, 
+											  unsigned long long numberOfLBnodesC, 
+											  unsigned long long numberOfLBnodesF, 
 											  bool isEvenTimestep,
 											  unsigned int* posC, 
 											  unsigned int* posFSWB, 
@@ -1873,7 +1860,7 @@ void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
 											  unsigned int nxF, 
 											  unsigned int nyF,
 											  unsigned int numberOfThreads,
-											  OffFC offFC,
+											  ICellNeigh neighborFineToCoarse,
 											  CUstream_st *stream);
 
 void ScaleFC_AA2016_comp_27( real* DC, 
@@ -1884,8 +1871,8 @@ void ScaleFC_AA2016_comp_27( real* DC,
 										unsigned int* neighborFX,
 										unsigned int* neighborFY,
 										unsigned int* neighborFZ,
-										unsigned int size_MatC, 
-										unsigned int size_MatF, 
+										unsigned long long numberOfLBnodesC, 
+										unsigned long long numberOfLBnodesF, 
 										bool isEvenTimestep,
 										unsigned int* posC, 
 										unsigned int* posFSWB, 
@@ -1898,7 +1885,7 @@ void ScaleFC_AA2016_comp_27( real* DC,
 										unsigned int nxF, 
 										unsigned int nyF,
 										unsigned int numberOfThreads,
-										OffFC offFC,
+										ICellNeigh neighborFineToCoarse,
 										CUstream_st *stream);
 
 void ScaleFC_NSPress_27(  real* DC, 
@@ -1909,8 +1896,8 @@ void ScaleFC_NSPress_27(  real* DC,
 									 unsigned int* neighborFX,
 									 unsigned int* neighborFY,
 									 unsigned int* neighborFZ,
-									 unsigned int size_MatC, 
-									 unsigned int size_MatF, 
+									 unsigned long long numberOfLBnodesC, 
+									 unsigned long long numberOfLBnodesF, 
 									 bool isEvenTimestep,
 									 unsigned int* posC, 
 									 unsigned int* posFSWB, 
@@ -1923,7 +1910,7 @@ void ScaleFC_NSPress_27(  real* DC,
 									 unsigned int nxF, 
 									 unsigned int nyF,
 									 unsigned int numberOfThreads,
-									 OffFC offFC);
+									 ICellNeigh neighborFineToCoarse);
 
 void ScaleCFThS7(  real* DC, 
                               real* DF, 
@@ -1935,8 +1922,8 @@ void ScaleCFThS7(  real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -1955,8 +1942,8 @@ void ScaleFCThS7(  real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posC, 
                               unsigned int* posFSWB, 
@@ -1975,8 +1962,8 @@ void ScaleCFThSMG7(   real* DC,
                                  unsigned int* neighborFX,
                                  unsigned int* neighborFY,
                                  unsigned int* neighborFZ,
-                                 unsigned int size_MatC, 
-                                 unsigned int size_MatF, 
+                                 unsigned long long numberOfLBnodesC, 
+                                 unsigned long long numberOfLBnodesF, 
                                  bool isEvenTimestep,
                                  unsigned int* posCSWB, 
                                  unsigned int* posFSWB, 
@@ -1984,7 +1971,7 @@ void ScaleCFThSMG7(   real* DC,
                                  real nu,
                                  real diffusivity_fine,
                                  unsigned int numberOfThreads,
-                                 OffCF offCF);
+                                 ICellNeigh neighborCoarseToFine);
 
 void ScaleFCThSMG7(real* DC, 
                               real* DF,
@@ -1996,8 +1983,8 @@ void ScaleFCThSMG7(real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posC, 
                               unsigned int* posFSWB, 
@@ -2005,7 +1992,7 @@ void ScaleFCThSMG7(real* DC,
                               real nu,
                               real diffusivity_coarse,
                               unsigned int numberOfThreads,
-                              OffFC offFC);
+                              ICellNeigh neighborFineToCoarse);
 
 void ScaleCFThS27( real* DC, 
                               real* DF, 
@@ -2017,8 +2004,8 @@ void ScaleCFThS27( real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posCSWB, 
                               unsigned int* posFSWB, 
@@ -2026,7 +2013,7 @@ void ScaleCFThS27( real* DC,
                               real nu,
                               real diffusivity_fine,
 							  unsigned int numberOfThreads,
-							  OffCF offCF);
+							  ICellNeigh neighborCoarseToFine);
 
 void ScaleFCThS27( real* DC, 
                               real* DF,
@@ -2038,8 +2025,8 @@ void ScaleFCThS27( real* DC,
                               unsigned int* neighborFX,
                               unsigned int* neighborFY,
                               unsigned int* neighborFZ,
-                              unsigned int size_MatC, 
-                              unsigned int size_MatF, 
+                              unsigned long long numberOfLBnodesC, 
+                              unsigned long long numberOfLBnodesF, 
                               bool isEvenTimestep,
                               unsigned int* posC, 
                               unsigned int* posFSWB, 
@@ -2047,7 +2034,7 @@ void ScaleFCThS27( real* DC,
                               real nu,
                               real diffusivity_coarse,
 							  unsigned int numberOfThreads,
-							  OffFC offFC);
+							  ICellNeigh neighborFineToCoarse);
 
 void DragLiftPostD27(real* DD, 
 								int* k_Q, 
@@ -2059,7 +2046,7 @@ void DragLiftPostD27(real* DD,
 								unsigned int* neighborX,
 								unsigned int* neighborY,
 								unsigned int* neighborZ,
-								unsigned int size_Mat, 
+								unsigned long long numberOfLBnodes, 
 								bool isEvenTimestep,
 								unsigned int numberOfThreads);
 
@@ -2073,7 +2060,7 @@ void DragLiftPreD27( real* DD,
 								unsigned int* neighborX,
 								unsigned int* neighborY,
 								unsigned int* neighborZ,
-								unsigned int size_Mat, 
+								unsigned long long numberOfLBnodes, 
 								bool isEvenTimestep,
 								unsigned int numberOfThreads);
 
@@ -2084,7 +2071,7 @@ void CalcCPtop27(real* DD,
 							unsigned int* neighborX,
 							unsigned int* neighborY,
 							unsigned int* neighborZ,
-							unsigned int size_Mat, 
+							unsigned long long numberOfLBnodes, 
 							bool isEvenTimestep,
 							unsigned int numberOfThreads);
 
@@ -2095,7 +2082,7 @@ void CalcCPbottom27(real* DD,
 							   unsigned int* neighborX,
 							   unsigned int* neighborY,
 							   unsigned int* neighborZ,
-							   unsigned int size_Mat, 
+							   unsigned long long numberOfLBnodes, 
 							   bool isEvenTimestep,
 							   unsigned int numberOfThreads);
 
@@ -2106,7 +2093,7 @@ void GetSendFsPreDev27(real* DD,
 								  unsigned int* neighborX,
 								  unsigned int* neighborY,
 								  unsigned int* neighborZ,
-								  unsigned int size_Mat, 
+								  unsigned long long numberOfLBnodes, 
 								  bool isEvenTimestep,
 								  unsigned int numberOfThreads, 
 	                              cudaStream_t stream = CU_STREAM_LEGACY);
@@ -2118,7 +2105,7 @@ void GetSendFsPostDev27(real* DD,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep,
 								   unsigned int numberOfThreads, 
 	                               cudaStream_t stream = CU_STREAM_LEGACY);
@@ -2130,7 +2117,7 @@ void SetRecvFsPreDev27(real* DD,
 								  unsigned int* neighborX,
 								  unsigned int* neighborY,
 								  unsigned int* neighborZ,
-								  unsigned int size_Mat, 
+								  unsigned long long numberOfLBnodes, 
 								  bool isEvenTimestep, unsigned int numberOfThreads, 
 	                              cudaStream_t stream = CU_STREAM_LEGACY);
 
@@ -2141,7 +2128,7 @@ void SetRecvFsPostDev27(real* DD,
 								   unsigned int* neighborX,
 								   unsigned int* neighborY,
 								   unsigned int* neighborZ,
-								   unsigned int size_Mat, 
+								   unsigned long long numberOfLBnodes, 
 								   bool isEvenTimestep,
 								   unsigned int numberOfThreads,
                                    cudaStream_t stream = CU_STREAM_LEGACY);
@@ -2154,7 +2141,7 @@ void getSendGsDevF3(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep,
 	unsigned int numberOfThreads);
 
@@ -2166,7 +2153,7 @@ void setRecvGsDevF3(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep,
 	unsigned int numberOfThreads);
 
@@ -2182,7 +2169,7 @@ void WallFuncDev27(unsigned int numberOfThreads,
 							  unsigned int* neighborX,
 							  unsigned int* neighborY,
 							  unsigned int* neighborZ,
-							  unsigned int size_Mat, 
+							  unsigned long long numberOfLBnodes, 
 							  bool isEvenTimestep);
 
 void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
@@ -2200,7 +2187,7 @@ void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
 										  unsigned int* neighborX,
 										  unsigned int* neighborY,
 										  unsigned int* neighborZ,
-										  unsigned int size_Mat,
+										  unsigned long long numberOfLBnodes,
 										  real* DD,
 										  bool isEvenTimestep);
 
@@ -2214,7 +2201,7 @@ void GetVelotoForce27(unsigned int numberOfThreads,
 								 unsigned int* neighborX,
 								 unsigned int* neighborY,
 								 unsigned int* neighborZ,
-								 unsigned int size_Mat, 
+								 unsigned long long numberOfLBnodes, 
 								 bool isEvenTimestep);
 
 void InitParticlesDevice(real* coordX,
@@ -2239,7 +2226,7 @@ void InitParticlesDevice(real* coordX,
 									unsigned int* neighborWSB,
 									int level,
 									unsigned int numberOfParticles, 
-									unsigned int size_Mat,
+									unsigned long long numberOfLBnodes,
 									unsigned int numberOfThreads);
 
 void MoveParticlesDevice(real* coordX,
@@ -2267,16 +2254,16 @@ void MoveParticlesDevice(real* coordX,
 									unsigned int timestep, 
 									unsigned int numberOfTimesteps, 
 									unsigned int numberOfParticles, 
-									unsigned int size_Mat,
+									unsigned long long numberOfLBnodes,
 									unsigned int numberOfThreads,
 									bool isEvenTimestep);
 
 void initRandomDevice(curandState* state,
-								 unsigned int size_Mat,
+								 unsigned long long numberOfLBnodes,
 								 unsigned int numberOfThreads);
 
 void generateRandomValuesDevice(curandState* state,
-										   unsigned int size_Mat,
+										   unsigned long long numberOfLBnodes,
 										   real* randArray,
 										   unsigned int numberOfThreads);
 
@@ -2295,7 +2282,7 @@ void CalcTurbulenceIntensityDevice(
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
-   unsigned int size_Mat, 
+   unsigned long long numberOfLBnodes, 
    bool isEvenTimestep,
    uint numberOfThreads);
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index 567a4112e935e0a154288ec7beb1fc82d01c9d7a..877390c822b4828b0007249be524d2534a2482f0 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -22,7 +22,7 @@ __global__ void LB_Kernel_Casc27(real s9,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
                                             real* DDStart,
-                                            int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool EvenOrOdd);
 
 __global__ void LB_Kernel_Casc_SP_27(  real s9,
@@ -31,7 +31,7 @@ __global__ void LB_Kernel_Casc_SP_27(  real s9,
                                                   unsigned int* neighborY,
                                                   unsigned int* neighborZ,
                                                   real* DDStart,
-                                                  int size_Mat,
+                                                  unsigned long long numberOfLBnodes,
                                                   bool EvenOrOdd);
 
 __global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
@@ -40,7 +40,7 @@ __global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
                                                       unsigned int* neighborY,
                                                       unsigned int* neighborZ,
                                                       real* DDStart,
-                                                      int size_Mat,
+                                                      unsigned long long numberOfLBnodes,
                                                       bool EvenOrOdd);
 
 __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
@@ -49,7 +49,7 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
                                                          unsigned int* neighborY,
                                                          unsigned int* neighborZ,
                                                          real* DDStart,
-                                                         int size_Mat,
+                                                         unsigned long long numberOfLBnodes,
                                                          bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
@@ -59,7 +59,7 @@ __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd);
@@ -70,7 +70,7 @@ __global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
                                                         unsigned int* neighborY,
                                                         unsigned int* neighborZ,
                                                         real* DDStart,
-                                                        int size_Mat,
+                                                        unsigned long long numberOfLBnodes,
                                                         int level,
                                                         real* forces,
                                                         bool EvenOrOdd);
@@ -82,7 +82,7 @@ __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
                                                                 unsigned int* neighborY,
                                                                 unsigned int* neighborZ,
                                                                 real* DDStart,
-                                                                int size_Mat,
+                                                                unsigned long long numberOfLBnodes,
                                                                 int level,
                                                                 real* forces,
                                                                 bool EvenOrOdd);
@@ -100,7 +100,7 @@ __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
                                                     real* coordY,
                                                     real* coordZ,
                                                     real* DDStart,
-                                                    int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool EvenOrOdd);
 
 __global__ void LB_Kernel_Cascade_SP_27( real s9,
@@ -109,7 +109,7 @@ __global__ void LB_Kernel_Cascade_SP_27( real s9,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
                                                     real* DDStart,
-                                                    int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_New_SP_27( real s9,
@@ -118,7 +118,7 @@ __global__ void LB_Kernel_Kum_New_SP_27( real s9,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
                                                     real* DDStart,
-                                                    int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
@@ -130,7 +130,7 @@ __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
                                                         real* dxxUx,
                                                         real* dyyUy,
                                                         real* dzzUz,
-                                                        int size_Mat,
+                                                        unsigned long long numberOfLBnodes,
                                                         bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
@@ -139,7 +139,7 @@ __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
                                                     real* DDStart,
-                                                    int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool EvenOrOdd);
 
 __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
@@ -149,7 +149,7 @@ __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd);
@@ -161,7 +161,7 @@ __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd);
@@ -173,7 +173,7 @@ __global__ void Cumulant_One_chim_Comp_SP_27(
     unsigned int* neighborY,
     unsigned int* neighborZ,
     real* DDStart,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd);
@@ -200,7 +200,7 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
     real* veloZ,
     real* DDStart,
     real* turbulentViscosity,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd);
@@ -211,7 +211,7 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
                                                             unsigned int* neighborY,
                                                             unsigned int* neighborZ,
                                                             real* DDStart,
-                                                            int size_Mat,
+                                                            unsigned long long numberOfLBnodes,
                                                             int level,
                                                             real* forces,
                                                             real porosity,
@@ -228,7 +228,7 @@ __global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
                                                   unsigned int* neighborZ,
                                                   real* DDStart,
                                                   real* DD7,
-                                                  int size_Mat,
+                                                  unsigned long long numberOfLBnodes,
                                                   bool EvenOrOdd);
 
 __global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
@@ -238,7 +238,7 @@ __global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
                                                    unsigned int* neighborZ,
                                                    real* DDStart,
                                                    real* DD27,
-                                                   int size_Mat,
+                                                   unsigned long long numberOfLBnodes,
                                                    bool EvenOrOdd);
 
 __global__ void LBInit27( int myid,
@@ -249,7 +249,7 @@ __global__ void LBInit27( int myid,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
                                      real* vParabel,
-                                     unsigned int size_Mat,
+                                     unsigned long long numberOfLBnodes,
                                      unsigned int grid_nx,
                                      unsigned int grid_ny,
                                      unsigned int grid_nz,
@@ -266,7 +266,7 @@ __global__ void LBInitNonEqPartSP27(unsigned int* neighborX,
                                                real* ux,
                                                real* uy,
                                                real* uz,
-                                               unsigned int size_Mat,
+                                               unsigned long long numberOfLBnodes,
                                                real* DD,
                                                real omega,
                                                bool EvenOrOdd);
@@ -279,7 +279,7 @@ __global__ void InitAD7( unsigned int* neighborX,
                                        real* ux,
                                        real* uy,
                                        real* uz,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        real* DD7,
                                        bool EvenOrOdd);
 
@@ -291,7 +291,7 @@ __global__ void InitAD27(unsigned int* neighborX,
                                        real* ux,
                                        real* uy,
                                        real* uz,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        real* DD27,
                                        bool EvenOrOdd);
 
@@ -307,7 +307,7 @@ __global__ void LB_PostProcessor_F3_2018_Fehlberg(
     real* vzOut,
     real* DDStart,
     real* G6,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd);
@@ -320,7 +320,7 @@ __global__ void LBCalcMac27( real* vxD,
                                         unsigned int* neighborY,
                                         unsigned int* neighborZ,
                                         unsigned int* geoD,
-                                        unsigned int size_Mat,
+                                        unsigned long long numberOfLBnodes,
                                         real* DD,
                                         bool isEvenTimestep);
 
@@ -333,7 +333,7 @@ __global__ void LBCalcMacSP27( real* vxD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD,
                                           bool isEvenTimestep);
 
@@ -346,7 +346,7 @@ __global__ void LBCalcMacCompSP27( real* vxD,
                                               unsigned int* neighborX,
                                               unsigned int* neighborY,
                                               unsigned int* neighborZ,
-                                              unsigned int size_Mat,
+                                              unsigned long long numberOfLBnodes,
                                               real* DD,
                                               bool isEvenTimestep);
 
@@ -355,7 +355,7 @@ __global__ void CalcConc7( real* Conc,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD7,
                                           bool isEvenTimestep);
 
@@ -366,7 +366,7 @@ __global__ void GetPlaneConc7(real* Conc,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             real* DD7,
                                             bool isEvenTimestep);
 
@@ -377,7 +377,7 @@ __global__ void GetPlaneConc27(real* Conc,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              real* DD27,
                                              bool isEvenTimestep);
 
@@ -386,7 +386,7 @@ __global__ void CalcConc27(real* Conc,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD27,
                                           bool isEvenTimestep);
 
@@ -399,7 +399,7 @@ __global__ void LBCalcMedSP27( real* vxD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           real* DD,
                                           bool isEvenTimestep);
 
@@ -412,7 +412,7 @@ __global__ void LBCalcMedCompSP27( real* vxD,
                                               unsigned int* neighborX,
                                               unsigned int* neighborY,
                                               unsigned int* neighborZ,
-                                              unsigned int size_Mat,
+                                              unsigned long long numberOfLBnodes,
                                               real* DD,
                                               bool isEvenTimestep);
 
@@ -427,7 +427,7 @@ __global__ void LBCalcMedCompAD27(
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    unsigned int size_Mat,
+    unsigned long long numberOfLBnodes,
     real* DD,
     real* DD_AD,
     bool isEvenTimestep);
@@ -442,7 +442,7 @@ __global__ void LBCalcMacMedSP27( real* vxD,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
                                              unsigned int tdiff,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void LBResetMedianValuesSP27(
@@ -451,7 +451,7 @@ __global__ void LBResetMedianValuesSP27(
     real* vzD,
     real* rhoD,
     real* pressD,
-    unsigned int size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void LBResetMedianValuesAD27(
@@ -461,7 +461,7 @@ __global__ void LBResetMedianValuesAD27(
     real* rhoD,
     real* pressD,
     real* concD,
-    unsigned int size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
@@ -473,7 +473,7 @@ __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
                                                         unsigned int* neighborX,
                                                         unsigned int* neighborY,
                                                         unsigned int* neighborZ,
-                                                        unsigned int size_Mat,
+                                                        unsigned long long numberOfLBnodes,
                                                         real* DD,
                                                         bool isEvenTimestep);
 
@@ -486,7 +486,7 @@ __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
                                                     unsigned int* neighborX,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
-                                                    unsigned int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     real* DD,
                                                     bool isEvenTimestep);
 
@@ -502,7 +502,7 @@ __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
                                                         unsigned int* neighborY,
                                                         unsigned int* neighborZ,
                                                         real* DDStart,
-                                                        int size_Mat,
+                                                        unsigned long long numberOfLBnodes,
                                                         bool EvenOrOdd);
 
 __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
@@ -517,7 +517,7 @@ __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
                                                     real* DDStart,
-                                                    int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool EvenOrOdd);
 
 __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
@@ -535,7 +535,7 @@ __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
                                                             unsigned int* neighborY,
                                                             unsigned int* neighborZ,
                                                             real* DDStart,
-                                                            int size_Mat,
+                                                            unsigned long long numberOfLBnodes,
                                                             bool EvenOrOdd);
 
 __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
@@ -553,7 +553,7 @@ __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
                                                         unsigned int* neighborY,
                                                         unsigned int* neighborZ,
                                                         real* DDStart,
-                                                        int size_Mat,
+                                                        unsigned long long numberOfLBnodes,
                                                         bool EvenOrOdd);
 
 __global__ void LBCalcMeasurePoints(real* vxMP,
@@ -568,7 +568,7 @@ __global__ void LBCalcMeasurePoints(real* vxMP,
                                                unsigned int* neighborX,
                                                unsigned int* neighborY,
                                                unsigned int* neighborZ,
-                                               unsigned int size_Mat,
+                                               unsigned long long numberOfLBnodes,
                                                real* DD,
                                                bool isEvenTimestep);
 
@@ -580,7 +580,7 @@ __global__ void LB_BC_Press_East27( int nx,
                                                unsigned int* neighborY,
                                                unsigned int* neighborZ,
                                                real* DD,
-                                               unsigned int size_Mat,
+                                               unsigned long long numberOfLBnodes,
                                                bool isEvenTimestep) ;
 
 __global__ void LB_BC_Vel_West_27( int nx,
@@ -592,7 +592,7 @@ __global__ void LB_BC_Vel_West_27( int nx,
                                               unsigned int* neighborY,
                                               unsigned int* neighborZ,
                                               real* DD,
-                                              unsigned int size_Mat,
+                                              unsigned long long numberOfLBnodes,
                                               bool isEvenTimestep,
                                               real u0x,
                                               unsigned int grid_nx,
@@ -608,7 +608,7 @@ __global__ void QDevice27(real* distributions,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes,
+                                     unsigned long long numberOfLBnodes,
                                      bool isEvenTimestep);
 
 __global__ void QDeviceComp27(
@@ -620,7 +620,7 @@ __global__ void QDeviceComp27(
                                          unsigned int* neighborX,
                                          unsigned int* neighborY,
                                          unsigned int* neighborZ,
-                                         unsigned int numberOfLBnodes,
+                                         unsigned long long numberOfLBnodes,
                                          bool isEvenTimestep);
 
 __global__ void QDeviceCompThinWallsPartOne27(real* DD,
@@ -631,7 +631,7 @@ __global__ void QDeviceCompThinWallsPartOne27(real* DD,
                                                          unsigned int* neighborX,
                                                          unsigned int* neighborY,
                                                          unsigned int* neighborZ,
-                                                         unsigned int size_Mat,
+                                                         unsigned long long numberOfLBnodes,
                                                          bool isEvenTimestep);
 
 __global__ void QDevice3rdMomentsComp27(	 real* distributions,
@@ -642,7 +642,7 @@ __global__ void QDevice3rdMomentsComp27(	 real* distributions,
                                                      unsigned int* neighborX,
                                                      unsigned int* neighborY,
                                                      unsigned int* neighborZ,
-                                                     unsigned int numberOfLBnodes,
+                                                     unsigned long long numberOfLBnodes,
                                                      bool isEvenTimestep);
 
 __global__ void QDeviceIncompHighNu27(real* DD,
@@ -653,7 +653,7 @@ __global__ void QDeviceIncompHighNu27(real* DD,
                                                  unsigned int* neighborX,
                                                  unsigned int* neighborY,
                                                  unsigned int* neighborZ,
-                                                 unsigned int numberOfLBnodes,
+                                                 unsigned long long numberOfLBnodes,
                                                  bool isEvenTimestep);
 
 __global__ void QDeviceCompHighNu27(	 real* DD,
@@ -664,7 +664,7 @@ __global__ void QDeviceCompHighNu27(	 real* DD,
                                                  unsigned int* neighborX,
                                                  unsigned int* neighborY,
                                                  unsigned int* neighborZ,
-                                                 unsigned int size_Mat,
+                                                 unsigned long long numberOfLBnodes,
                                                  bool isEvenTimestep);
 
 //Velocity BCs
@@ -679,7 +679,7 @@ __global__ void QVelDevPlainBB27(
     uint* neighborX,
     uint* neighborY,
     uint* neighborZ,
-    uint numberOfLBnodes,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void QVelDevCouette27(real* vx,
@@ -693,7 +693,7 @@ __global__ void QVelDevCouette27(real* vx,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool isEvenTimestep);
 
 __global__ void QVelDev1h27( int inx,
@@ -714,7 +714,7 @@ __global__ void QVelDev1h27( int inx,
                                         real* coordX,
                                         real* coordY,
                                         real* coordZ,
-                                        unsigned int size_Mat,
+                                        unsigned long long numberOfLBnodes,
                                         bool isEvenTimestep);
 
 __global__ void QVelDevice27(int inx,
@@ -730,7 +730,7 @@ __global__ void QVelDevice27(int inx,
                                         unsigned int* neighborX,
                                         unsigned int* neighborY,
                                         unsigned int* neighborZ,
-                                        unsigned int size_Mat,
+                                        unsigned long long numberOfLBnodes,
                                         bool isEvenTimestep);
 
 __global__ void QVelDeviceCompPlusSlip27(real* vx,
@@ -744,7 +744,7 @@ __global__ void QVelDeviceCompPlusSlip27(real* vx,
                                                     unsigned int* neighborX,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
-                                                    unsigned int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool isEvenTimestep);
 
 __global__ void QVelDeviceComp27(real* velocityX,
@@ -758,7 +758,7 @@ __global__ void QVelDeviceComp27(real* velocityX,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int numberOfLBnodes,
+                                            unsigned long long numberOfLBnodes,
                                             bool isEvenTimestep);
 
 __global__ void QVelDeviceCompThinWallsPartOne27(
@@ -773,7 +773,7 @@ __global__ void QVelDeviceCompThinWallsPartOne27(
     uint* neighborX,
     uint* neighborY,
     uint* neighborZ,
-    uint size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void QThinWallsPartTwo27(
@@ -786,7 +786,7 @@ __global__ void QThinWallsPartTwo27(
     uint* neighborY,
     uint* neighborZ,
     uint* neighborWSB,
-    uint size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void QVelDeviceCompZeroPress27(
@@ -801,7 +801,7 @@ __global__ void QVelDeviceCompZeroPress27(
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    unsigned int numberOfLBnodes,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void QVelDeviceIncompHighNu27(real* vx,
@@ -815,7 +815,7 @@ __global__ void QVelDeviceIncompHighNu27(real* vx,
                                                     unsigned int* neighborX,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
-                                                    unsigned int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool isEvenTimestep);
 
 __global__ void QVelDeviceCompHighNu27(	real* vx,
@@ -829,7 +829,7 @@ __global__ void QVelDeviceCompHighNu27(	real* vx,
                                                     unsigned int* neighborX,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
-                                                    unsigned int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool isEvenTimestep);
 
 __global__ void QVeloDeviceEQ27(real* VeloX,
@@ -842,23 +842,9 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
-__global__ void QVeloStreetDeviceEQ27(
-    real* veloXfraction,
-    real* veloYfraction,
-    int*  naschVelo,
-    real* DD,
-    int*  naschIndex,
-    int   numberOfStreetNodes,
-    real  velocityRatio,
-    uint* neighborX,
-    uint* neighborY,
-    uint* neighborZ,
-    uint  size_Mat,
-    bool  isEvenTimestep);
-
 //Slip BCs
 __global__ void QSlipDevice27(real* DD,
                                          int* k_Q,
@@ -868,7 +854,7 @@ __global__ void QSlipDevice27(real* DD,
                                          unsigned int* neighborX,
                                          unsigned int* neighborY,
                                          unsigned int* neighborZ,
-                                         unsigned int size_Mat,
+                                         unsigned long long numberOfLBnodes,
                                          bool isEvenTimestep);
 
 __global__ void QSlipDeviceComp27(real* DD,
@@ -879,7 +865,7 @@ __global__ void QSlipDeviceComp27(real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QSlipDeviceComp27TurbViscosity(
@@ -892,7 +878,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
                                     real* turbViscosity,
-                                    unsigned int numberOfLBnodes,
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QSlipPressureDeviceComp27TurbViscosity(
@@ -905,7 +891,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
                                     real* turbViscosity,
-                                    unsigned int numberOfLBnodes,
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QSlipGeomDeviceComp27(real* DD,
@@ -919,7 +905,7 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
                                                  unsigned int* neighborX,
                                                  unsigned int* neighborY,
                                                  unsigned int* neighborZ,
-                                                 unsigned int size_Mat,
+                                                 unsigned long long numberOfLBnodes,
                                                  bool isEvenTimestep);
 
 __global__ void QSlipNormDeviceComp27(real* DD,
@@ -933,9 +919,20 @@ __global__ void QSlipNormDeviceComp27(real* DD,
                                                  unsigned int* neighborX,
                                                  unsigned int* neighborY,
                                                  unsigned int* neighborZ,
-                                                 unsigned int size_Mat,
+                                                 unsigned long long numberOfLBnodes,
                                                  bool isEvenTimestep);
 
+__global__ void BBSlipDeviceComp27(
+    real* distributions,
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep);
+
 // Stress BCs (wall model)
 __global__ void QStressDeviceComp27(real* DD,
                                                int* k_Q,
@@ -966,7 +963,7 @@ __global__ void QStressDeviceComp27(real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void BBStressDevice27( real* DD,
@@ -996,7 +993,7 @@ __global__ void BBStressDevice27( real* DD,
                                                 unsigned int* neighborX,
                                                 unsigned int* neighborY,
                                                 unsigned int* neighborZ,
-                                                unsigned int size_Mat,
+                                                unsigned long long numberOfLBnodes,
                                                 bool isEvenTimestep);
 
 __global__ void BBStressPressureDevice27( real* DD,
@@ -1026,7 +1023,7 @@ __global__ void BBStressPressureDevice27( real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 //Pressure BCs
@@ -1039,7 +1036,7 @@ __global__ void QPressDevice27( real* rhoBC,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
 __global__ void QPressDeviceAntiBB27(   real* rhoBC,
@@ -1054,7 +1051,7 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
                                                    unsigned int* neighborX,
                                                    unsigned int* neighborY,
                                                    unsigned int* neighborZ,
-                                                   unsigned int size_Mat,
+                                                   unsigned long long numberOfLBnodes,
                                                    bool isEvenTimestep);
 
 __global__ void QPressDeviceFixBackflow27( real* rhoBC,
@@ -1065,7 +1062,7 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       unsigned int* neighborX,
                                                       unsigned int* neighborY,
                                                       unsigned int* neighborZ,
-                                                      unsigned int size_Mat,
+                                                      unsigned long long numberOfLBnodes,
                                                       bool isEvenTimestep);
 
 __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
@@ -1076,7 +1073,7 @@ __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      unsigned int* neighborX,
                                                      unsigned int* neighborY,
                                                      unsigned int* neighborZ,
-                                                     unsigned int size_Mat,
+                                                     unsigned long long numberOfLBnodes,
                                                      bool isEvenTimestep);
 
 __global__ void QPressNoRhoDevice27(  real* rhoBC,
@@ -1088,7 +1085,7 @@ __global__ void QPressNoRhoDevice27(  real* rhoBC,
                                                  unsigned int* neighborX,
                                                  unsigned int* neighborY,
                                                  unsigned int* neighborZ,
-                                                 unsigned int numberOfLBnodes,
+                                                 unsigned long long numberOfLBnodes,
                                                  bool isEvenTimestep,
                                                  int direction);
 
@@ -1101,7 +1098,7 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int numberOfLBnodes,
+                                            unsigned long long numberOfLBnodes,
                                             bool isEvenTimestep,
                                             int direction,
                                             real densityCorrectionFactor);
@@ -1115,7 +1112,7 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
                                                          unsigned int* neighborX,
                                                          unsigned int* neighborY,
                                                          unsigned int* neighborZ,
-                                                         unsigned int size_Mat,
+                                                         unsigned long long numberOfLBnodes,
                                                          bool isEvenTimestep);
 
 __global__ void QPressDeviceOld27(real* rhoBC,
@@ -1127,7 +1124,7 @@ __global__ void QPressDeviceOld27(real* rhoBC,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
@@ -1139,7 +1136,7 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
                                                     unsigned int* neighborX,
                                                     unsigned int* neighborY,
                                                     unsigned int* neighborZ,
-                                                    unsigned int size_Mat,
+                                                    unsigned long long numberOfLBnodes,
                                                     bool isEvenTimestep);
 
 __global__ void QPressDeviceNEQ27(real* rhoBC,
@@ -1151,7 +1148,7 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QPressDeviceEQZ27(real* rhoBC,
@@ -1164,7 +1161,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QPressDeviceZero27(  real* DD,
@@ -1173,7 +1170,7 @@ __global__ void QPressDeviceZero27(  real* DD,
                                                 unsigned int* neighborX,
                                                 unsigned int* neighborY,
                                                 unsigned int* neighborZ,
-                                                unsigned int size_Mat,
+                                                unsigned long long numberOfLBnodes,
                                                 bool isEvenTimestep);
 
 __global__ void QPressDeviceFake27(real* rhoBC,
@@ -1185,7 +1182,7 @@ __global__ void QPressDeviceFake27(real* rhoBC,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void BBDevice27(real* distributions,
@@ -1195,7 +1192,7 @@ __global__ void BBDevice27(real* distributions,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes,
+                                     unsigned long long numberOfLBnodes,
                                      bool isEvenTimestep);
 
 __global__ void QPressDevice27_IntBB(real* rho,
@@ -1207,7 +1204,7 @@ __global__ void QPressDevice27_IntBB(real* rho,
                                                 unsigned int* neighborX,
                                                 unsigned int* neighborY,
                                                 unsigned int* neighborZ,
-                                                unsigned int size_Mat,
+                                                unsigned long long numberOfLBnodes,
                                                 bool isEvenTimestep);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
@@ -1225,7 +1222,7 @@ __global__ void PressSchlaff27(real* rhoBC,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
@@ -1240,7 +1237,7 @@ __global__ void VelSchlaff27(  int t,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
 __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
@@ -1352,7 +1349,7 @@ __global__ void QAD7( real* DD,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
                                  unsigned int* neighborZ,
-                                 unsigned int size_Mat,
+                                 unsigned long long numberOfLBnodes,
                                  bool isEvenTimestep);
 
 //////////////////////////////////////////////////////////////////////////
@@ -1365,7 +1362,7 @@ __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
     uint* neighborZ,
     real* distributions,
     real* distributionsAD,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     real* forces,
     bool isEvenTimestep);
 
@@ -1384,7 +1381,7 @@ __global__ void AD_SlipVelDeviceComp(
     uint * neighborX,
     uint * neighborY,
     uint * neighborZ,
-    uint size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void QADDirichlet27(   real* DD,
@@ -1398,7 +1395,7 @@ __global__ void QADDirichlet27(   real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QADBB27(  real* DD,
@@ -1412,7 +1409,7 @@ __global__ void QADBB27(  real* DD,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
-                                     unsigned int size_Mat,
+                                     unsigned long long numberOfLBnodes,
                                      bool isEvenTimestep);
 
 __global__ void QADVel7( real* DD,
@@ -1427,7 +1424,7 @@ __global__ void QADVel7( real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat,
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QADVel27(real* DD,
@@ -1442,7 +1439,7 @@ __global__ void QADVel27(real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat,
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QADPress7(  real* DD,
@@ -1457,7 +1454,7 @@ __global__ void QADPress7(  real* DD,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        bool isEvenTimestep);
 
 __global__ void QADPress27( real* DD,
@@ -1472,7 +1469,7 @@ __global__ void QADPress27( real* DD,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        bool isEvenTimestep);
 
 __global__ void QADPressNEQNeighbor27(
@@ -1484,7 +1481,7 @@ __global__ void QADPressNEQNeighbor27(
                                                  unsigned int* neighborX,
                                                  unsigned int* neighborY,
                                                  unsigned int* neighborZ,
-                                                 unsigned int size_Mat,
+                                                 unsigned long long numberOfLBnodes,
                                                  bool isEvenTimestep
                                                 );
 
@@ -1499,7 +1496,7 @@ __global__ void QNoSlipADincomp7( real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QNoSlipADincomp27( real* DD,
@@ -1513,7 +1510,7 @@ __global__ void QNoSlipADincomp27( real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep);
 
 __global__ void QADVeloIncomp7(  real* DD,
@@ -1528,7 +1525,7 @@ __global__ void QADVeloIncomp7(  real* DD,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool isEvenTimestep);
 
 __global__ void QADVeloIncomp27( real* DD,
@@ -1543,7 +1540,7 @@ __global__ void QADVeloIncomp27( real* DD,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool isEvenTimestep);
 
 __global__ void QADPressIncomp7(real* DD,
@@ -1558,7 +1555,7 @@ __global__ void QADPressIncomp7(real* DD,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
 __global__ void QADPressIncomp27(   real* DD,
@@ -1573,7 +1570,7 @@ __global__ void QADPressIncomp27(   real* DD,
                                                unsigned int* neighborX,
                                                unsigned int* neighborY,
                                                unsigned int* neighborZ,
-                                               unsigned int size_Mat,
+                                               unsigned long long numberOfLBnodes,
                                                bool isEvenTimestep);
 
 //Propeller BC
@@ -1586,7 +1583,7 @@ __global__ void PropellerBC(unsigned int* neighborX,
                                        real* uz,
                                        int* k_Q,
                                        unsigned int size_Prop,
-                                       unsigned int size_Mat,
+                                       unsigned long long numberOfLBnodes,
                                        unsigned int* bcMatD,
                                        real* DD,
                                        bool EvenOrOdd);
@@ -1602,8 +1599,8 @@ __global__ void scaleCF27(real* DC,
                                     unsigned int* neighborFX,
                                     unsigned int* neighborFY,
                                     unsigned int* neighborFZ,
-                                               unsigned int size_MatC,
-                                               unsigned int size_MatF,
+                                               unsigned long long numberOfLBnodesC,
+                                               unsigned long long numberOfLBnodesF,
                                                bool isEvenTimestep,
                                      unsigned int* posCSWB,
                                      unsigned int* posFSWB,
@@ -1624,8 +1621,8 @@ __global__ void scaleCFEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-                                                 unsigned int size_MatC,
-                                                 unsigned int size_MatF,
+                                                 unsigned long long numberOfLBnodesC,
+                                                 unsigned long long numberOfLBnodesF,
                                                  bool isEvenTimestep,
                                         unsigned int* posCSWB,
                                         unsigned int* posFSWB,
@@ -1637,7 +1634,7 @@ __global__ void scaleCFEff27(real* DC,
                                                  unsigned int nyC,
                                                  unsigned int nxF,
                                         unsigned int nyF,
-                                        OffCF offCF);
+                                        ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCFLast27( real* DC,
                                           real* DF,
@@ -1647,8 +1644,8 @@ __global__ void scaleCFLast27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB,
                                           unsigned int* posFSWB,
@@ -1660,7 +1657,7 @@ __global__ void scaleCFLast27( real* DC,
                                           unsigned int nyC,
                                           unsigned int nxF,
                                           unsigned int nyF,
-                                          OffCF offCF);
+                                          ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCFpress27(real* DC,
                                           real* DF,
@@ -1670,8 +1667,8 @@ __global__ void scaleCFpress27(real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB,
                                           unsigned int* posFSWB,
@@ -1683,7 +1680,7 @@ __global__ void scaleCFpress27(real* DC,
                                           unsigned int nyC,
                                           unsigned int nxF,
                                           unsigned int nyF,
-                                          OffCF offCF);
+                                          ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCF_Fix_27(real* DC,
                                           real* DF,
@@ -1693,8 +1690,8 @@ __global__ void scaleCF_Fix_27(real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB,
                                           unsigned int* posFSWB,
@@ -1706,7 +1703,7 @@ __global__ void scaleCF_Fix_27(real* DC,
                                           unsigned int nyC,
                                           unsigned int nxF,
                                           unsigned int nyF,
-                                          OffCF offCF);
+                                          ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCF_Fix_comp_27(   real* DC,
                                                   real* DF,
@@ -1716,8 +1713,8 @@ __global__ void scaleCF_Fix_comp_27(   real* DC,
                                                   unsigned int* neighborFX,
                                                   unsigned int* neighborFY,
                                                   unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
                                                   bool isEvenTimestep,
                                                   unsigned int* posCSWB,
                                                   unsigned int* posFSWB,
@@ -1729,7 +1726,7 @@ __global__ void scaleCF_Fix_comp_27(   real* DC,
                                                   unsigned int nyC,
                                                   unsigned int nxF,
                                                   unsigned int nyF,
-                                                  OffCF offCF);
+                                                  ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCF_0817_comp_27(  real* DC,
                                                   real* DF,
@@ -1739,8 +1736,8 @@ __global__ void scaleCF_0817_comp_27(  real* DC,
                                                   unsigned int* neighborFX,
                                                   unsigned int* neighborFY,
                                                   unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
                                                   bool isEvenTimestep,
                                                   unsigned int* posCSWB,
                                                   unsigned int* posFSWB,
@@ -1752,7 +1749,7 @@ __global__ void scaleCF_0817_comp_27(  real* DC,
                                                   unsigned int nyC,
                                                   unsigned int nxF,
                                                   unsigned int nyF,
-                                                  OffCF offCF);
+                                                  ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
                                                       real* DF,
@@ -1763,8 +1760,8 @@ __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
                                                       unsigned int* neighborFX,
                                                       unsigned int* neighborFY,
                                                       unsigned int* neighborFZ,
-                                                      unsigned int size_MatC,
-                                                      unsigned int size_MatF,
+                                                      unsigned long long numberOfLBnodesC,
+                                                      unsigned long long numberOfLBnodesF,
                                                       bool isEvenTimestep,
                                                       unsigned int* posCSWB,
                                                       unsigned int* posFSWB,
@@ -1776,7 +1773,7 @@ __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
                                                       unsigned int nyC,
                                                       unsigned int nxF,
                                                       unsigned int nyF,
-                                                      OffCF offCF);
+                                                      ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCF_comp_D3Q27F3( real* DC,
                                                  real* DF,
@@ -1787,8 +1784,8 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
                                                  unsigned int* neighborFX,
                                                  unsigned int* neighborFY,
                                                  unsigned int* neighborFZ,
-                                                 unsigned int size_MatC,
-                                                 unsigned int size_MatF,
+                                                 unsigned long long numberOfLBnodesC,
+                                                 unsigned long long numberOfLBnodesF,
                                                  bool isEvenTimestep,
                                                  unsigned int* posCSWB,
                                                  unsigned int* posFSWB,
@@ -1800,7 +1797,7 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
                                                  unsigned int nyC,
                                                  unsigned int nxF,
                                                  unsigned int nyF,
-                                                 OffCF offCF);
+                                                 ICellNeigh neighborCoarseToFine);
 
 
 __global__ void scaleCF_staggered_time_comp_27(real* DC,
@@ -1811,8 +1808,8 @@ __global__ void scaleCF_staggered_time_comp_27(real* DC,
                                                           unsigned int* neighborFX,
                                                           unsigned int* neighborFY,
                                                           unsigned int* neighborFZ,
-                                                          unsigned int size_MatC,
-                                                          unsigned int size_MatF,
+                                                          unsigned long long numberOfLBnodesC,
+                                                          unsigned long long numberOfLBnodesF,
                                                           bool isEvenTimestep,
                                                           unsigned int* posCSWB,
                                                           unsigned int* posFSWB,
@@ -1824,7 +1821,7 @@ __global__ void scaleCF_staggered_time_comp_27(real* DC,
                                                           unsigned int nyC,
                                                           unsigned int nxF,
                                                           unsigned int nyF,
-                                                          OffCF offCF);
+                                                          ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCF_RhoSq_comp_27( real* DC,
                                                   real* DF,
@@ -1834,8 +1831,8 @@ __global__ void scaleCF_RhoSq_comp_27( real* DC,
                                                   unsigned int* neighborFX,
                                                   unsigned int* neighborFY,
                                                   unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
                                                   bool isEvenTimestep,
                                                   unsigned int* posCSWB,
                                                   unsigned int* posFSWB,
@@ -1847,9 +1844,9 @@ __global__ void scaleCF_RhoSq_comp_27( real* DC,
                                                   unsigned int nyC,
                                                   unsigned int nxF,
                                                   unsigned int nyF,
-                                                  OffCF offCF);
+                                                  ICellNeigh neighborCoarseToFine);
 
-__global__ void scaleCF_compressible(
+template<bool hasTurbulentViscosity> __global__ void scaleCF_compressible(
     real* distributionsCoarse,
     real* distributionsFine,
     unsigned int* neighborXcoarse,
@@ -1858,15 +1855,17 @@ __global__ void scaleCF_compressible(
     unsigned int* neighborXfine,
     unsigned int* neighborYfine,
     unsigned int* neighborZfine,
-    unsigned int numberOfLBnodesCoarse,
-    unsigned int numberOfLBnodesFine,
+    unsigned long long numberOfLBnodesCoarse,
+    unsigned long long numberOfLBnodesFine,
     bool isEvenTimestep,
     unsigned int* indicesCoarseMMM,
     unsigned int* indicesFineMMM,
     unsigned int numberOfInterfaceNodes,
     real omegaCoarse,
     real omegaFine,
-    OffCF offsetCF);
+    real* turbulentViscosityCoarse,
+    real* turbulentViscosityFine,
+    ICellNeigh offsetCF);
 
 __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
                                                         real* DF,
@@ -1876,8 +1875,8 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
                                                         unsigned int* neighborFX,
                                                         unsigned int* neighborFY,
                                                         unsigned int* neighborFZ,
-                                                        unsigned int size_MatC,
-                                                        unsigned int size_MatF,
+                                                        unsigned long long numberOfLBnodesC,
+                                                        unsigned long long numberOfLBnodesF,
                                                         bool isEvenTimestep,
                                                         unsigned int* posCSWB,
                                                         unsigned int* posFSWB,
@@ -1889,7 +1888,7 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
                                                         unsigned int nyC,
                                                         unsigned int nxF,
                                                         unsigned int nyF,
-                                                        OffCF offCF);
+                                                        ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCF_AA2016_comp_27(real* DC,
                                                   real* DF,
@@ -1899,8 +1898,8 @@ __global__ void scaleCF_AA2016_comp_27(real* DC,
                                                   unsigned int* neighborFX,
                                                   unsigned int* neighborFY,
                                                   unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
                                                   bool isEvenTimestep,
                                                   unsigned int* posCSWB,
                                                   unsigned int* posFSWB,
@@ -1912,7 +1911,7 @@ __global__ void scaleCF_AA2016_comp_27(real* DC,
                                                   unsigned int nyC,
                                                   unsigned int nxF,
                                                   unsigned int nyF,
-                                                  OffCF offCF);
+                                                  ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCF_NSPress_27(real* DC,
                                               real* DF,
@@ -1922,8 +1921,8 @@ __global__ void scaleCF_NSPress_27(real* DC,
                                               unsigned int* neighborFX,
                                               unsigned int* neighborFY,
                                               unsigned int* neighborFZ,
-                                              unsigned int size_MatC,
-                                              unsigned int size_MatF,
+                                              unsigned long long numberOfLBnodesC,
+                                              unsigned long long numberOfLBnodesF,
                                               bool isEvenTimestep,
                                               unsigned int* posCSWB,
                                               unsigned int* posFSWB,
@@ -1935,7 +1934,7 @@ __global__ void scaleCF_NSPress_27(real* DC,
                                               unsigned int nyC,
                                               unsigned int nxF,
                                               unsigned int nyF,
-                                              OffCF offCF);
+                                              ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCFThSMG7( real* DC,
                                           real* DF,
@@ -1947,15 +1946,15 @@ __global__ void scaleCFThSMG7( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB,
                                           unsigned int* posFSWB,
                                           unsigned int kCF,
                                           real nu,
                                           real diffusivity_fine,
-                                          OffCF offCF);
+                                          ICellNeigh neighborCoarseToFine);
 
 __global__ void scaleCFThS7(real* DC,
                                        real* DF,
@@ -1967,8 +1966,8 @@ __global__ void scaleCFThS7(real* DC,
                                        unsigned int* neighborFX,
                                        unsigned int* neighborFY,
                                        unsigned int* neighborFZ,
-                                       unsigned int size_MatC,
-                                       unsigned int size_MatF,
+                                       unsigned long long numberOfLBnodesC,
+                                       unsigned long long numberOfLBnodesF,
                                        bool isEvenTimestep,
                                        unsigned int* posCSWB,
                                        unsigned int* posFSWB,
@@ -1986,15 +1985,15 @@ __global__ void scaleCFThS27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-                                        unsigned int size_MatC,
-                                        unsigned int size_MatF,
+                                        unsigned long long numberOfLBnodesC,
+                                        unsigned long long numberOfLBnodesF,
                                         bool isEvenTimestep,
                                         unsigned int* posCSWB,
                                         unsigned int* posFSWB,
                                         unsigned int kCF,
                                         real nu,
                                         real diffusivity_fine,
-                                        OffCF offCF);
+                                        ICellNeigh neighborCoarseToFine);
 
 //fine to coarse
 __global__ void scaleFC27(real* DC,
@@ -2005,8 +2004,8 @@ __global__ void scaleFC27(real* DC,
                                     unsigned int* neighborFX,
                                     unsigned int* neighborFY,
                                     unsigned int* neighborFZ,
-                                               unsigned int size_MatC,
-                                               unsigned int size_MatF,
+                                               unsigned long long numberOfLBnodesC,
+                                               unsigned long long numberOfLBnodesF,
                                                bool isEvenTimestep,
                                      unsigned int* posC,
                                      unsigned int* posFSWB,
@@ -2027,8 +2026,8 @@ __global__ void scaleFCEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-                                        unsigned int size_MatC,
-                                        unsigned int size_MatF,
+                                        unsigned long long numberOfLBnodesC,
+                                        unsigned long long numberOfLBnodesF,
                                         bool isEvenTimestep,
                                         unsigned int* posC,
                                         unsigned int* posFSWB,
@@ -2040,7 +2039,7 @@ __global__ void scaleFCEff27(real* DC,
                                         unsigned int nyC,
                                         unsigned int nxF,
                                         unsigned int nyF,
-                                        OffFC offFC);
+                                        ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFCLast27( real* DC,
                                           real* DF,
@@ -2050,8 +2049,8 @@ __global__ void scaleFCLast27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
@@ -2063,7 +2062,7 @@ __global__ void scaleFCLast27( real* DC,
                                           unsigned int nyC,
                                           unsigned int nxF,
                                           unsigned int nyF,
-                                          OffFC offFC);
+                                          ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFCpress27( real* DC,
                                           real* DF,
@@ -2073,8 +2072,8 @@ __global__ void scaleFCpress27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
@@ -2086,7 +2085,7 @@ __global__ void scaleFCpress27( real* DC,
                                           unsigned int nyC,
                                           unsigned int nxF,
                                           unsigned int nyF,
-                                          OffFC offFC);
+                                          ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFC_Fix_27( real* DC,
                                           real* DF,
@@ -2096,8 +2095,8 @@ __global__ void scaleFC_Fix_27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
@@ -2109,7 +2108,7 @@ __global__ void scaleFC_Fix_27( real* DC,
                                           unsigned int nyC,
                                           unsigned int nxF,
                                           unsigned int nyF,
-                                          OffFC offFC);
+                                          ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFC_Fix_comp_27(   real* DC,
                                                   real* DF,
@@ -2119,8 +2118,8 @@ __global__ void scaleFC_Fix_comp_27(   real* DC,
                                                   unsigned int* neighborFX,
                                                   unsigned int* neighborFY,
                                                   unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
                                                   bool isEvenTimestep,
                                                   unsigned int* posC,
                                                   unsigned int* posFSWB,
@@ -2132,7 +2131,7 @@ __global__ void scaleFC_Fix_comp_27(   real* DC,
                                                   unsigned int nyC,
                                                   unsigned int nxF,
                                                   unsigned int nyF,
-                                                  OffFC offFC);
+                                                  ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFC_0817_comp_27(  real* DC,
                                                   real* DF,
@@ -2142,8 +2141,8 @@ __global__ void scaleFC_0817_comp_27(  real* DC,
                                                   unsigned int* neighborFX,
                                                   unsigned int* neighborFY,
                                                   unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
                                                   bool isEvenTimestep,
                                                   unsigned int* posC,
                                                   unsigned int* posFSWB,
@@ -2155,7 +2154,7 @@ __global__ void scaleFC_0817_comp_27(  real* DC,
                                                   unsigned int nyC,
                                                   unsigned int nxF,
                                                   unsigned int nyF,
-                                                  OffFC offFC);
+                                                  ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
                                                       real* DF,
@@ -2166,8 +2165,8 @@ __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
                                                       unsigned int* neighborFX,
                                                       unsigned int* neighborFY,
                                                       unsigned int* neighborFZ,
-                                                      unsigned int size_MatC,
-                                                      unsigned int size_MatF,
+                                                      unsigned long long numberOfLBnodesC,
+                                                      unsigned long long numberOfLBnodesF,
                                                       bool isEvenTimestep,
                                                       unsigned int* posC,
                                                       unsigned int* posFSWB,
@@ -2179,7 +2178,7 @@ __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
                                                       unsigned int nyC,
                                                       unsigned int nxF,
                                                       unsigned int nyF,
-                                                      OffFC offFC);
+                                                      ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFC_comp_D3Q27F3( real* DC,
                                                  real* DF,
@@ -2190,8 +2189,8 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
                                                  unsigned int* neighborFX,
                                                  unsigned int* neighborFY,
                                                  unsigned int* neighborFZ,
-                                                 unsigned int size_MatC,
-                                                 unsigned int size_MatF,
+                                                 unsigned long long numberOfLBnodesC,
+                                                 unsigned long long numberOfLBnodesF,
                                                  bool isEvenTimestep,
                                                  unsigned int* posC,
                                                  unsigned int* posFSWB,
@@ -2203,7 +2202,7 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
                                                  unsigned int nyC,
                                                  unsigned int nxF,
                                                  unsigned int nyF,
-                                                 OffFC offFC);
+                                                 ICellNeigh neighborFineToCoarse);
 
 
 __global__ void scaleFC_staggered_time_comp_27(real* DC,
@@ -2214,8 +2213,8 @@ __global__ void scaleFC_staggered_time_comp_27(real* DC,
                                                           unsigned int* neighborFX,
                                                           unsigned int* neighborFY,
                                                           unsigned int* neighborFZ,
-                                                          unsigned int size_MatC,
-                                                          unsigned int size_MatF,
+                                                          unsigned long long numberOfLBnodesC,
+                                                          unsigned long long numberOfLBnodesF,
                                                           bool isEvenTimestep,
                                                           unsigned int* posC,
                                                           unsigned int* posFSWB,
@@ -2227,7 +2226,7 @@ __global__ void scaleFC_staggered_time_comp_27(real* DC,
                                                           unsigned int nyC,
                                                           unsigned int nxF,
                                                           unsigned int nyF,
-                                                          OffFC offFC);
+                                                          ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFC_RhoSq_comp_27( real* DC,
                                                   real* DF,
@@ -2237,8 +2236,8 @@ __global__ void scaleFC_RhoSq_comp_27( real* DC,
                                                   unsigned int* neighborFX,
                                                   unsigned int* neighborFY,
                                                   unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
                                                   bool isEvenTimestep,
                                                   unsigned int* posC,
                                                   unsigned int* posFSWB,
@@ -2250,9 +2249,9 @@ __global__ void scaleFC_RhoSq_comp_27( real* DC,
                                                   unsigned int nyC,
                                                   unsigned int nxF,
                                                   unsigned int nyF,
-                                                  OffFC offFC);
+                                                  ICellNeigh neighborFineToCoarse);
 
-__global__ void scaleFC_compressible(
+template<bool hasTurbulentViscosity> __global__ void scaleFC_compressible(
     real *distributionsCoarse,
     real *distributionsFine,
     unsigned int *neighborXcoarse,
@@ -2261,15 +2260,17 @@ __global__ void scaleFC_compressible(
     unsigned int *neighborXfine,
     unsigned int *neighborYfine,
     unsigned int *neighborZfine,
-    unsigned int numberOfLBnodesCoarse,
-    unsigned int numberOfLBnodesFine,
+    unsigned long long numberOfLBnodesCoarse,
+    unsigned long long numberOfLBnodesFine,
     bool isEvenTimestep,
     unsigned int *indicesCoarse000,
     unsigned int *indicesFineMMM,
     unsigned int numberOfInterfaceNodes,
     real omegaCoarse,
     real omegaFine,
-    OffFC offsetFC);
+    real* turbulentViscosityCoarse,
+    real* turbulentViscosityFine,
+    ICellNeigh offsetFC);
 
 __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
                                                         real* DF,
@@ -2279,8 +2280,8 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
                                                         unsigned int* neighborFX,
                                                         unsigned int* neighborFY,
                                                         unsigned int* neighborFZ,
-                                                        unsigned int size_MatC,
-                                                        unsigned int size_MatF,
+                                                        unsigned long long numberOfLBnodesC,
+                                                        unsigned long long numberOfLBnodesF,
                                                         bool isEvenTimestep,
                                                         unsigned int* posC,
                                                         unsigned int* posFSWB,
@@ -2292,7 +2293,7 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
                                                         unsigned int nyC,
                                                         unsigned int nxF,
                                                         unsigned int nyF,
-                                                        OffFC offFC);
+                                                        ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFC_AA2016_comp_27(real* DC,
                                                   real* DF,
@@ -2302,8 +2303,8 @@ __global__ void scaleFC_AA2016_comp_27(real* DC,
                                                   unsigned int* neighborFX,
                                                   unsigned int* neighborFY,
                                                   unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
+                                                  unsigned long long numberOfLBnodesC,
+                                                  unsigned long long numberOfLBnodesF,
                                                   bool isEvenTimestep,
                                                   unsigned int* posC,
                                                   unsigned int* posFSWB,
@@ -2315,7 +2316,7 @@ __global__ void scaleFC_AA2016_comp_27(real* DC,
                                                   unsigned int nyC,
                                                   unsigned int nxF,
                                                   unsigned int nyF,
-                                                  OffFC offFC);
+                                                  ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFC_NSPress_27(real* DC,
                                               real* DF,
@@ -2325,8 +2326,8 @@ __global__ void scaleFC_NSPress_27(real* DC,
                                               unsigned int* neighborFX,
                                               unsigned int* neighborFY,
                                               unsigned int* neighborFZ,
-                                              unsigned int size_MatC,
-                                              unsigned int size_MatF,
+                                              unsigned long long numberOfLBnodesC,
+                                              unsigned long long numberOfLBnodesF,
                                               bool isEvenTimestep,
                                               unsigned int* posC,
                                               unsigned int* posFSWB,
@@ -2338,7 +2339,7 @@ __global__ void scaleFC_NSPress_27(real* DC,
                                               unsigned int nyC,
                                               unsigned int nxF,
                                               unsigned int nyF,
-                                              OffFC offFC);
+                                              ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFCThSMG7( real* DC,
                                           real* DF,
@@ -2350,15 +2351,15 @@ __global__ void scaleFCThSMG7( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
                                           unsigned int kFC,
                                           real nu,
                                           real diffusivity_coarse,
-                                          OffFC offFC);
+                                          ICellNeigh neighborFineToCoarse);
 
 __global__ void scaleFCThS7(real* DC,
                                        real* DF,
@@ -2370,8 +2371,8 @@ __global__ void scaleFCThS7(real* DC,
                                        unsigned int* neighborFX,
                                        unsigned int* neighborFY,
                                        unsigned int* neighborFZ,
-                                       unsigned int size_MatC,
-                                       unsigned int size_MatF,
+                                       unsigned long long numberOfLBnodesC,
+                                       unsigned long long numberOfLBnodesF,
                                        bool isEvenTimestep,
                                        unsigned int* posC,
                                        unsigned int* posFSWB,
@@ -2389,15 +2390,15 @@ __global__ void scaleFCThS27(  real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
+                                          unsigned long long numberOfLBnodesC,
+                                          unsigned long long numberOfLBnodesF,
                                           bool isEvenTimestep,
                                           unsigned int* posC,
                                           unsigned int* posFSWB,
                                           unsigned int kFC,
                                           real nu,
                                           real diffusivity_coarse,
-                                          OffFC offFC);
+                                          ICellNeigh neighborFineToCoarse);
 
 __global__ void DragLiftPost27(  real* DD,
                                             int* k_Q,
@@ -2409,7 +2410,7 @@ __global__ void DragLiftPost27(  real* DD,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool isEvenTimestep);
 
 __global__ void DragLiftPre27(   real* DD,
@@ -2422,7 +2423,7 @@ __global__ void DragLiftPre27(   real* DD,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool isEvenTimestep);
 
 __global__ void CalcCP27(real* DD,
@@ -2432,7 +2433,7 @@ __global__ void CalcCP27(real* DD,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
-                                    unsigned int size_Mat,
+                                    unsigned long long numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void getSendFsPre27(real* DD,
@@ -2442,7 +2443,7 @@ __global__ void getSendFsPre27(real* DD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
 __global__ void getSendFsPost27(real* DD,
@@ -2452,7 +2453,7 @@ __global__ void getSendFsPost27(real* DD,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
 __global__ void setRecvFsPre27(real* DD,
@@ -2462,7 +2463,7 @@ __global__ void setRecvFsPre27(real* DD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
 __global__ void setRecvFsPost27(real* DD,
@@ -2472,7 +2473,7 @@ __global__ void setRecvFsPost27(real* DD,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat,
+                                           unsigned long long numberOfLBnodes,
                                            bool isEvenTimestep);
 
 __global__ void getSendGsF3(
@@ -2483,7 +2484,7 @@ __global__ void getSendGsF3(
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    unsigned int size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void setRecvGsF3(
@@ -2494,7 +2495,7 @@ __global__ void setRecvGsF3(
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    unsigned int size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep);
 
 __global__ void WallFunction27( 	real* vx,
@@ -2508,7 +2509,7 @@ __global__ void WallFunction27( 	real* vx,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
                                             unsigned int* neighborZ,
-                                            unsigned int size_Mat,
+                                            unsigned long long numberOfLBnodes,
                                             bool isEvenTimestep);
 
 __global__ void LBSetOutputWallVelocitySP27( real* vxD,
@@ -2525,7 +2526,7 @@ __global__ void LBSetOutputWallVelocitySP27( real* vxD,
                                                         unsigned int* neighborX,
                                                         unsigned int* neighborY,
                                                         unsigned int* neighborZ,
-                                                        unsigned int size_Mat,
+                                                        unsigned long long numberOfLBnodes,
                                                         real* DD,
                                                         bool isEvenTimestep);
 
@@ -2538,7 +2539,7 @@ __global__ void GetVeloforForcing27( real* DD,
                                                 unsigned int* neighborX,
                                                 unsigned int* neighborY,
                                                 unsigned int* neighborZ,
-                                                unsigned int size_Mat,
+                                                unsigned long long numberOfLBnodes,
                                                 bool isEvenTimestep);
 
 __global__ void InitParticles( real* coordX,
@@ -2563,7 +2564,7 @@ __global__ void InitParticles( real* coordX,
                                           unsigned int* neighborWSB,
                                           int level,
                                           unsigned int numberOfParticles,
-                                          unsigned int size_Mat);
+                                          unsigned long long numberOfLBnodes);
 
 __global__ void MoveParticles( real* coordX,
                                           real* coordY,
@@ -2590,7 +2591,7 @@ __global__ void MoveParticles( real* coordX,
                                           unsigned int timestep,
                                           unsigned int numberOfTimesteps,
                                           unsigned int numberOfParticles,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep);
 
 __global__ void MoveParticlesWithoutBCs(   real* coordX,
@@ -2618,7 +2619,7 @@ __global__ void MoveParticlesWithoutBCs(   real* coordX,
                                                       unsigned int timestep,
                                                       unsigned int numberOfTimesteps,
                                                       unsigned int numberOfParticles,
-                                                      unsigned int size_Mat,
+                                                      unsigned long long numberOfLBnodes,
                                                       bool isEvenTimestep);
 
 __global__ void initRandom(curandState* state);
@@ -2641,7 +2642,7 @@ __global__ void CalcTurbulenceIntensity(
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
-   unsigned int size_Mat,
+   unsigned long long numberOfLBnodes,
    bool isEvenTimestep);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h b/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h
index 4dbf525e173c4acb00ff53e70f7485852bf956ac..d312c826036c1b5d856da0f0ab52832ba89c2f57 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h
@@ -1,5 +1,7 @@
 #ifndef _GEOMETRYUTILS_H
 #define _GEOMETRYUTILS_H
+#include "DataTypes.h"
+
 
 __inline__ __host__ __device__ void getNeighborIndicesOfBSW(  uint k, //index of DIR_MMM node
                                         uint &ke, uint &kn, uint &kt, uint &kne, uint &kte,uint &ktn, uint &ktne,
@@ -53,46 +55,45 @@ __inline__ __host__ __device__ real trilinearInterpolation( real dW, real dE, re
                                         uint k,  uint ke, uint kn, uint kt, uint kne, uint kte, uint ktn, uint ktne,
                                         real* quantity )
 {
-    real interpolatedValue = (            dE*dN*dT*quantity[k]    + dW*dN*dT*quantity[ke]
-                                        + dE*dS*dT*quantity[kn]   + dW*dS*dT*quantity[kne]
-                                        + dE*dN*dB*quantity[kt]   + dW*dN*dB*quantity[kte]
-                                        + dE*dS*dB*quantity[ktn]  + dW*dS*dB*quantity[ktne] );
-    return interpolatedValue;
+    return  (   dE*dN*dT*quantity[k]    + dW*dN*dT*quantity[ke]
+              + dE*dS*dT*quantity[kn]   + dW*dS*dT*quantity[kne]
+              + dE*dN*dB*quantity[kt]   + dW*dN*dB*quantity[kte]
+              + dE*dS*dB*quantity[ktn]  + dW*dS*dB*quantity[ktne] );
 }
 
-__inline__ __host__ __device__ void translate2D(real &posX, real &posY, real &newPosX, real &newPosY, real &translationX, real &translationY)
+__inline__ __host__ __device__ void translate2D(real posX, real posY, real &newPosX, real &newPosY, real translationX, real translationY) // writes pos + translation into newPosX/newPosY; inputs are taken by value, outputs by reference
 {
     newPosX = posX + translationX;
     newPosY = posY + translationY;
 }
 
-__inline__ __host__ __device__ void invTranslate2D(real &posX, real &posY, real &newPosX, real &newPosY, real &translationX, real &translationY)
+__inline__ __host__ __device__ void invTranslate2D(real posX, real posY, real &newPosX, real &newPosY, real translationX, real translationY) // writes pos - translation into newPosX/newPosY; inputs are taken by value, outputs by reference
 {
     newPosX = posX - translationX;
     newPosY = posY - translationY;
 }
 
-__inline__ __host__ __device__ void translate3D(real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &translationX, real &translationY, real &translationZ)
+__inline__ __host__ __device__ void translate3D(real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ, real translationX, real translationY, real translationZ) // writes pos + translation component-wise into newPosX/Y/Z; inputs by value, outputs by reference
 {
     newPosX = posX + translationX;
     newPosY = posY + translationY;
     newPosZ = posZ + translationZ;
 }
 
-__inline__ __host__ __device__ void invTranslate3D(real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &translationX, real &translationY, real &translationZ)
+__inline__ __host__ __device__ void invTranslate3D(real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ, real translationX, real translationY, real translationZ) // writes pos - translation component-wise into newPosX/Y/Z; inputs by value, outputs by reference
 {
     newPosX = posX - translationX;
     newPosY = posY - translationY;
     newPosZ = posZ - translationZ;
 }
 
-__inline__ __host__ __device__ void rotate2D(real &angle, real &posX, real &posY, real &newPosX, real &newPosY)
+__inline__ __host__ __device__ void rotate2D(real angle, real posX, real posY, real &newPosX, real &newPosY) // applies [[cos, -sin], [sin, cos]] to (posX, posY); angle is passed straight to cos/sin, result goes to newPosX/newPosY
 {
     newPosX = posX*cos(angle) - posY*sin(angle);
     newPosY = posX*sin(angle) + posY*cos(angle);  
 }
 
-__inline__ __host__ __device__ void rotate2D(real &angle, real &posX, real &posY, real &newPosX, real &newPosY, real &originX, real &originY)
+__inline__ __host__ __device__ void rotate2D(real angle, real posX, real posY, real &newPosX, real &newPosY, real originX, real originY)
 {
     real tmpX, tmpY;
     invTranslate2D(posX, posY, newPosX, newPosY, originX, originY);
@@ -100,13 +101,13 @@ __inline__ __host__ __device__ void rotate2D(real &angle, real &posX, real &posY
     translate2D(tmpX, tmpY, newPosX, newPosY, originX, originY);
 }
 
-__inline__ __host__ __device__ void invRotate2D(real &angle, real &posX, real &posY, real &newPosX, real &newPosY)
+__inline__ __host__ __device__ void invRotate2D(real angle, real posX, real posY, real &newPosX, real &newPosY) // applies [[cos, sin], [-sin, cos]] to (posX, posY), i.e. the transpose of the rotate2D matrix; result goes to newPosX/newPosY
 {
     newPosX =  posX*cos(angle) + posY*sin(angle);
     newPosY = -posX*sin(angle) + posY*cos(angle);  
 }
 
-__inline__ __host__ __device__ void invRotate2D(real &angle, real &posX, real &posY, real &newPosX, real &newPosY, real &originX, real &originY)
+__inline__ __host__ __device__ void invRotate2D(real angle, real posX, real posY, real &newPosX, real &newPosY, real originX, real originY)
 {
     real tmpX, tmpY;
     invTranslate2D(posX, posY, newPosX, newPosY, originX, originY);
@@ -114,13 +115,13 @@ __inline__ __host__ __device__ void invRotate2D(real &angle, real &posX, real &p
     translate2D(tmpX, tmpY, newPosX, newPosY, originX, originY);
 }
 
-__inline__ __host__ __device__ void rotateAboutX3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+__inline__ __host__ __device__ void rotateAboutX3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ) // copies posX unchanged and applies rotate2D to the (posY, posZ) pair
 {
     newPosX = posX;
     rotate2D(angle, posY, posZ, newPosY, newPosZ);
 }
 
-__inline__ __host__ __device__ void rotateAboutX3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+__inline__ __host__ __device__ void rotateAboutX3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ, real originX, real originY, real originZ)
 {
     real tmpX, tmpY, tmpZ;
     invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
@@ -128,13 +129,13 @@ __inline__ __host__ __device__ void rotateAboutX3D(real &angle, real &posX, real
     translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
 }
 
-__inline__ __host__ __device__ void invRotateAboutX3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+__inline__ __host__ __device__ void invRotateAboutX3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ) // copies posX unchanged and applies invRotate2D to the (posY, posZ) pair
 {
     newPosX = posX;
     invRotate2D(angle, posY, posZ, newPosY, newPosZ);
 }
 
-__inline__ __host__ __device__ void invRotateAboutX3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+__inline__ __host__ __device__ void invRotateAboutX3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ, real originX, real originY, real originZ)
 {
     real tmpX, tmpY, tmpZ;
     invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
@@ -142,13 +143,13 @@ __inline__ __host__ __device__ void invRotateAboutX3D(real &angle, real &posX, r
     translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
 }
 
-__inline__ __host__ __device__ void rotateAboutY3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+__inline__ __host__ __device__ void rotateAboutY3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ) // copies posY unchanged and applies rotate2D to the (posX, posZ) pair
 {    
     newPosY =  posY;
     rotate2D(angle, posX, posZ, newPosX, newPosZ);
 }
 
-__inline__ __host__ __device__ void rotateAboutY3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+__inline__ __host__ __device__ void rotateAboutY3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ, real originX, real originY, real originZ)
 {
     real tmpX, tmpY, tmpZ;
     invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
@@ -156,13 +157,13 @@ __inline__ __host__ __device__ void rotateAboutY3D(real &angle, real &posX, real
     translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
 }
 
-__inline__ __host__ __device__ void invRotateAboutY3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+__inline__ __host__ __device__ void invRotateAboutY3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ) // copies posY unchanged and applies invRotate2D to the (posX, posZ) pair
 {
     newPosY =  posY;
     invRotate2D(angle, posX, posZ, newPosX, newPosZ);
 }
 
-__inline__ __host__ __device__ void invRotateAboutY3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+__inline__ __host__ __device__ void invRotateAboutY3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ, real originX, real originY, real originZ)
 {
     real tmpX, tmpY, tmpZ;
     invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
@@ -171,13 +172,13 @@ __inline__ __host__ __device__ void invRotateAboutY3D(real &angle, real &posX, r
 }
 
 
-__inline__ __host__ __device__ void rotateAboutZ3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+__inline__ __host__ __device__ void rotateAboutZ3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ) // copies posZ unchanged and applies rotate2D to the (posX, posY) pair
 {
     newPosZ = posZ;
     rotate2D(angle, posX, posY, newPosX, newPosY);
 }
 
-__inline__ __host__ __device__ void rotateAboutZ3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+__inline__ __host__ __device__ void rotateAboutZ3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ, real originX, real originY, real originZ)
 {
     real tmpX, tmpY, tmpZ;
     invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
@@ -185,13 +186,13 @@ __inline__ __host__ __device__ void rotateAboutZ3D(real &angle, real &posX, real
     translate3D(tmpX, tmpY, tmpZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
 }
 
-__inline__ __host__ __device__ void invRotateAboutZ3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ)
+__inline__ __host__ __device__ void invRotateAboutZ3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ) // copies posZ unchanged and applies invRotate2D to the (posX, posY) pair
 {
     newPosZ = posZ;
     invRotate2D(angle, posX, posY, newPosX, newPosY);
 }
 
-__inline__ __host__ __device__ void invRotateAboutZ3D(real &angle, real &posX, real &posY, real &posZ, real &newPosX, real &newPosY, real &newPosZ, real &originX, real &originY, real &originZ)
+__inline__ __host__ __device__ void invRotateAboutZ3D(real angle, real posX, real posY, real posZ, real &newPosX, real &newPosY, real &newPosZ, real originX, real originY, real originZ)
 {
     real tmpX, tmpY, tmpZ;
     invTranslate3D(posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GeometryUtilsTest.cu b/src/gpu/VirtualFluids_GPU/GPU/GeometryUtilsTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..797a8f72195c57faa7889c8dadc30fb8eccb6288
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/GPU/GeometryUtilsTest.cu
@@ -0,0 +1,301 @@
+#include "GeometryUtils.h"
+#include "basics/constants/NumericConstants.h"
+#include "tests/testUtilities.h"
+
+TEST(GeometryUtilsTest, translate2D) // checks that translate2D writes position + translation into the two output arguments
+{
+    real newPositionX;
+    real newPositionY;
+
+    translate2D(0., 0., newPositionX, newPositionY, 0., 0.); // zero translation of the point (0, 0)
+    EXPECT_THAT(newPositionX, RealEq(0.));
+    EXPECT_THAT(newPositionY, RealEq(0.));
+
+    translate2D(0.5, 0.5, newPositionX, newPositionY, 1., 1.); // positive translation: 0.5 + 1 = 1.5
+    EXPECT_THAT(newPositionX, RealEq(1.5));
+    EXPECT_THAT(newPositionY, RealEq(1.5));
+
+    translate2D(0.5, 0.5, newPositionX, newPositionY, -1., -1.); // negative translation: 0.5 - 1 = -0.5
+    EXPECT_THAT(newPositionX, RealEq(-0.5));
+    EXPECT_THAT(newPositionY, RealEq(-0.5));
+}
+
+TEST(GeometryUtilsTest, inverseTranslate2D) // checks that invTranslate2D writes position - translation into the two output arguments
+{
+    real newPositionX;
+    real newPositionY;
+
+    invTranslate2D(0., 0., newPositionX, newPositionY, 0., 0.); // zero translation of the point (0, 0)
+    EXPECT_THAT(newPositionX, RealEq(0.));
+    EXPECT_THAT(newPositionY, RealEq(0.));
+
+    invTranslate2D(0.5, 0.5, newPositionX, newPositionY, 1., 1.); // positive translation is subtracted: 0.5 - 1 = -0.5
+    EXPECT_THAT(newPositionX, RealEq(-0.5));
+    EXPECT_THAT(newPositionY, RealEq(-0.5));
+
+    invTranslate2D(0.5, 0.5, newPositionX, newPositionY, -1., -1.); // negative translation is subtracted: 0.5 + 1 = 1.5
+    EXPECT_THAT(newPositionX, RealEq(1.5));
+    EXPECT_THAT(newPositionY, RealEq(1.5));
+}
+
+TEST(GeometryUtilsTest, rotate2dAround0) // rotates the point (2, 0) with the rotate2D overload that takes no origin
+{
+    auto posX = 2.0;
+    auto posY = 0.0;
+    real newPosX;
+    real newPosY;
+
+    auto angle = 0.0; // zero angle: the point is expected to stay at (2, 0)
+    rotate2D(angle, posX, posY, newPosX, newPosY);
+    EXPECT_THAT(newPosX, RealNear(2.0, 10e-5)); // note: the literal 10e-5 equals 1e-4 (presumably the tolerance of RealNear)
+    EXPECT_THAT(newPosY, RealNear(0.0, 10e-5));
+
+    angle = 0.5 * vf::basics::constant::cPi; // expected result: (0, 2)
+    rotate2D(angle, posX, posY, newPosX, newPosY);
+    EXPECT_THAT(newPosX, RealNear(0.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(2.0, 10e-5));
+
+    angle = 1.0 * vf::basics::constant::cPi; // expected result: (-2, 0)
+    rotate2D(angle, posX, posY, newPosX, newPosY);
+    EXPECT_THAT(newPosX, RealNear(-2.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(0.0, 10e-5));
+
+    angle = 1.5 * vf::basics::constant::cPi; // expected result: (0, -2)
+    rotate2D(angle, posX, posY, newPosX, newPosY);
+    EXPECT_THAT(newPosX, RealNear(0.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-2.0, 10e-5));
+
+    angle = 2.0 * vf::basics::constant::cPi; // expected result: back at (2, 0)
+    rotate2D(angle, posX, posY, newPosX, newPosY);
+    EXPECT_THAT(newPosX, RealNear(2.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(0.0, 10e-5));
+
+    angle = -0.5 * vf::basics::constant::cPi; // negative angle: same expectation as 1.5 * cPi
+    rotate2D(angle, posX, posY, newPosX, newPosY);
+    EXPECT_THAT(newPosX, RealNear(0.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-2.0, 10e-5));
+}
+
+TEST(GeometryUtilsTest, rotate2dWithOrigin) // rotates the point (3, -1) with the rotate2D overload that takes an origin, here (1, -1)
+{
+    auto posX = 3.0;
+    auto posY = -1.0;
+    auto originX = 1.0;
+    auto originY = -1.0;
+    real newPosX;
+    real newPosY;
+
+    auto angle = 0.0; // zero angle: the point is expected to stay at (3, -1)
+    rotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(3.0, 10e-5)); // note: the literal 10e-5 equals 1e-4 (presumably the tolerance of RealNear)
+    EXPECT_THAT(newPosY, RealNear(-1.0, 10e-5));
+
+    angle = 0.5 * vf::basics::constant::cPi; // expected result: (1, 1)
+    rotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(1.0, 10e-5));
+
+    angle = 1.0 * vf::basics::constant::cPi; // expected result: (-1, -1)
+    rotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(-1.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-1.0, 10e-5));
+
+    angle = 1.5 * vf::basics::constant::cPi; // expected result: (1, -3)
+    rotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-3.0, 10e-5));
+
+    angle = 2.0 * vf::basics::constant::cPi; // expected result: back at (3, -1)
+    rotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(3.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-1.0, 10e-5));
+
+    angle = -0.5 * vf::basics::constant::cPi; // negative angle: same expectation as 1.5 * cPi
+    rotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-3.0, 10e-5));
+}
+
+TEST(GeometryUtilsTest, inverseRotate2DWithOrigin) // applies the invRotate2D overload with origin (1, -1) to the point (3, -1)
+{
+    auto posX = 3.0;
+    auto posY = -1.0;
+    auto originX = 1.0;
+    auto originY = -1.0;
+    real newPosX;
+    real newPosY;
+
+    auto angle = 0.0; // zero angle: the point is expected to stay at (3, -1)
+    invRotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(3.0, 10e-5)); // note: the literal 10e-5 equals 1e-4 (presumably the tolerance of RealNear)
+    EXPECT_THAT(newPosY, RealNear(-1.0, 10e-5));
+
+    angle = 0.5 * vf::basics::constant::cPi; // expected result: (1, -3), the value rotate2dWithOrigin expects for 1.5 * cPi
+    invRotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-3.0, 10e-5));
+
+    angle = 1.0 * vf::basics::constant::cPi; // expected result: (-1, -1)
+    invRotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(-1.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-1.0, 10e-5));
+
+    angle = 1.5 * vf::basics::constant::cPi; // expected result: (1, 1)
+    invRotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(1.0, 10e-5));
+
+    angle = 2.0 * vf::basics::constant::cPi; // expected result: back at (3, -1)
+    invRotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(3.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-1.0, 10e-5));
+
+    angle = -1.5 * vf::basics::constant::cPi; // negative angle: same expectation as 0.5 * cPi
+    invRotate2D(angle, posX, posY, newPosX, newPosY, originX, originY);
+    EXPECT_THAT(newPosX, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-3.0, 10e-5));
+}
+
+TEST(GeometryUtilsTest, rotateAboutX3dAround0) // applies the origin-free rotateAboutX3D overload to the point (0.5, 2, 0); x is expected to stay 0.5 throughout
+{
+    auto posX = 0.5;
+    auto posY = 2.0;
+    auto posZ = 0.0;
+    real newPosX;
+    real newPosY;
+    real newPosZ;
+
+    auto angle = 0.0; // zero angle: the point is expected to stay at (0.5, 2, 0)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5)); // note: the literal 10e-5 equals 1e-4 (presumably the tolerance of RealNear)
+    EXPECT_THAT(newPosY, RealNear(2.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(0.0, 10e-5));
+
+    angle = 0.5 * vf::basics::constant::cPi; // expected (y, z): (0, 2)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(0.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(2.0, 10e-5));
+
+    angle = 1.0 * vf::basics::constant::cPi; // expected (y, z): (-2, 0)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-2.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(0.0, 10e-5));
+
+    angle = 1.5 * vf::basics::constant::cPi; // expected (y, z): (0, -2)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(0.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-2.0, 10e-5));
+
+    angle = 2.0 * vf::basics::constant::cPi; // expected (y, z): back at (2, 0)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(2.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(0.0, 10e-5));
+
+    angle = -0.5 * vf::basics::constant::cPi; // negative angle: same expectation as 1.5 * cPi
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(0.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-2.0, 10e-5));
+}
+
+TEST(GeometryUtilsTest, rotateAboutX3dWithOrigin) // applies the rotateAboutX3D overload with origin (-0.75, 1, -1) to the point (0.5, 3, -1); x is expected to stay 0.5 throughout
+{
+    auto posX = 0.5;
+    auto posY = 3.0;
+    auto posZ = -1.0;
+    auto originX = -0.75;
+    auto originY = 1.0;
+    auto originZ = -1.0;
+    real newPosX;
+    real newPosY;
+    real newPosZ;
+
+    auto angle = 0.0; // zero angle: the point is expected to stay at (0.5, 3, -1)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5)); // note: the literal 10e-5 equals 1e-4 (presumably the tolerance of RealNear)
+    EXPECT_THAT(newPosY, RealNear(3.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-1.0, 10e-5));
+
+    angle = 0.5 * vf::basics::constant::cPi;    // expected (y, z): (1, 1)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(1.0, 10e-5));
+
+    angle = 1.0 * vf::basics::constant::cPi; // expected (y, z): (-1, -1)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-1.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-1.0, 10e-5));
+
+    angle = 1.5 * vf::basics::constant::cPi; // expected (y, z): (1, -3)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-3.0, 10e-5));
+
+    angle = 2.0 * vf::basics::constant::cPi; // expected (y, z): back at (3, -1)
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(3.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-1.0, 10e-5));
+
+    angle = -0.5 * vf::basics::constant::cPi; // negative angle: same expectation as 1.5 * cPi
+    rotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-3.0, 10e-5));
+}
+
+TEST(GeometryUtilsTest, inverseRotateAboutX3dWithOrigin) // applies the invRotateAboutX3D overload with origin (-0.75, 1, -1) to the point (0.5, 3, -1); x is expected to stay 0.5 throughout
+{
+    auto posX = 0.5;
+    auto posY = 3.0;
+    auto posZ = -1.0;
+    auto originX = -0.75;
+    auto originY = 1.0;
+    auto originZ = -1.0;
+    real newPosX;
+    real newPosY;
+    real newPosZ;
+
+    auto angle = 0.0; // zero angle: the point is expected to stay at (0.5, 3, -1)
+    invRotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5)); // note: the literal 10e-5 equals 1e-4 (presumably the tolerance of RealNear)
+    EXPECT_THAT(newPosY, RealNear(3.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-1.0, 10e-5));
+
+    angle = 0.5 * vf::basics::constant::cPi; // expected (y, z): (1, -3), the value rotateAboutX3dWithOrigin expects for 1.5 * cPi
+    invRotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-3.0, 10e-5));
+
+    angle = 1.0 * vf::basics::constant::cPi; // expected (y, z): (-1, -1)
+    invRotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(-1.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-1.0, 10e-5));
+
+    angle = 1.5 * vf::basics::constant::cPi;    // expected (y, z): (1, 1)
+    invRotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(1.0, 10e-5));
+
+    angle = 2.0 * vf::basics::constant::cPi; // expected (y, z): back at (3, -1)
+    invRotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(3.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(-1.0, 10e-5));
+
+    angle = -0.5 * vf::basics::constant::cPi; // negative angle: same expectation as 1.5 * cPi
+    invRotateAboutX3D(angle, posX, posY, posZ, newPosX, newPosY, newPosZ, originX, originY, originZ);
+    EXPECT_THAT(newPosX, RealNear(0.5, 10e-5));
+    EXPECT_THAT(newPosY, RealNear(1.0, 10e-5));
+    EXPECT_THAT(newPosZ, RealNear(1.0, 10e-5));
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu
index 619d68c87d7a707e70be4c56d434191994144148..4c586faa2fa60fe2894d86f97c680c5f0f11087b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF27.cu
@@ -8,9 +8,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
@@ -22,8 +22,8 @@ __global__ void scaleCF_0817_comp_27( real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -35,7 +35,7 @@ __global__ void scaleCF_0817_comp_27( real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffCF offCF)
+												 ICellNeigh offCF)
 {
 	real
 		*fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -43,33 +43,33 @@ __global__ void scaleCF_0817_comp_27( real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[DIR_P00   *size_MatF];
-	fM00dest = &DF[DIR_M00   *size_MatF];
-	f0P0dest = &DF[DIR_0P0   *size_MatF];
-	f0M0dest = &DF[DIR_0M0   *size_MatF];
-	f00Pdest = &DF[DIR_00P   *size_MatF];
-	f00Mdest = &DF[DIR_00M   *size_MatF];
-	fPP0dest = &DF[DIR_PP0  *size_MatF];
-	fMM0dest = &DF[DIR_MM0  *size_MatF];
-	fPM0dest = &DF[DIR_PM0  *size_MatF];
-	fMP0dest = &DF[DIR_MP0  *size_MatF];
-	fP0Pdest = &DF[DIR_P0P  *size_MatF];
-	fM0Mdest = &DF[DIR_M0M  *size_MatF];
-	fP0Mdest = &DF[DIR_P0M  *size_MatF];
-	fM0Pdest = &DF[DIR_M0P  *size_MatF];
-	f0PPdest = &DF[DIR_0PP  *size_MatF];
-	f0MMdest = &DF[DIR_0MM  *size_MatF];
-	f0PMdest = &DF[DIR_0PM  *size_MatF];
-	f0MPdest = &DF[DIR_0MP  *size_MatF];
-	f000dest = &DF[DIR_000*size_MatF];
-	fMMMdest = &DF[DIR_MMM *size_MatF];
-	fMMPdest = &DF[DIR_MMP *size_MatF];
-	fMPPdest = &DF[DIR_MPP *size_MatF];
-	fMPMdest = &DF[DIR_MPM *size_MatF];
-	fPPMdest = &DF[DIR_PPM *size_MatF];
-	fPPPdest = &DF[DIR_PPP *size_MatF];
-	fPMPdest = &DF[DIR_PMP *size_MatF];
-	fPMMdest = &DF[DIR_PMM *size_MatF];
+	fP00dest = &DF[DIR_P00 * numberOfLBnodesFine];
+	fM00dest = &DF[DIR_M00 * numberOfLBnodesFine];
+	f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine];
+	f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine];
+	f00Pdest = &DF[DIR_00P * numberOfLBnodesFine];
+	f00Mdest = &DF[DIR_00M * numberOfLBnodesFine];
+	fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine];
+	fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine];
+	fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine];
+	fMP0dest = &DF[DIR_MP0 * numberOfLBnodesFine];
+	fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine];
+	fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine];
+	fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine];
+	fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine];
+	f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine];
+	f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine];
+	f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine];
+	f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine];
+	f000dest = &DF[DIR_000 * numberOfLBnodesFine];
+	fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine];
+	fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine];
+	fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine];
+	fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine];
+	fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine];
+	fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine];
+	fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine];
+	fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -78,63 +78,63 @@ __global__ void scaleCF_0817_comp_27( real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[DIR_P00   *size_MatC];
-		fM00source = &DC[DIR_M00   *size_MatC];
-		f0P0source = &DC[DIR_0P0   *size_MatC];
-		f0M0source = &DC[DIR_0M0   *size_MatC];
-		f00Psource = &DC[DIR_00P   *size_MatC];
-		f00Msource = &DC[DIR_00M   *size_MatC];
-		fPP0source = &DC[DIR_PP0  *size_MatC];
-		fMM0source = &DC[DIR_MM0  *size_MatC];
-		fPM0source = &DC[DIR_PM0  *size_MatC];
-		fMP0source = &DC[DIR_MP0  *size_MatC];
-		fP0Psource = &DC[DIR_P0P  *size_MatC];
-		fM0Msource = &DC[DIR_M0M  *size_MatC];
-		fP0Msource = &DC[DIR_P0M  *size_MatC];
-		fM0Psource = &DC[DIR_M0P  *size_MatC];
-		f0PPsource = &DC[DIR_0PP  *size_MatC];
-		f0MMsource = &DC[DIR_0MM  *size_MatC];
-		f0PMsource = &DC[DIR_0PM  *size_MatC];
-		f0MPsource = &DC[DIR_0MP  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_MMM *size_MatC];
-		fMMPsource = &DC[DIR_MMP *size_MatC];
-		fMPPsource = &DC[DIR_MPP *size_MatC];
-		fMPMsource = &DC[DIR_MPM *size_MatC];
-		fPPMsource = &DC[DIR_PPM *size_MatC];
-		fPPPsource = &DC[DIR_PPP *size_MatC];
-		fPMPsource = &DC[DIR_PMP *size_MatC];
-		fPMMsource = &DC[DIR_PMM *size_MatC];
+		fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
 	}
 	else
 	{
-		fP00source = &DC[DIR_M00   *size_MatC];
-		fM00source = &DC[DIR_P00   *size_MatC];
-		f0P0source = &DC[DIR_0M0   *size_MatC];
-		f0M0source = &DC[DIR_0P0   *size_MatC];
-		f00Psource = &DC[DIR_00M   *size_MatC];
-		f00Msource = &DC[DIR_00P   *size_MatC];
-		fPP0source = &DC[DIR_MM0  *size_MatC];
-		fMM0source = &DC[DIR_PP0  *size_MatC];
-		fPM0source = &DC[DIR_MP0  *size_MatC];
-		fMP0source = &DC[DIR_PM0  *size_MatC];
-		fP0Psource = &DC[DIR_M0M  *size_MatC];
-		fM0Msource = &DC[DIR_P0P  *size_MatC];
-		fP0Msource = &DC[DIR_M0P  *size_MatC];
-		fM0Psource = &DC[DIR_P0M  *size_MatC];
-		f0PPsource = &DC[DIR_0MM  *size_MatC];
-		f0MMsource = &DC[DIR_0PP  *size_MatC];
-		f0PMsource = &DC[DIR_0MP  *size_MatC];
-		f0MPsource = &DC[DIR_0PM  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_PPP *size_MatC];
-		fMMPsource = &DC[DIR_PPM *size_MatC];
-		fMPPsource = &DC[DIR_PMM *size_MatC];
-		fMPMsource = &DC[DIR_PMP *size_MatC];
-		fPPMsource = &DC[DIR_MMP *size_MatC];
-		fPPPsource = &DC[DIR_MMM *size_MatC];
-		fPMPsource = &DC[DIR_MPM *size_MatC];
-		fPMMsource = &DC[DIR_MPP *size_MatC];
+		fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
 	}
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -188,9 +188,9 @@ __global__ void scaleCF_0817_comp_27( real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -4091,8 +4091,8 @@ __global__ void scaleCF_AA2016_comp_27(real* DC,
 												  unsigned int* neighborFX,
 												  unsigned int* neighborFY,
 												  unsigned int* neighborFZ,
-												  unsigned int size_MatC, 
-												  unsigned int size_MatF, 
+												  unsigned long long numberOfLBnodesCoarse, 
+												  unsigned long long numberOfLBnodesFine, 
 												  bool isEvenTimestep,
 												  unsigned int* posCSWB, 
 												  unsigned int* posFSWB, 
@@ -4104,101 +4104,101 @@ __global__ void scaleCF_AA2016_comp_27(real* DC,
 												  unsigned int nyC, 
 												  unsigned int nxF, 
 												  unsigned int nyF,
-												  OffCF offCF)
+												  ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -4270,9 +4270,9 @@ __global__ void scaleCF_AA2016_comp_27(real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -10974,8 +10974,8 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int* neighborFX,
 														unsigned int* neighborFY,
 														unsigned int* neighborFZ,
-														unsigned int size_MatC, 
-														unsigned int size_MatF, 
+														unsigned long long numberOfLBnodesCoarse, 
+														unsigned long long numberOfLBnodesFine, 
 														bool isEvenTimestep,
 														unsigned int* posCSWB, 
 														unsigned int* posFSWB, 
@@ -10987,101 +10987,101 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int nyC, 
 														unsigned int nxF, 
 														unsigned int nyF,
-														OffCF offCF)
+														ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -11153,9 +11153,9 @@ __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -17849,8 +17849,8 @@ __global__ void scaleCF_RhoSq_comp_27(real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -17862,101 +17862,101 @@ __global__ void scaleCF_RhoSq_comp_27(real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffCF offCF)
+												 ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -18008,9 +18008,9 @@ __global__ void scaleCF_RhoSq_comp_27(real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -22133,8 +22133,8 @@ __global__ void scaleCF_staggered_time_comp_27(   real* DC,
 															 unsigned int* neighborFX,
 															 unsigned int* neighborFY,
 															 unsigned int* neighborFZ,
-															 unsigned int size_MatC, 
-															 unsigned int size_MatF, 
+															 unsigned long long numberOfLBnodesCoarse, 
+															 unsigned long long numberOfLBnodesFine, 
 															 bool isEvenTimestep,
 															 unsigned int* posCSWB, 
 															 unsigned int* posFSWB, 
@@ -22146,101 +22146,101 @@ __global__ void scaleCF_staggered_time_comp_27(   real* DC,
 															 unsigned int nyC, 
 															 unsigned int nxF, 
 															 unsigned int nyF,
-															 OffCF offCF)
+															 ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -22292,9 +22292,9 @@ __global__ void scaleCF_staggered_time_comp_27(   real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -26369,8 +26369,8 @@ __global__ void scaleCF_Fix_comp_27(  real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -26382,101 +26382,101 @@ __global__ void scaleCF_Fix_comp_27(  real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffCF offCF)
+												 ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -26531,9 +26531,9 @@ __global__ void scaleCF_Fix_comp_27(  real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -31136,8 +31136,8 @@ __global__ void scaleCF_NSPress_27(   real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -31149,101 +31149,101 @@ __global__ void scaleCF_NSPress_27(   real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffCF offCF)
+												 ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -31294,9 +31294,9 @@ __global__ void scaleCF_NSPress_27(   real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -35080,8 +35080,8 @@ __global__ void scaleCF_Fix_27(   real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posCSWB, 
                                              unsigned int* posFSWB, 
@@ -35093,101 +35093,101 @@ __global__ void scaleCF_Fix_27(   real* DC,
                                              unsigned int nyC, 
                                              unsigned int nxF, 
                                              unsigned int nyF,
-                                             OffCF offCF)
+                                             ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -35239,9 +35239,9 @@ __global__ void scaleCF_Fix_27(   real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -39338,8 +39338,8 @@ __global__ void scaleCFpress27(   real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posCSWB, 
                                              unsigned int* posFSWB, 
@@ -39351,101 +39351,101 @@ __global__ void scaleCFpress27(   real* DC,
                                              unsigned int nyC, 
                                              unsigned int nxF, 
                                              unsigned int nyF,
-                                             OffCF offCF)
+                                             ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -39496,9 +39496,9 @@ __global__ void scaleCFpress27(   real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -41012,8 +41012,8 @@ __global__ void scaleCFLast27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB, 
                                           unsigned int* posFSWB, 
@@ -41025,101 +41025,101 @@ __global__ void scaleCFLast27( real* DC,
                                           unsigned int nyC, 
                                           unsigned int nxF, 
                                           unsigned int nyF,
-                                          OffCF offCF)
+                                          ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -41170,9 +41170,9 @@ __global__ void scaleCFLast27( real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -43249,140 +43249,140 @@ __global__ void scaleCFThSMG7(    real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posCSWB, 
                                              unsigned int* posFSWB, 
                                              unsigned int kCF, 
                                              real nu,
                                              real diffusivity_fine,
-                                             OffCF offCF)
+                                             ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   //fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
 
    Distributions7 D7F;
-   D7F.f[0] = &DD7F[0*size_MatF];
-   D7F.f[1] = &DD7F[1*size_MatF];
-   D7F.f[2] = &DD7F[2*size_MatF];
-   D7F.f[3] = &DD7F[3*size_MatF];
-   D7F.f[4] = &DD7F[4*size_MatF];
-   D7F.f[5] = &DD7F[5*size_MatF];
-   D7F.f[6] = &DD7F[6*size_MatF];
+   D7F.f[0] = &DD7F[0*numberOfLBnodesFine];
+   D7F.f[1] = &DD7F[1*numberOfLBnodesFine];
+   D7F.f[2] = &DD7F[2*numberOfLBnodesFine];
+   D7F.f[3] = &DD7F[3*numberOfLBnodesFine];
+   D7F.f[4] = &DD7F[4*numberOfLBnodesFine];
+   D7F.f[5] = &DD7F[5*numberOfLBnodesFine];
+   D7F.f[6] = &DD7F[6*numberOfLBnodesFine];
                       
    Distributions7 D7C;
    if (isEvenTimestep==true)
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[1] = &DD7C[1*size_MatC];
-      D7C.f[2] = &DD7C[2*size_MatC];
-      D7C.f[3] = &DD7C[3*size_MatC];
-      D7C.f[4] = &DD7C[4*size_MatC];
-      D7C.f[5] = &DD7C[5*size_MatC];
-      D7C.f[6] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse];
    }
    else
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[2] = &DD7C[1*size_MatC];
-      D7C.f[1] = &DD7C[2*size_MatC];
-      D7C.f[4] = &DD7C[3*size_MatC];
-      D7C.f[3] = &DD7C[4*size_MatC];
-      D7C.f[6] = &DD7C[5*size_MatC];
-      D7C.f[5] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -43425,9 +43425,9 @@ __global__ void scaleCFThSMG7(    real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      real xoff = offCF.xOffCF[k];
-      real yoff = offCF.yOffCF[k];
-      real zoff = offCF.zOffCF[k];      
+      real xoff = offCF.x[k];
+      real yoff = offCF.y[k];
+      real zoff = offCF.z[k];      
       real xoff_sq = xoff * xoff;
       real yoff_sq = yoff * yoff;
       real zoff_sq = zoff * zoff;
@@ -44476,8 +44476,8 @@ __global__ void scaleCFThS7(   real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posCSWB, 
                                           unsigned int* posFSWB, 
@@ -44487,128 +44487,128 @@ __global__ void scaleCFThS7(   real* DC,
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   //fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
 
    Distributions7 D7F;
-   D7F.f[0] = &DD7F[0*size_MatF];
-   D7F.f[1] = &DD7F[1*size_MatF];
-   D7F.f[2] = &DD7F[2*size_MatF];
-   D7F.f[3] = &DD7F[3*size_MatF];
-   D7F.f[4] = &DD7F[4*size_MatF];
-   D7F.f[5] = &DD7F[5*size_MatF];
-   D7F.f[6] = &DD7F[6*size_MatF];
+   D7F.f[0] = &DD7F[0*numberOfLBnodesFine];
+   D7F.f[1] = &DD7F[1*numberOfLBnodesFine];
+   D7F.f[2] = &DD7F[2*numberOfLBnodesFine];
+   D7F.f[3] = &DD7F[3*numberOfLBnodesFine];
+   D7F.f[4] = &DD7F[4*numberOfLBnodesFine];
+   D7F.f[5] = &DD7F[5*numberOfLBnodesFine];
+   D7F.f[6] = &DD7F[6*numberOfLBnodesFine];
                       
    Distributions7 D7C;
    if (isEvenTimestep==true)
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[1] = &DD7C[1*size_MatC];
-      D7C.f[2] = &DD7C[2*size_MatC];
-      D7C.f[3] = &DD7C[3*size_MatC];
-      D7C.f[4] = &DD7C[4*size_MatC];
-      D7C.f[5] = &DD7C[5*size_MatC];
-      D7C.f[6] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse];
    }
    else
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[2] = &DD7C[1*size_MatC];
-      D7C.f[1] = &DD7C[2*size_MatC];
-      D7C.f[4] = &DD7C[3*size_MatC];
-      D7C.f[3] = &DD7C[4*size_MatC];
-      D7C.f[6] = &DD7C[5*size_MatC];
-      D7C.f[5] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -45599,200 +45599,200 @@ __global__ void scaleCFThS27(     real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posCSWB, 
                                              unsigned int* posFSWB, 
                                              unsigned int kCF, 
                                              real nu,
                                              real diffusivity_fine,
-											 OffCF offCF)
+											 ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   //fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      //fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
 
    Distributions27 D27F;
-   D27F.f[DIR_P00   ] = &DD27F[DIR_P00   *size_MatF];
-   D27F.f[DIR_M00   ] = &DD27F[DIR_M00   *size_MatF];
-   D27F.f[DIR_0P0   ] = &DD27F[DIR_0P0   *size_MatF];
-   D27F.f[DIR_0M0   ] = &DD27F[DIR_0M0   *size_MatF];
-   D27F.f[DIR_00P   ] = &DD27F[DIR_00P   *size_MatF];
-   D27F.f[DIR_00M   ] = &DD27F[DIR_00M   *size_MatF];
-   D27F.f[DIR_PP0  ] = &DD27F[DIR_PP0  *size_MatF];
-   D27F.f[DIR_MM0  ] = &DD27F[DIR_MM0  *size_MatF];
-   D27F.f[DIR_PM0  ] = &DD27F[DIR_PM0  *size_MatF];
-   D27F.f[DIR_MP0  ] = &DD27F[DIR_MP0  *size_MatF];
-   D27F.f[DIR_P0P  ] = &DD27F[DIR_P0P  *size_MatF];
-   D27F.f[DIR_M0M  ] = &DD27F[DIR_M0M  *size_MatF];
-   D27F.f[DIR_P0M  ] = &DD27F[DIR_P0M  *size_MatF];
-   D27F.f[DIR_M0P  ] = &DD27F[DIR_M0P  *size_MatF];
-   D27F.f[DIR_0PP  ] = &DD27F[DIR_0PP  *size_MatF];
-   D27F.f[DIR_0MM  ] = &DD27F[DIR_0MM  *size_MatF];
-   D27F.f[DIR_0PM  ] = &DD27F[DIR_0PM  *size_MatF];
-   D27F.f[DIR_0MP  ] = &DD27F[DIR_0MP  *size_MatF];
-   D27F.f[DIR_000] = &DD27F[DIR_000*size_MatF];
-   D27F.f[DIR_PPP ] = &DD27F[DIR_PPP *size_MatF];
-   D27F.f[DIR_MMP ] = &DD27F[DIR_MMP *size_MatF];
-   D27F.f[DIR_PMP ] = &DD27F[DIR_PMP *size_MatF];
-   D27F.f[DIR_MPP ] = &DD27F[DIR_MPP *size_MatF];
-   D27F.f[DIR_PPM ] = &DD27F[DIR_PPM *size_MatF];
-   D27F.f[DIR_MMM ] = &DD27F[DIR_MMM *size_MatF];
-   D27F.f[DIR_PMM ] = &DD27F[DIR_PMM *size_MatF];
-   D27F.f[DIR_MPM ] = &DD27F[DIR_MPM *size_MatF];
+   D27F.f[DIR_P00] = &DD27F[DIR_P00 * numberOfLBnodesFine];
+   D27F.f[DIR_M00] = &DD27F[DIR_M00 * numberOfLBnodesFine];
+   D27F.f[DIR_0P0] = &DD27F[DIR_0P0 * numberOfLBnodesFine];
+   D27F.f[DIR_0M0] = &DD27F[DIR_0M0 * numberOfLBnodesFine];
+   D27F.f[DIR_00P] = &DD27F[DIR_00P * numberOfLBnodesFine];
+   D27F.f[DIR_00M] = &DD27F[DIR_00M * numberOfLBnodesFine];
+   D27F.f[DIR_PP0] = &DD27F[DIR_PP0 * numberOfLBnodesFine];
+   D27F.f[DIR_MM0] = &DD27F[DIR_MM0 * numberOfLBnodesFine];
+   D27F.f[DIR_PM0] = &DD27F[DIR_PM0 * numberOfLBnodesFine];
+   D27F.f[DIR_MP0] = &DD27F[DIR_MP0 * numberOfLBnodesFine];
+   D27F.f[DIR_P0P] = &DD27F[DIR_P0P * numberOfLBnodesFine];
+   D27F.f[DIR_M0M] = &DD27F[DIR_M0M * numberOfLBnodesFine];
+   D27F.f[DIR_P0M] = &DD27F[DIR_P0M * numberOfLBnodesFine];
+   D27F.f[DIR_M0P] = &DD27F[DIR_M0P * numberOfLBnodesFine];
+   D27F.f[DIR_0PP] = &DD27F[DIR_0PP * numberOfLBnodesFine];
+   D27F.f[DIR_0MM] = &DD27F[DIR_0MM * numberOfLBnodesFine];
+   D27F.f[DIR_0PM] = &DD27F[DIR_0PM * numberOfLBnodesFine];
+   D27F.f[DIR_0MP] = &DD27F[DIR_0MP * numberOfLBnodesFine];
+   D27F.f[DIR_000] = &DD27F[DIR_000 * numberOfLBnodesFine];
+   D27F.f[DIR_PPP] = &DD27F[DIR_PPP * numberOfLBnodesFine];
+   D27F.f[DIR_MMP] = &DD27F[DIR_MMP * numberOfLBnodesFine];
+   D27F.f[DIR_PMP] = &DD27F[DIR_PMP * numberOfLBnodesFine];
+   D27F.f[DIR_MPP] = &DD27F[DIR_MPP * numberOfLBnodesFine];
+   D27F.f[DIR_PPM] = &DD27F[DIR_PPM * numberOfLBnodesFine];
+   D27F.f[DIR_MMM] = &DD27F[DIR_MMM * numberOfLBnodesFine];
+   D27F.f[DIR_PMM] = &DD27F[DIR_PMM * numberOfLBnodesFine];
+   D27F.f[DIR_MPM] = &DD27F[DIR_MPM * numberOfLBnodesFine];
 
    Distributions27 D27C;
    if (isEvenTimestep==true)
    {
-      D27C.f[DIR_P00   ] = &DD27C[DIR_P00   *size_MatC];
-      D27C.f[DIR_M00   ] = &DD27C[DIR_M00   *size_MatC];
-      D27C.f[DIR_0P0   ] = &DD27C[DIR_0P0   *size_MatC];
-      D27C.f[DIR_0M0   ] = &DD27C[DIR_0M0   *size_MatC];
-      D27C.f[DIR_00P   ] = &DD27C[DIR_00P   *size_MatC];
-      D27C.f[DIR_00M   ] = &DD27C[DIR_00M   *size_MatC];
-      D27C.f[DIR_PP0  ] = &DD27C[DIR_PP0  *size_MatC];
-      D27C.f[DIR_MM0  ] = &DD27C[DIR_MM0  *size_MatC];
-      D27C.f[DIR_PM0  ] = &DD27C[DIR_PM0  *size_MatC];
-      D27C.f[DIR_MP0  ] = &DD27C[DIR_MP0  *size_MatC];
-      D27C.f[DIR_P0P  ] = &DD27C[DIR_P0P  *size_MatC];
-      D27C.f[DIR_M0M  ] = &DD27C[DIR_M0M  *size_MatC];
-      D27C.f[DIR_P0M  ] = &DD27C[DIR_P0M  *size_MatC];
-      D27C.f[DIR_M0P  ] = &DD27C[DIR_M0P  *size_MatC];
-      D27C.f[DIR_0PP  ] = &DD27C[DIR_0PP  *size_MatC];
-      D27C.f[DIR_0MM  ] = &DD27C[DIR_0MM  *size_MatC];
-      D27C.f[DIR_0PM  ] = &DD27C[DIR_0PM  *size_MatC];
-      D27C.f[DIR_0MP  ] = &DD27C[DIR_0MP  *size_MatC];
-      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
-      D27C.f[DIR_PPP ] = &DD27C[DIR_PPP *size_MatC];
-      D27C.f[DIR_MMP ] = &DD27C[DIR_MMP *size_MatC];
-      D27C.f[DIR_PMP ] = &DD27C[DIR_PMP *size_MatC];
-      D27C.f[DIR_MPP ] = &DD27C[DIR_MPP *size_MatC];
-      D27C.f[DIR_PPM ] = &DD27C[DIR_PPM *size_MatC];
-      D27C.f[DIR_MMM ] = &DD27C[DIR_MMM *size_MatC];
-      D27C.f[DIR_PMM ] = &DD27C[DIR_PMM *size_MatC];
-      D27C.f[DIR_MPM ] = &DD27C[DIR_MPM *size_MatC];
+      D27C.f[DIR_P00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_M00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0P0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0M0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_00P] = &DD27C[DIR_00P * numberOfLBnodesCoarse];
+      D27C.f[DIR_00M] = &DD27C[DIR_00M * numberOfLBnodesCoarse];
+      D27C.f[DIR_PP0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MM0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PM0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MP0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0P] = &DD27C[DIR_P0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0M] = &DD27C[DIR_M0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0M] = &DD27C[DIR_P0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0P] = &DD27C[DIR_M0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PP] = &DD27C[DIR_0PP * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MM] = &DD27C[DIR_0MM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PM] = &DD27C[DIR_0PM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MP] = &DD27C[DIR_0MP * numberOfLBnodesCoarse];
+      D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPP] = &DD27C[DIR_PPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMP] = &DD27C[DIR_MMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMP] = &DD27C[DIR_PMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPP] = &DD27C[DIR_MPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPM] = &DD27C[DIR_PPM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMM] = &DD27C[DIR_MMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMM] = &DD27C[DIR_PMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPM] = &DD27C[DIR_MPM * numberOfLBnodesCoarse];
    }
    else
    {
-      D27C.f[DIR_M00   ] = &DD27C[DIR_P00   *size_MatC];
-      D27C.f[DIR_P00   ] = &DD27C[DIR_M00   *size_MatC];
-      D27C.f[DIR_0M0   ] = &DD27C[DIR_0P0   *size_MatC];
-      D27C.f[DIR_0P0   ] = &DD27C[DIR_0M0   *size_MatC];
-      D27C.f[DIR_00M   ] = &DD27C[DIR_00P   *size_MatC];
-      D27C.f[DIR_00P   ] = &DD27C[DIR_00M   *size_MatC];
-      D27C.f[DIR_MM0  ] = &DD27C[DIR_PP0  *size_MatC];
-      D27C.f[DIR_PP0  ] = &DD27C[DIR_MM0  *size_MatC];
-      D27C.f[DIR_MP0  ] = &DD27C[DIR_PM0  *size_MatC];
-      D27C.f[DIR_PM0  ] = &DD27C[DIR_MP0  *size_MatC];
-      D27C.f[DIR_M0M  ] = &DD27C[DIR_P0P  *size_MatC];
-      D27C.f[DIR_P0P  ] = &DD27C[DIR_M0M  *size_MatC];
-      D27C.f[DIR_M0P  ] = &DD27C[DIR_P0M  *size_MatC];
-      D27C.f[DIR_P0M  ] = &DD27C[DIR_M0P  *size_MatC];
-      D27C.f[DIR_0MM  ] = &DD27C[DIR_0PP  *size_MatC];
-      D27C.f[DIR_0PP  ] = &DD27C[DIR_0MM  *size_MatC];
-      D27C.f[DIR_0MP  ] = &DD27C[DIR_0PM  *size_MatC];
-      D27C.f[DIR_0PM  ] = &DD27C[DIR_0MP  *size_MatC];
-      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
-      D27C.f[DIR_MMM ] = &DD27C[DIR_PPP *size_MatC];
-      D27C.f[DIR_PPM ] = &DD27C[DIR_MMP *size_MatC];
-      D27C.f[DIR_MPM ] = &DD27C[DIR_PMP *size_MatC];
-      D27C.f[DIR_PMM ] = &DD27C[DIR_MPP *size_MatC];
-      D27C.f[DIR_MMP ] = &DD27C[DIR_PPM *size_MatC];
-      D27C.f[DIR_PPP ] = &DD27C[DIR_MMM *size_MatC];
-      D27C.f[DIR_MPP ] = &DD27C[DIR_PMM *size_MatC];
-      D27C.f[DIR_PMP ] = &DD27C[DIR_MPM *size_MatC];
+      D27C.f[DIR_M00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_P00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0M0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0P0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_00M] = &DD27C[DIR_00P * numberOfLBnodesCoarse];
+      D27C.f[DIR_00P] = &DD27C[DIR_00M * numberOfLBnodesCoarse];
+      D27C.f[DIR_MM0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PP0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MP0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PM0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0M] = &DD27C[DIR_P0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0P] = &DD27C[DIR_M0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0P] = &DD27C[DIR_P0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0M] = &DD27C[DIR_M0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MM] = &DD27C[DIR_0PP * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PP] = &DD27C[DIR_0MM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MP] = &DD27C[DIR_0PM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PM] = &DD27C[DIR_0MP * numberOfLBnodesCoarse];
+      D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMM] = &DD27C[DIR_PPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPM] = &DD27C[DIR_MMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPM] = &DD27C[DIR_PMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMM] = &DD27C[DIR_MPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMP] = &DD27C[DIR_PPM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPP] = &DD27C[DIR_MMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPP] = &DD27C[DIR_PMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMP] = &DD27C[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -45835,9 +45835,9 @@ __global__ void scaleCFThS27(     real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -45892,33 +45892,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_SWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -45979,33 +45979,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_SWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46066,33 +46066,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_SET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46153,33 +46153,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_SEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46250,33 +46250,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_NWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46337,33 +46337,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_NWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46424,33 +46424,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_NET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46511,33 +46511,33 @@ __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       ////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27C.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0])[ks   ];
+      f27T    =  (D27C.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27C.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_C_NEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46656,32 +46656,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46734,32 +46734,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46812,32 +46812,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46890,32 +46890,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46978,32 +46978,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47056,32 +47056,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47134,32 +47134,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47212,32 +47212,32 @@ __global__ void scaleCFThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -47287,8 +47287,8 @@ __global__ void scaleCFEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-									             unsigned int size_MatC, 
-									             unsigned int size_MatF, 
+									             unsigned long long numberOfLBnodesCoarse, 
+									             unsigned long long numberOfLBnodesFine, 
 									             bool isEvenTimestep,
                                         unsigned int* posCSWB, 
                                         unsigned int* posFSWB, 
@@ -47300,101 +47300,101 @@ __global__ void scaleCFEff27(real* DC,
 									             unsigned int nyC, 
 									             unsigned int nxF, 
                                         unsigned int nyF,
-                                        OffCF offCF)
+                                        ICellNeigh offCF)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -47445,9 +47445,9 @@ __global__ void scaleCFEff27(real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -48997,8 +48997,8 @@ __global__ void scaleCF27(real* DC,
                                      unsigned int* neighborFX,
                                      unsigned int* neighborFY,
                                      unsigned int* neighborFZ,
-                                     unsigned int size_MatC, 
-                                     unsigned int size_MatF, 
+                                     unsigned long long numberOfLBnodesCoarse, 
+                                     unsigned long long numberOfLBnodesFine, 
                                      bool isEvenTimestep,
                                      unsigned int* posCSWB, 
                                      unsigned int* posFSWB, 
@@ -49014,96 +49014,96 @@ __global__ void scaleCF27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu
index cb8bd2a322cc9176cd0aa31625ee386e1f62d63d..a3044503b2e08b8bc713c7431c43a98395ec3298 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleCF_F3_27.cu
@@ -8,9 +8,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
@@ -23,8 +23,8 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 													 unsigned int* neighborFX,
 													 unsigned int* neighborFY,
 													 unsigned int* neighborFZ,
-													 unsigned int size_MatC, 
-													 unsigned int size_MatF, 
+													 unsigned long long numberOfLBnodesCoarse, 
+													 unsigned long long numberOfLBnodesFine, 
 													 bool isEvenTimestep,
 													 unsigned int* posCSWB, 
 													 unsigned int* posFSWB, 
@@ -36,7 +36,7 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 													 unsigned int nyC, 
 													 unsigned int nxF, 
 													 unsigned int nyF,
-													 OffCF offCF)
+													 ICellNeigh offCF)
 {
 	real
 		*fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -44,33 +44,33 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[DIR_P00   *size_MatF];
-	fM00dest = &DF[DIR_M00   *size_MatF];
-	f0P0dest = &DF[DIR_0P0   *size_MatF];
-	f0M0dest = &DF[DIR_0M0   *size_MatF];
-	f00Pdest = &DF[DIR_00P   *size_MatF];
-	f00Mdest = &DF[DIR_00M   *size_MatF];
-	fPP0dest = &DF[DIR_PP0  *size_MatF];
-	fMM0dest = &DF[DIR_MM0  *size_MatF];
-	fPM0dest = &DF[DIR_PM0  *size_MatF];
-	fMP0dest = &DF[DIR_MP0  *size_MatF];
-	fP0Pdest = &DF[DIR_P0P  *size_MatF];
-	fM0Mdest = &DF[DIR_M0M  *size_MatF];
-	fP0Mdest = &DF[DIR_P0M  *size_MatF];
-	fM0Pdest = &DF[DIR_M0P  *size_MatF];
-	f0PPdest = &DF[DIR_0PP  *size_MatF];
-	f0MMdest = &DF[DIR_0MM  *size_MatF];
-	f0PMdest = &DF[DIR_0PM  *size_MatF];
-	f0MPdest = &DF[DIR_0MP  *size_MatF];
-	f000dest = &DF[DIR_000*size_MatF];
-	fMMMdest = &DF[DIR_MMM *size_MatF];
-	fMMPdest = &DF[DIR_MMP *size_MatF];
-	fMPPdest = &DF[DIR_MPP *size_MatF];
-	fMPMdest = &DF[DIR_MPM *size_MatF];
-	fPPMdest = &DF[DIR_PPM *size_MatF];
-	fPPPdest = &DF[DIR_PPP *size_MatF];
-	fPMPdest = &DF[DIR_PMP *size_MatF];
-	fPMMdest = &DF[DIR_PMM *size_MatF];
+	fP00dest = &DF[DIR_P00 * numberOfLBnodesFine];
+	fM00dest = &DF[DIR_M00 * numberOfLBnodesFine];
+	f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine];
+	f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine];
+	f00Pdest = &DF[DIR_00P * numberOfLBnodesFine];
+	f00Mdest = &DF[DIR_00M * numberOfLBnodesFine];
+	fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine];
+	fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine];
+	fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine];
+	fMP0dest = &DF[DIR_MP0 * numberOfLBnodesFine];
+	fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine];
+	fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine];
+	fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine];
+	fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine];
+	f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine];
+	f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine];
+	f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine];
+	f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine];
+	f000dest = &DF[DIR_000 * numberOfLBnodesFine];
+	fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine];
+	fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine];
+	fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine];
+	fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine];
+	fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine];
+	fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine];
+	fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine];
+	fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -79,72 +79,72 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[DIR_P00   *size_MatC];
-		fM00source = &DC[DIR_M00   *size_MatC];
-		f0P0source = &DC[DIR_0P0   *size_MatC];
-		f0M0source = &DC[DIR_0M0   *size_MatC];
-		f00Psource = &DC[DIR_00P   *size_MatC];
-		f00Msource = &DC[DIR_00M   *size_MatC];
-		fPP0source = &DC[DIR_PP0  *size_MatC];
-		fMM0source = &DC[DIR_MM0  *size_MatC];
-		fPM0source = &DC[DIR_PM0  *size_MatC];
-		fMP0source = &DC[DIR_MP0  *size_MatC];
-		fP0Psource = &DC[DIR_P0P  *size_MatC];
-		fM0Msource = &DC[DIR_M0M  *size_MatC];
-		fP0Msource = &DC[DIR_P0M  *size_MatC];
-		fM0Psource = &DC[DIR_M0P  *size_MatC];
-		f0PPsource = &DC[DIR_0PP  *size_MatC];
-		f0MMsource = &DC[DIR_0MM  *size_MatC];
-		f0PMsource = &DC[DIR_0PM  *size_MatC];
-		f0MPsource = &DC[DIR_0MP  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_MMM *size_MatC];
-		fMMPsource = &DC[DIR_MMP *size_MatC];
-		fMPPsource = &DC[DIR_MPP *size_MatC];
-		fMPMsource = &DC[DIR_MPM *size_MatC];
-		fPPMsource = &DC[DIR_PPM *size_MatC];
-		fPPPsource = &DC[DIR_PPP *size_MatC];
-		fPMPsource = &DC[DIR_PMP *size_MatC];
-		fPMMsource = &DC[DIR_PMM *size_MatC];
+		fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
 	}
 	else
 	{
-		fP00source = &DC[DIR_M00   *size_MatC];
-		fM00source = &DC[DIR_P00   *size_MatC];
-		f0P0source = &DC[DIR_0M0   *size_MatC];
-		f0M0source = &DC[DIR_0P0   *size_MatC];
-		f00Psource = &DC[DIR_00M   *size_MatC];
-		f00Msource = &DC[DIR_00P   *size_MatC];
-		fPP0source = &DC[DIR_MM0  *size_MatC];
-		fMM0source = &DC[DIR_PP0  *size_MatC];
-		fPM0source = &DC[DIR_MP0  *size_MatC];
-		fMP0source = &DC[DIR_PM0  *size_MatC];
-		fP0Psource = &DC[DIR_M0M  *size_MatC];
-		fM0Msource = &DC[DIR_P0P  *size_MatC];
-		fP0Msource = &DC[DIR_M0P  *size_MatC];
-		fM0Psource = &DC[DIR_P0M  *size_MatC];
-		f0PPsource = &DC[DIR_0MM  *size_MatC];
-		f0MMsource = &DC[DIR_0PP  *size_MatC];
-		f0PMsource = &DC[DIR_0MP  *size_MatC];
-		f0MPsource = &DC[DIR_0PM  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_PPP *size_MatC];
-		fMMPsource = &DC[DIR_PPM *size_MatC];
-		fMPPsource = &DC[DIR_PMM *size_MatC];
-		fMPMsource = &DC[DIR_PMP *size_MatC];
-		fPPMsource = &DC[DIR_MMP *size_MatC];
-		fPPPsource = &DC[DIR_MMM *size_MatC];
-		fPMPsource = &DC[DIR_MPM *size_MatC];
-		fPMMsource = &DC[DIR_MPP *size_MatC];
+		fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
 	}
 
 	Distributions6 G;
-	G.g[DIR_P00] = &G6[DIR_P00   *size_MatF];
-	G.g[DIR_M00] = &G6[DIR_M00   *size_MatF];
-	G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatF];
-	G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatF];
-	G.g[DIR_00P] = &G6[DIR_00P   *size_MatF];
-	G.g[DIR_00M] = &G6[DIR_00M   *size_MatF];
+	G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesFine];
+	G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesFine];
+	G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesFine];
+	G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesFine];
+	G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesFine];
+	G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesFine];
 
 	////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -198,9 +198,9 @@ __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -4370,8 +4370,8 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posCSWB, 
 												 unsigned int* posFSWB, 
@@ -4383,7 +4383,7 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffCF offCF)
+												 ICellNeigh offCF)
 {
 	real
 		*fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -4391,33 +4391,33 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[DIR_P00   *size_MatF];
-	fM00dest = &DF[DIR_M00   *size_MatF];
-	f0P0dest = &DF[DIR_0P0   *size_MatF];
-	f0M0dest = &DF[DIR_0M0   *size_MatF];
-	f00Pdest = &DF[DIR_00P   *size_MatF];
-	f00Mdest = &DF[DIR_00M   *size_MatF];
-	fPP0dest = &DF[DIR_PP0  *size_MatF];
-	fMM0dest = &DF[DIR_MM0  *size_MatF];
-	fPM0dest = &DF[DIR_PM0  *size_MatF];
-	fMP0dest = &DF[DIR_MP0  *size_MatF];
-	fP0Pdest = &DF[DIR_P0P  *size_MatF];
-	fM0Mdest = &DF[DIR_M0M  *size_MatF];
-	fP0Mdest = &DF[DIR_P0M  *size_MatF];
-	fM0Pdest = &DF[DIR_M0P  *size_MatF];
-	f0PPdest = &DF[DIR_0PP  *size_MatF];
-	f0MMdest = &DF[DIR_0MM  *size_MatF];
-	f0PMdest = &DF[DIR_0PM  *size_MatF];
-	f0MPdest = &DF[DIR_0MP  *size_MatF];
-	f000dest = &DF[DIR_000*size_MatF];
-	fMMMdest = &DF[DIR_MMM *size_MatF];
-	fMMPdest = &DF[DIR_MMP *size_MatF];
-	fMPPdest = &DF[DIR_MPP *size_MatF];
-	fMPMdest = &DF[DIR_MPM *size_MatF];
-	fPPMdest = &DF[DIR_PPM *size_MatF];
-	fPPPdest = &DF[DIR_PPP *size_MatF];
-	fPMPdest = &DF[DIR_PMP *size_MatF];
-	fPMMdest = &DF[DIR_PMM *size_MatF];
+	fP00dest = &DF[DIR_P00 * numberOfLBnodesFine];
+	fM00dest = &DF[DIR_M00 * numberOfLBnodesFine];
+	f0P0dest = &DF[DIR_0P0 * numberOfLBnodesFine];
+	f0M0dest = &DF[DIR_0M0 * numberOfLBnodesFine];
+	f00Pdest = &DF[DIR_00P * numberOfLBnodesFine];
+	f00Mdest = &DF[DIR_00M * numberOfLBnodesFine];
+	fPP0dest = &DF[DIR_PP0 * numberOfLBnodesFine];
+	fMM0dest = &DF[DIR_MM0 * numberOfLBnodesFine];
+	fPM0dest = &DF[DIR_PM0 * numberOfLBnodesFine];
+	fMP0dest = &DF[DIR_MP0 * numberOfLBnodesFine];
+	fP0Pdest = &DF[DIR_P0P * numberOfLBnodesFine];
+	fM0Mdest = &DF[DIR_M0M * numberOfLBnodesFine];
+	fP0Mdest = &DF[DIR_P0M * numberOfLBnodesFine];
+	fM0Pdest = &DF[DIR_M0P * numberOfLBnodesFine];
+	f0PPdest = &DF[DIR_0PP * numberOfLBnodesFine];
+	f0MMdest = &DF[DIR_0MM * numberOfLBnodesFine];
+	f0PMdest = &DF[DIR_0PM * numberOfLBnodesFine];
+	f0MPdest = &DF[DIR_0MP * numberOfLBnodesFine];
+	f000dest = &DF[DIR_000 * numberOfLBnodesFine];
+	fMMMdest = &DF[DIR_MMM * numberOfLBnodesFine];
+	fMMPdest = &DF[DIR_MMP * numberOfLBnodesFine];
+	fMPPdest = &DF[DIR_MPP * numberOfLBnodesFine];
+	fMPMdest = &DF[DIR_MPM * numberOfLBnodesFine];
+	fPPMdest = &DF[DIR_PPM * numberOfLBnodesFine];
+	fPPPdest = &DF[DIR_PPP * numberOfLBnodesFine];
+	fPMPdest = &DF[DIR_PMP * numberOfLBnodesFine];
+	fPMMdest = &DF[DIR_PMM * numberOfLBnodesFine];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -4426,72 +4426,72 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[DIR_P00   *size_MatC];
-		fM00source = &DC[DIR_M00   *size_MatC];
-		f0P0source = &DC[DIR_0P0   *size_MatC];
-		f0M0source = &DC[DIR_0M0   *size_MatC];
-		f00Psource = &DC[DIR_00P   *size_MatC];
-		f00Msource = &DC[DIR_00M   *size_MatC];
-		fPP0source = &DC[DIR_PP0  *size_MatC];
-		fMM0source = &DC[DIR_MM0  *size_MatC];
-		fPM0source = &DC[DIR_PM0  *size_MatC];
-		fMP0source = &DC[DIR_MP0  *size_MatC];
-		fP0Psource = &DC[DIR_P0P  *size_MatC];
-		fM0Msource = &DC[DIR_M0M  *size_MatC];
-		fP0Msource = &DC[DIR_P0M  *size_MatC];
-		fM0Psource = &DC[DIR_M0P  *size_MatC];
-		f0PPsource = &DC[DIR_0PP  *size_MatC];
-		f0MMsource = &DC[DIR_0MM  *size_MatC];
-		f0PMsource = &DC[DIR_0PM  *size_MatC];
-		f0MPsource = &DC[DIR_0MP  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_MMM *size_MatC];
-		fMMPsource = &DC[DIR_MMP *size_MatC];
-		fMPPsource = &DC[DIR_MPP *size_MatC];
-		fMPMsource = &DC[DIR_MPM *size_MatC];
-		fPPMsource = &DC[DIR_PPM *size_MatC];
-		fPPPsource = &DC[DIR_PPP *size_MatC];
-		fPMPsource = &DC[DIR_PMP *size_MatC];
-		fPMMsource = &DC[DIR_PMM *size_MatC];
+		fP00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
 	}
 	else
 	{
-		fP00source = &DC[DIR_M00   *size_MatC];
-		fM00source = &DC[DIR_P00   *size_MatC];
-		f0P0source = &DC[DIR_0M0   *size_MatC];
-		f0M0source = &DC[DIR_0P0   *size_MatC];
-		f00Psource = &DC[DIR_00M   *size_MatC];
-		f00Msource = &DC[DIR_00P   *size_MatC];
-		fPP0source = &DC[DIR_MM0  *size_MatC];
-		fMM0source = &DC[DIR_PP0  *size_MatC];
-		fPM0source = &DC[DIR_MP0  *size_MatC];
-		fMP0source = &DC[DIR_PM0  *size_MatC];
-		fP0Psource = &DC[DIR_M0M  *size_MatC];
-		fM0Msource = &DC[DIR_P0P  *size_MatC];
-		fP0Msource = &DC[DIR_M0P  *size_MatC];
-		fM0Psource = &DC[DIR_P0M  *size_MatC];
-		f0PPsource = &DC[DIR_0MM  *size_MatC];
-		f0MMsource = &DC[DIR_0PP  *size_MatC];
-		f0PMsource = &DC[DIR_0MP  *size_MatC];
-		f0MPsource = &DC[DIR_0PM  *size_MatC];
-		f000source = &DC[DIR_000*size_MatC];
-		fMMMsource = &DC[DIR_PPP *size_MatC];
-		fMMPsource = &DC[DIR_PPM *size_MatC];
-		fMPPsource = &DC[DIR_PMM *size_MatC];
-		fMPMsource = &DC[DIR_PMP *size_MatC];
-		fPPMsource = &DC[DIR_MMP *size_MatC];
-		fPPPsource = &DC[DIR_MMM *size_MatC];
-		fPMPsource = &DC[DIR_MPM *size_MatC];
-		fPMMsource = &DC[DIR_MPP *size_MatC];
+		fP00source = &DC[DIR_M00 * numberOfLBnodesCoarse];
+		fM00source = &DC[DIR_P00 * numberOfLBnodesCoarse];
+		f0P0source = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+		f0M0source = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+		f00Psource = &DC[DIR_00M * numberOfLBnodesCoarse];
+		f00Msource = &DC[DIR_00P * numberOfLBnodesCoarse];
+		fPP0source = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+		fMM0source = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+		fPM0source = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+		fMP0source = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+		fP0Psource = &DC[DIR_M0M * numberOfLBnodesCoarse];
+		fM0Msource = &DC[DIR_P0P * numberOfLBnodesCoarse];
+		fP0Msource = &DC[DIR_M0P * numberOfLBnodesCoarse];
+		fM0Psource = &DC[DIR_P0M * numberOfLBnodesCoarse];
+		f0PPsource = &DC[DIR_0MM * numberOfLBnodesCoarse];
+		f0MMsource = &DC[DIR_0PP * numberOfLBnodesCoarse];
+		f0PMsource = &DC[DIR_0MP * numberOfLBnodesCoarse];
+		f0MPsource = &DC[DIR_0PM * numberOfLBnodesCoarse];
+		f000source = &DC[DIR_000 * numberOfLBnodesCoarse];
+		fMMMsource = &DC[DIR_PPP * numberOfLBnodesCoarse];
+		fMMPsource = &DC[DIR_PPM * numberOfLBnodesCoarse];
+		fMPPsource = &DC[DIR_PMM * numberOfLBnodesCoarse];
+		fMPMsource = &DC[DIR_PMP * numberOfLBnodesCoarse];
+		fPPMsource = &DC[DIR_MMP * numberOfLBnodesCoarse];
+		fPPPsource = &DC[DIR_MMM * numberOfLBnodesCoarse];
+		fPMPsource = &DC[DIR_MPM * numberOfLBnodesCoarse];
+		fPMMsource = &DC[DIR_MPP * numberOfLBnodesCoarse];
 	}
 
 	Distributions6 G;
-	G.g[DIR_P00] = &G6[DIR_P00   *size_MatF];
-	G.g[DIR_M00] = &G6[DIR_M00   *size_MatF];
-	G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatF];
-	G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatF];
-	G.g[DIR_00P] = &G6[DIR_00P   *size_MatF];
-	G.g[DIR_00M] = &G6[DIR_00M   *size_MatF];
+	G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesFine];
+	G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesFine];
+	G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesFine];
+	G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesFine];
+	G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesFine];
+	G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesFine];
 
 	////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -4545,9 +4545,9 @@ __global__ void scaleCF_comp_D3Q27F3( real* DC,
    if(k<kCF)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offCF.xOffCF[k];
-      yoff    = offCF.yOffCF[k];
-      zoff    = offCF.zOffCF[k];
+      xoff    = offCF.x[k];
+      yoff    = offCF.y[k];
+      zoff    = offCF.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu
index f2a66876cf39e3519e22fc2b0e236514f05ce85a..08c47230faa5ffeed0b996e2b1125d3c45e6bce1 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC27.cu
@@ -8,9 +8,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
@@ -22,8 +22,8 @@ __global__ void scaleFC_0817_comp_27( real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -35,7 +35,7 @@ __global__ void scaleFC_0817_comp_27( real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffFC offFC)
+												 ICellNeigh offFC)
 {
    real 
 	   *fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -43,33 +43,33 @@ __global__ void scaleFC_0817_comp_27( real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[DIR_P00   *size_MatF];
-   fM00source = &DF[DIR_M00   *size_MatF];
-   f0P0source = &DF[DIR_0P0   *size_MatF];
-   f0M0source = &DF[DIR_0M0   *size_MatF];
-   f00Psource = &DF[DIR_00P   *size_MatF];
-   f00Msource = &DF[DIR_00M   *size_MatF];
-   fPP0source = &DF[DIR_PP0  *size_MatF];
-   fMM0source = &DF[DIR_MM0  *size_MatF];
-   fPM0source = &DF[DIR_PM0  *size_MatF];
-   fMP0source = &DF[DIR_MP0  *size_MatF];
-   fP0Psource = &DF[DIR_P0P  *size_MatF];
-   fM0Msource = &DF[DIR_M0M  *size_MatF];
-   fP0Msource = &DF[DIR_P0M  *size_MatF];
-   fM0Psource = &DF[DIR_M0P  *size_MatF];
-   f0PPsource = &DF[DIR_0PP  *size_MatF];
-   f0MMsource = &DF[DIR_0MM  *size_MatF];
-   f0PMsource = &DF[DIR_0PM  *size_MatF];
-   f0MPsource = &DF[DIR_0MP  *size_MatF];
-   f000source = &DF[DIR_000*size_MatF];
-   fMMMsource = &DF[DIR_MMM *size_MatF];
-   fMMPsource = &DF[DIR_MMP *size_MatF];
-   fMPPsource = &DF[DIR_MPP *size_MatF];
-   fMPMsource = &DF[DIR_MPM *size_MatF];
-   fPPMsource = &DF[DIR_PPM *size_MatF];
-   fPPPsource = &DF[DIR_PPP *size_MatF];
-   fPMPsource = &DF[DIR_PMP *size_MatF];
-   fPMMsource = &DF[DIR_PMM *size_MatF];
+   fP00source = &DF[DIR_P00 * numberOfLBnodesFine];
+   fM00source = &DF[DIR_M00 * numberOfLBnodesFine];
+   f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine];
+   f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine];
+   f00Psource = &DF[DIR_00P * numberOfLBnodesFine];
+   f00Msource = &DF[DIR_00M * numberOfLBnodesFine];
+   fPP0source = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine];
+   fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine];
+   fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine];
+   fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine];
+   f0PPsource = &DF[DIR_0PP * numberOfLBnodesFine];
+   f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine];
+   f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine];
+   f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine];
+   f000source = &DF[DIR_000 * numberOfLBnodesFine];
+   fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine];
+   fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine];
+   fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine];
+   fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine];
+   fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine];
+   fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine];
+   fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine];
+   fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -78,63 +78,63 @@ __global__ void scaleFC_0817_comp_27( real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[DIR_P00   *size_MatC];
-	   fM00dest = &DC[DIR_M00   *size_MatC];
-	   f0P0dest = &DC[DIR_0P0   *size_MatC];
-	   f0M0dest = &DC[DIR_0M0   *size_MatC];
-	   f00Pdest = &DC[DIR_00P   *size_MatC];
-	   f00Mdest = &DC[DIR_00M   *size_MatC];
-	   fPP0dest = &DC[DIR_PP0  *size_MatC];
-	   fMM0dest = &DC[DIR_MM0  *size_MatC];
-	   fPM0dest = &DC[DIR_PM0  *size_MatC];
-	   fMP0dest = &DC[DIR_MP0  *size_MatC];
-	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
-	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
-	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
-	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
-	   f0PPdest = &DC[DIR_0PP  *size_MatC];
-	   f0MMdest = &DC[DIR_0MM  *size_MatC];
-	   f0PMdest = &DC[DIR_0PM  *size_MatC];
-	   f0MPdest = &DC[DIR_0MP  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_MMM *size_MatC];
-	   fMMPdest = &DC[DIR_MMP *size_MatC];
-	   fMPPdest = &DC[DIR_MPP *size_MatC];
-	   fMPMdest = &DC[DIR_MPM *size_MatC];
-	   fPPMdest = &DC[DIR_PPM *size_MatC];
-	   fPPPdest = &DC[DIR_PPP *size_MatC];
-	   fPMPdest = &DC[DIR_PMP *size_MatC];
-	   fPMMdest = &DC[DIR_PMM *size_MatC];
+	   fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
    } 
    else
    {
-	   fP00dest = &DC[DIR_M00   *size_MatC];
-	   fM00dest = &DC[DIR_P00   *size_MatC];
-	   f0P0dest = &DC[DIR_0M0   *size_MatC];
-	   f0M0dest = &DC[DIR_0P0   *size_MatC];
-	   f00Pdest = &DC[DIR_00M   *size_MatC];
-	   f00Mdest = &DC[DIR_00P   *size_MatC];
-	   fPP0dest = &DC[DIR_MM0  *size_MatC];
-	   fMM0dest = &DC[DIR_PP0  *size_MatC];
-	   fPM0dest = &DC[DIR_MP0  *size_MatC];
-	   fMP0dest = &DC[DIR_PM0  *size_MatC];
-	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
-	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
-	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
-	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
-	   f0PPdest = &DC[DIR_0MM  *size_MatC];
-	   f0MMdest = &DC[DIR_0PP  *size_MatC];
-	   f0PMdest = &DC[DIR_0MP  *size_MatC];
-	   f0MPdest = &DC[DIR_0PM  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_PPP *size_MatC];
-	   fMMPdest = &DC[DIR_PPM *size_MatC];
-	   fMPPdest = &DC[DIR_PMM *size_MatC];
-	   fMPMdest = &DC[DIR_PMP *size_MatC];
-	   fPPMdest = &DC[DIR_MMP *size_MatC];
-	   fPPPdest = &DC[DIR_MMM *size_MatC];
-	   fPMPdest = &DC[DIR_MPM *size_MatC];
-	   fPMMdest = &DC[DIR_MPP *size_MatC];
+	   fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -183,9 +183,9 @@ __global__ void scaleFC_0817_comp_27( real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -1218,8 +1218,8 @@ __global__ void scaleFC_AA2016_comp_27(real* DC,
 												  unsigned int* neighborFX,
 												  unsigned int* neighborFY,
 												  unsigned int* neighborFZ,
-												  unsigned int size_MatC, 
-												  unsigned int size_MatF, 
+												  unsigned long long numberOfLBnodesCoarse, 
+												  unsigned long long numberOfLBnodesFine, 
 												  bool isEvenTimestep,
 												  unsigned int* posC, 
 												  unsigned int* posFSWB, 
@@ -1231,101 +1231,101 @@ __global__ void scaleFC_AA2016_comp_27(real* DC,
 												  unsigned int nyC, 
 												  unsigned int nxF, 
 												  unsigned int nyF,
-												  OffFC offFC)
+												  ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -1393,9 +1393,9 @@ __global__ void scaleFC_AA2016_comp_27(real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -5407,8 +5407,8 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int* neighborFX,
 														unsigned int* neighborFY,
 														unsigned int* neighborFZ,
-														unsigned int size_MatC, 
-														unsigned int size_MatF, 
+														unsigned long long numberOfLBnodesCoarse, 
+														unsigned long long numberOfLBnodesFine, 
 														bool isEvenTimestep,
 														unsigned int* posC, 
 														unsigned int* posFSWB, 
@@ -5420,101 +5420,101 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int nyC, 
 														unsigned int nxF, 
 														unsigned int nyF,
-														OffFC offFC)
+														ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -5582,9 +5582,9 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -9587,103 +9587,120 @@ __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-__device__ void scaleFC_RhoSq_comp_27_Calculation(real *DC, real *DF, unsigned int *neighborCX, unsigned int *neighborCY,
-                                                  unsigned int *neighborCZ, unsigned int *neighborFX, unsigned int *neighborFY,
-                                                  unsigned int *neighborFZ, unsigned int size_MatC, unsigned int size_MatF,
-                                                  bool isEvenTimestep, unsigned int *posC, unsigned int *posFSWB, unsigned int kFC,
-                                                  real omCoarse, real omFine, real nu, unsigned int nxC, unsigned int nyC,
-                                                  unsigned int nxF, unsigned int nyF, OffFC offFC, const unsigned k)
+__device__ void scaleFC_RhoSq_comp_27_Calculation(
+    real *DC, real *DF, 
+    unsigned int *neighborCX,
+    unsigned int *neighborCY,
+    unsigned int *neighborCZ,
+    unsigned int *neighborFX,
+    unsigned int *neighborFY,
+    unsigned int *neighborFZ,
+    unsigned long long numberOfLBnodesCoarse,
+    unsigned long long numberOfLBnodesFine,
+    bool isEvenTimestep,
+    unsigned int *posC,
+    unsigned int *posFSWB,
+    unsigned int kFC,
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    ICellNeigh offFC,
+    const unsigned k)
 {
     real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF,
         *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-    feF    = &DF[DIR_P00 * size_MatF];
-    fwF    = &DF[DIR_M00 * size_MatF];
-    fnF    = &DF[DIR_0P0 * size_MatF];
-    fsF    = &DF[DIR_0M0 * size_MatF];
-    ftF    = &DF[DIR_00P * size_MatF];
-    fbF    = &DF[DIR_00M * size_MatF];
-    fneF   = &DF[DIR_PP0 * size_MatF];
-    fswF   = &DF[DIR_MM0 * size_MatF];
-    fseF   = &DF[DIR_PM0 * size_MatF];
-    fnwF   = &DF[DIR_MP0 * size_MatF];
-    fteF   = &DF[DIR_P0P * size_MatF];
-    fbwF   = &DF[DIR_M0M * size_MatF];
-    fbeF   = &DF[DIR_P0M * size_MatF];
-    ftwF   = &DF[DIR_M0P * size_MatF];
-    ftnF   = &DF[DIR_0PP * size_MatF];
-    fbsF   = &DF[DIR_0MM * size_MatF];
-    fbnF   = &DF[DIR_0PM * size_MatF];
-    ftsF   = &DF[DIR_0MP * size_MatF];
-    fzeroF = &DF[DIR_000 * size_MatF];
-    ftneF  = &DF[DIR_PPP * size_MatF];
-    ftswF  = &DF[DIR_MMP * size_MatF];
-    ftseF  = &DF[DIR_PMP * size_MatF];
-    ftnwF  = &DF[DIR_MPP * size_MatF];
-    fbneF  = &DF[DIR_PPM * size_MatF];
-    fbswF  = &DF[DIR_MMM * size_MatF];
-    fbseF  = &DF[DIR_PMM * size_MatF];
-    fbnwF  = &DF[DIR_MPM * size_MatF];
+    feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+    fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+    fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+    fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+    ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+    fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+    fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+    fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+    fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+    fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+    fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+    fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+    fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+    ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+    ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+    fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+    fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+    ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+    fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+    ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+    ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+    ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+    ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+    fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+    fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+    fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+    fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
     real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC,
         *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
     if (isEvenTimestep == true) {
-        feC    = &DC[DIR_P00 * size_MatC];
-        fwC    = &DC[DIR_M00 * size_MatC];
-        fnC    = &DC[DIR_0P0 * size_MatC];
-        fsC    = &DC[DIR_0M0 * size_MatC];
-        ftC    = &DC[DIR_00P * size_MatC];
-        fbC    = &DC[DIR_00M * size_MatC];
-        fneC   = &DC[DIR_PP0 * size_MatC];
-        fswC   = &DC[DIR_MM0 * size_MatC];
-        fseC   = &DC[DIR_PM0 * size_MatC];
-        fnwC   = &DC[DIR_MP0 * size_MatC];
-        fteC   = &DC[DIR_P0P * size_MatC];
-        fbwC   = &DC[DIR_M0M * size_MatC];
-        fbeC   = &DC[DIR_P0M * size_MatC];
-        ftwC   = &DC[DIR_M0P * size_MatC];
-        ftnC   = &DC[DIR_0PP * size_MatC];
-        fbsC   = &DC[DIR_0MM * size_MatC];
-        fbnC   = &DC[DIR_0PM * size_MatC];
-        ftsC   = &DC[DIR_0MP * size_MatC];
-        fzeroC = &DC[DIR_000 * size_MatC];
-        ftneC  = &DC[DIR_PPP * size_MatC];
-        ftswC  = &DC[DIR_MMP * size_MatC];
-        ftseC  = &DC[DIR_PMP * size_MatC];
-        ftnwC  = &DC[DIR_MPP * size_MatC];
-        fbneC  = &DC[DIR_PPM * size_MatC];
-        fbswC  = &DC[DIR_MMM * size_MatC];
-        fbseC  = &DC[DIR_PMM * size_MatC];
-        fbnwC  = &DC[DIR_MPM * size_MatC];
+        feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+        fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+        fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+        fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+        ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+        fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+        fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+        fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+        fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+        fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+        fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+        fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+        fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+        ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+        ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+        fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+        fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+        ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+        fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+        ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+        ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+        ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+        ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+        fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+        fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+        fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+        fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
     } else {
-        fwC    = &DC[DIR_P00 * size_MatC];
-        feC    = &DC[DIR_M00 * size_MatC];
-        fsC    = &DC[DIR_0P0 * size_MatC];
-        fnC    = &DC[DIR_0M0 * size_MatC];
-        fbC    = &DC[DIR_00P * size_MatC];
-        ftC    = &DC[DIR_00M * size_MatC];
-        fswC   = &DC[DIR_PP0 * size_MatC];
-        fneC   = &DC[DIR_MM0 * size_MatC];
-        fnwC   = &DC[DIR_PM0 * size_MatC];
-        fseC   = &DC[DIR_MP0 * size_MatC];
-        fbwC   = &DC[DIR_P0P * size_MatC];
-        fteC   = &DC[DIR_M0M * size_MatC];
-        ftwC   = &DC[DIR_P0M * size_MatC];
-        fbeC   = &DC[DIR_M0P * size_MatC];
-        fbsC   = &DC[DIR_0PP * size_MatC];
-        ftnC   = &DC[DIR_0MM * size_MatC];
-        ftsC   = &DC[DIR_0PM * size_MatC];
-        fbnC   = &DC[DIR_0MP * size_MatC];
-        fzeroC = &DC[DIR_000 * size_MatC];
-        fbswC  = &DC[DIR_PPP * size_MatC];
-        fbneC  = &DC[DIR_MMP * size_MatC];
-        fbnwC  = &DC[DIR_PMP * size_MatC];
-        fbseC  = &DC[DIR_MPP * size_MatC];
-        ftswC  = &DC[DIR_PPM * size_MatC];
-        ftneC  = &DC[DIR_MMM * size_MatC];
-        ftnwC  = &DC[DIR_PMM * size_MatC];
-        ftseC  = &DC[DIR_MPM * size_MatC];
+        fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+        feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+        fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+        fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+        fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+        ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+        fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+        fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+        fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+        fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+        fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+        fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+        ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+        fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+        fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+        ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+        ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+        fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+        fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+        fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+        fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+        fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+        fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+        ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+        ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+        ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+        ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
     }
 
     ////////////////////////////////////////////////////////////////////////////////
@@ -9724,9 +9741,9 @@ __device__ void scaleFC_RhoSq_comp_27_Calculation(real *DC, real *DF, unsigned i
 
     if (k < kFC) {
         //////////////////////////////////////////////////////////////////////////
-        xoff    = offFC.xOffFC[k];
-        yoff    = offFC.yOffFC[k];
-        zoff    = offFC.zOffFC[k];
+        xoff    = offFC.x[k];
+        yoff    = offFC.y[k];
+        zoff    = offFC.z[k];
         xoff_sq = xoff * xoff;
         yoff_sq = yoff * yoff;
         zoff_sq = zoff * zoff;
@@ -11064,8 +11081,8 @@ __global__ void scaleFC_RhoSq_comp_27(real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -11077,7 +11094,7 @@ __global__ void scaleFC_RhoSq_comp_27(real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffFC offFC)
+												 ICellNeigh offFC)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -11091,7 +11108,7 @@ __global__ void scaleFC_RhoSq_comp_27(real* DC,
    //////////////////////////////////////////////////////////////////////////
 
    scaleFC_RhoSq_comp_27_Calculation(DC, DF, neighborCX, neighborCY, neighborCZ, neighborFX, neighborFY, neighborFZ,
-                                     size_MatC, size_MatF, isEvenTimestep, posC, posFSWB, kFC, omCoarse, omFine, nu, nxC,
+                                     numberOfLBnodesCoarse, numberOfLBnodesFine, isEvenTimestep, posC, posFSWB, kFC, omCoarse, omFine, nu, nxC,
                                      nyC, nxF, nyF, offFC, k);
 }
 
@@ -11157,8 +11174,8 @@ __global__ void scaleFC_staggered_time_comp_27(   real* DC,
 															 unsigned int* neighborFX,
 															 unsigned int* neighborFY,
 															 unsigned int* neighborFZ,
-															 unsigned int size_MatC, 
-															 unsigned int size_MatF, 
+															 unsigned long long numberOfLBnodesCoarse, 
+															 unsigned long long numberOfLBnodesFine, 
 															 bool isEvenTimestep,
 															 unsigned int* posC, 
 															 unsigned int* posFSWB, 
@@ -11170,101 +11187,101 @@ __global__ void scaleFC_staggered_time_comp_27(   real* DC,
 															 unsigned int nyC, 
 															 unsigned int nxF, 
 															 unsigned int nyF,
-															 OffFC offFC)
+															 ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -11312,9 +11329,9 @@ __global__ void scaleFC_staggered_time_comp_27(   real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -11755,827 +11772,6 @@ __global__ void scaleFC_staggered_time_comp_27(   real* DC,
       kxxMyyFromfcNEQ_NEB = -c3o2*omegaS *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (c1o1 + drho_NEB) - ((vx1_NEB*vx1_NEB-vx2_NEB*vx2_NEB)));
       kxxMzzFromfcNEQ_NEB = -c3o2*omegaS *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (c1o1 + drho_NEB) - ((vx1_NEB*vx1_NEB-vx3_NEB*vx3_NEB)));
 
-   //   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  ////pointertausch
-	  // if (isEvenTimestep==false)
-	  // {
-		 // feC    = &DC[DIR_P00   *size_MatC];
-		 // fwC    = &DC[DIR_M00   *size_MatC];
-		 // fnC    = &DC[DIR_0P0   *size_MatC];
-		 // fsC    = &DC[DIR_0M0   *size_MatC];
-		 // ftC    = &DC[DIR_00P   *size_MatC];
-		 // fbC    = &DC[DIR_00M   *size_MatC];
-		 // fneC   = &DC[DIR_PP0  *size_MatC];
-		 // fswC   = &DC[DIR_MM0  *size_MatC];
-		 // fseC   = &DC[DIR_PM0  *size_MatC];
-		 // fnwC   = &DC[DIR_MP0  *size_MatC];
-		 // fteC   = &DC[DIR_P0P  *size_MatC];
-		 // fbwC   = &DC[DIR_M0M  *size_MatC];
-		 // fbeC   = &DC[DIR_P0M  *size_MatC];
-		 // ftwC   = &DC[DIR_M0P  *size_MatC];
-		 // ftnC   = &DC[DIR_0PP  *size_MatC];
-		 // fbsC   = &DC[DIR_0MM  *size_MatC];
-		 // fbnC   = &DC[DIR_0PM  *size_MatC];
-		 // ftsC   = &DC[DIR_0MP  *size_MatC];
-		 // fzeroC = &DC[DIR_000*size_MatC];
-		 // ftneC  = &DC[DIR_PPP *size_MatC];
-		 // ftswC  = &DC[DIR_MMP *size_MatC];
-		 // ftseC  = &DC[DIR_PMP *size_MatC];
-		 // ftnwC  = &DC[DIR_MPP *size_MatC];
-		 // fbneC  = &DC[DIR_PPM *size_MatC];
-		 // fbswC  = &DC[DIR_MMM *size_MatC];
-		 // fbseC  = &DC[DIR_PMM *size_MatC];
-		 // fbnwC  = &DC[DIR_MPM *size_MatC];
-	  // } 
-	  // else
-	  // {
-		 // fwC    = &DC[DIR_P00   *size_MatC];
-		 // feC    = &DC[DIR_M00   *size_MatC];
-		 // fsC    = &DC[DIR_0P0   *size_MatC];
-		 // fnC    = &DC[DIR_0M0   *size_MatC];
-		 // fbC    = &DC[DIR_00P   *size_MatC];
-		 // ftC    = &DC[DIR_00M   *size_MatC];
-		 // fswC   = &DC[DIR_PP0  *size_MatC];
-		 // fneC   = &DC[DIR_MM0  *size_MatC];
-		 // fnwC   = &DC[DIR_PM0  *size_MatC];
-		 // fseC   = &DC[DIR_MP0  *size_MatC];
-		 // fbwC   = &DC[DIR_P0P  *size_MatC];
-		 // fteC   = &DC[DIR_M0M  *size_MatC];
-		 // ftwC   = &DC[DIR_P0M  *size_MatC];
-		 // fbeC   = &DC[DIR_M0P  *size_MatC];
-		 // fbsC   = &DC[DIR_0PP  *size_MatC];
-		 // ftnC   = &DC[DIR_0MM  *size_MatC];
-		 // ftsC   = &DC[DIR_0PM  *size_MatC];
-		 // fbnC   = &DC[DIR_0MP  *size_MatC];
-		 // fzeroC = &DC[DIR_000*size_MatC];
-		 // fbswC  = &DC[DIR_PPP *size_MatC];
-		 // fbneC  = &DC[DIR_MMP *size_MatC];
-		 // fbnwC  = &DC[DIR_PMP *size_MatC];
-		 // fbseC  = &DC[DIR_MPP *size_MatC];
-		 // ftswC  = &DC[DIR_PPM *size_MatC];
-		 // ftneC  = &DC[DIR_MMM *size_MatC];
-		 // ftnwC  = &DC[DIR_PMM *size_MatC];
-		 // ftseC  = &DC[DIR_MPM *size_MatC];
-	  // }
-
- 	 // real rho_tmp;
-	  //real vx1_tmp;
-	  //real vx2_tmp;
-	  //real vx3_tmp;
-
-   //  //////////////////////////////////////////////////////////////////////////
-   //   xoff = offFC.xOffFC[k];
-   //   yoff = offFC.yOffFC[k];
-   //   zoff = offFC.zOffFC[k];      
-   //   xoff_sq = xoff * xoff;
-   //   yoff_sq = yoff * yoff;
-   //   zoff_sq = zoff * zoff;
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //SWB//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 0
-   //   k0zero= posFSWB[k];
-   //   k0w   = neighborFX[k0zero];
-   //   k0s   = neighborFY[k0zero];
-   //   k0b   = neighborFZ[k0zero];
-   //   k0sw  = neighborFY[k0w];
-   //   k0bw  = neighborFZ[k0w];
-   //   k0bs  = neighborFZ[k0s];
-   //   k0bsw = neighborFZ[k0sw];
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= k0zero;
-   //   kw   = k0w;   
-   //   ks   = k0s;   
-   //   kb   = k0b;   
-   //   ksw  = k0sw;  
-   //   kbw  = k0bw;  
-   //   kbs  = k0bs;  
-   //   kbsw = k0bsw; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_SWB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_SWB  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SWB);
-	  ////vx2_SWB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SWB);
-	  ////vx3_SWB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SWB);
-
-   //   //kxyFromfcNEQ_SWB    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SWB) - ((vx1_SWB*vx2_SWB)));
-   //   //kyzFromfcNEQ_SWB    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SWB) - ((vx2_SWB*vx3_SWB)));
-   //   //kxzFromfcNEQ_SWB    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SWB) - ((vx1_SWB*vx3_SWB)));
-   //   //kxxMyyFromfcNEQ_SWB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_SWB) - ((vx1_SWB*vx1_SWB-vx2_SWB*vx2_SWB)));
-   //   //kxxMzzFromfcNEQ_SWB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_SWB) - ((vx1_SWB*vx1_SWB-vx3_SWB*vx3_SWB)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_SWB += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_SWB  += vx1_tmp;
-	  //vx2_SWB  += vx2_tmp;
-	  //vx3_SWB  += vx3_tmp;
-
-   //   drho_SWB *= c1o2;
-   //   vx1_SWB  *= c1o2;
-	  //vx2_SWB  *= c1o2;
-	  //vx3_SWB  *= c1o2;
-
-   //   kxyFromfcNEQ_SWB    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_SWB    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_SWB    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_SWB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_SWB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_SWB    *= c1o2;
-	  //kyzFromfcNEQ_SWB    *= c1o2;
-	  //kxzFromfcNEQ_SWB    *= c1o2;
-	  //kxxMyyFromfcNEQ_SWB *= c1o2;
-	  //kxxMzzFromfcNEQ_SWB *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //SWT//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= kb;
-   //   kw   = kbw;   
-   //   ks   = kbs;   
-   //   kb   = neighborFZ[kb];   
-   //   ksw  = kbsw;  
-   //   kbw  = neighborFZ[kbw];  
-   //   kbs  = neighborFZ[kbs];  
-   //   kbsw = neighborFZ[kbsw]; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_SWT = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_SWT  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SWT);
-	  ////vx2_SWT  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SWT);
-	  ////vx3_SWT  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SWT);
-
-   //   //kxyFromfcNEQ_SWT    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SWT) - ((vx1_SWT*vx2_SWT)));
-   //   //kyzFromfcNEQ_SWT    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SWT) - ((vx2_SWT*vx3_SWT)));
-   //   //kxzFromfcNEQ_SWT    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SWT) - ((vx1_SWT*vx3_SWT)));
-   //   //kxxMyyFromfcNEQ_SWT = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_SWT) - ((vx1_SWT*vx1_SWT-vx2_SWT*vx2_SWT)));
-   //   //kxxMzzFromfcNEQ_SWT = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_SWT) - ((vx1_SWT*vx1_SWT-vx3_SWT*vx3_SWT)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_SWT += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_SWT  += vx1_tmp;
-	  //vx2_SWT  += vx2_tmp;
-	  //vx3_SWT  += vx3_tmp;
-
-   //   drho_SWT *= c1o2;
-   //   vx1_SWT  *= c1o2;
-	  //vx2_SWT  *= c1o2;
-	  //vx3_SWT  *= c1o2;
-
-   //   kxyFromfcNEQ_SWT    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_SWT    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_SWT    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_SWT += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_SWT += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_SWT    *= c1o2;
-	  //kyzFromfcNEQ_SWT    *= c1o2;
-	  //kxzFromfcNEQ_SWT    *= c1o2;
-	  //kxxMyyFromfcNEQ_SWT *= c1o2;
-	  //kxxMzzFromfcNEQ_SWT *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //SET//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= kw;
-   //   kw   = neighborFX[kw];   
-   //   ks   = ksw;   
-   //   kb   = kbw;   
-   //   ksw  = neighborFX[ksw];  
-   //   kbw  = neighborFX[kbw];  
-   //   kbs  = kbsw;  
-   //   kbsw = neighborFX[kbsw]; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_SET = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_SET  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SET);
-	  ////vx2_SET  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SET);
-	  ////vx3_SET  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SET);
-
-   //   //kxyFromfcNEQ_SET    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SET) - ((vx1_SET*vx2_SET)));
-   //   //kyzFromfcNEQ_SET    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SET) - ((vx2_SET*vx3_SET)));
-   //   //kxzFromfcNEQ_SET    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SET) - ((vx1_SET*vx3_SET)));
-   //   //kxxMyyFromfcNEQ_SET = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_SET) - ((vx1_SET*vx1_SET-vx2_SET*vx2_SET)));
-   //   //kxxMzzFromfcNEQ_SET = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_SET) - ((vx1_SET*vx1_SET-vx3_SET*vx3_SET)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_SET += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_SET  += vx1_tmp;
-	  //vx2_SET  += vx2_tmp;
-	  //vx3_SET  += vx3_tmp;
-
-   //   drho_SET *= c1o2;
-   //   vx1_SET  *= c1o2;
-	  //vx2_SET  *= c1o2;
-	  //vx3_SET  *= c1o2;
-
-   //   kxyFromfcNEQ_SET    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_SET    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_SET    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_SET += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_SET += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_SET    *= c1o2;
-	  //kyzFromfcNEQ_SET    *= c1o2;
-	  //kxzFromfcNEQ_SET    *= c1o2;
-	  //kxxMyyFromfcNEQ_SET *= c1o2;
-	  //kxxMzzFromfcNEQ_SET *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //SEB//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kb   = kzero;   
-   //   kbw  = kw;  
-   //   kbs  = ks;  
-   //   kbsw = ksw; 
-   //   kzero= k0w;
-   //   kw   = neighborFX[k0w];   
-   //   ks   = k0sw;   
-   //   ksw  = neighborFX[k0sw];  
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_SEB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_SEB  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_SEB);
-	  ////vx2_SEB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_SEB);
-	  ////vx3_SEB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_SEB);
-
-   //   //kxyFromfcNEQ_SEB    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_SEB) - ((vx1_SEB*vx2_SEB)));
-   //   //kyzFromfcNEQ_SEB    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_SEB) - ((vx2_SEB*vx3_SEB)));
-   //   //kxzFromfcNEQ_SEB    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_SEB) - ((vx1_SEB*vx3_SEB)));
-   //   //kxxMyyFromfcNEQ_SEB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_SEB) - ((vx1_SEB*vx1_SEB-vx2_SEB*vx2_SEB)));
-   //   //kxxMzzFromfcNEQ_SEB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_SEB) - ((vx1_SEB*vx1_SEB-vx3_SEB*vx3_SEB)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_SEB += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_SEB  += vx1_tmp;
-	  //vx2_SEB  += vx2_tmp;
-	  //vx3_SEB  += vx3_tmp;
-
-   //   drho_SEB *= c1o2;
-   //   vx1_SEB  *= c1o2;
-	  //vx2_SEB  *= c1o2;
-	  //vx3_SEB  *= c1o2;
-
-   //   kxyFromfcNEQ_SEB    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_SEB    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_SEB    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_SEB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_SEB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_SEB    *= c1o2;
-	  //kyzFromfcNEQ_SEB    *= c1o2;
-	  //kxzFromfcNEQ_SEB    *= c1o2;
-	  //kxxMyyFromfcNEQ_SEB *= c1o2;
-	  //kxxMzzFromfcNEQ_SEB *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //NWB//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 0
-   //   k0zero= k0s;
-   //   k0w   = k0sw;
-   //   k0s   = neighborFY[k0s];
-   //   k0b   = k0bs;
-   //   k0sw  = neighborFY[k0sw];
-   //   k0bw  = k0bsw;
-   //   k0bs  = neighborFY[k0bs];
-   //   k0bsw = neighborFY[k0bsw];
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= k0zero;
-   //   kw   = k0w;   
-   //   ks   = k0s;   
-   //   kb   = k0b;   
-   //   ksw  = k0sw;  
-   //   kbw  = k0bw;  
-   //   kbs  = k0bs;  
-   //   kbsw = k0bsw; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_NWB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_NWB  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NWB);
-	  ////vx2_NWB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NWB);
-	  ////vx3_NWB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NWB);
-
-   //   //kxyFromfcNEQ_NWB    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NWB) - ((vx1_NWB*vx2_NWB)));
-   //   //kyzFromfcNEQ_NWB    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NWB) - ((vx2_NWB*vx3_NWB)));
-   //   //kxzFromfcNEQ_NWB    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NWB) - ((vx1_NWB*vx3_NWB)));
-   //   //kxxMyyFromfcNEQ_NWB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_NWB) - ((vx1_NWB*vx1_NWB-vx2_NWB*vx2_NWB)));
-   //   //kxxMzzFromfcNEQ_NWB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_NWB) - ((vx1_NWB*vx1_NWB-vx3_NWB*vx3_NWB)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_NWB += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_NWB  += vx1_tmp;
-	  //vx2_NWB  += vx2_tmp;
-	  //vx3_NWB  += vx3_tmp;
-
-   //   drho_NWB *= c1o2;
-   //   vx1_NWB  *= c1o2;
-	  //vx2_NWB  *= c1o2;
-	  //vx3_NWB  *= c1o2;
-
-   //   kxyFromfcNEQ_NWB    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_NWB    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_NWB    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_NWB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_NWB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_NWB    *= c1o2;
-	  //kyzFromfcNEQ_NWB    *= c1o2;
-	  //kxzFromfcNEQ_NWB    *= c1o2;
-	  //kxxMyyFromfcNEQ_NWB *= c1o2;
-	  //kxxMzzFromfcNEQ_NWB *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //NWT//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= kb;
-   //   kw   = kbw;   
-   //   ks   = kbs;   
-   //   kb   = neighborFZ[kb];   
-   //   ksw  = kbsw;  
-   //   kbw  = neighborFZ[kbw];  
-   //   kbs  = neighborFZ[kbs];  
-   //   kbsw = neighborFZ[kbsw]; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_NWT = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_NWT  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NWT);
-	  ////vx2_NWT  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NWT);
-	  ////vx3_NWT  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NWT);
-
-   //   //kxyFromfcNEQ_NWT    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NWT) - ((vx1_NWT*vx2_NWT)));
-   //   //kyzFromfcNEQ_NWT    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NWT) - ((vx2_NWT*vx3_NWT)));
-   //   //kxzFromfcNEQ_NWT    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NWT) - ((vx1_NWT*vx3_NWT)));
-   //   //kxxMyyFromfcNEQ_NWT = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_NWT) - ((vx1_NWT*vx1_NWT-vx2_NWT*vx2_NWT)));
-   //   //kxxMzzFromfcNEQ_NWT = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_NWT) - ((vx1_NWT*vx1_NWT-vx3_NWT*vx3_NWT)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_NWT += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_NWT  += vx1_tmp;
-	  //vx2_NWT  += vx2_tmp;
-	  //vx3_NWT  += vx3_tmp;
-
-   //   drho_NWT *= c1o2;
-   //   vx1_NWT  *= c1o2;
-	  //vx2_NWT  *= c1o2;
-	  //vx3_NWT  *= c1o2;
-
-   //   kxyFromfcNEQ_NWT    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_NWT    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_NWT    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_NWT += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_NWT += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_NWT    *= c1o2;
-	  //kyzFromfcNEQ_NWT    *= c1o2;
-	  //kxzFromfcNEQ_NWT    *= c1o2;
-	  //kxxMyyFromfcNEQ_NWT *= c1o2;
-	  //kxxMzzFromfcNEQ_NWT *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //NET//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kzero= kw;
-   //   kw   = neighborFX[kw];   
-   //   ks   = ksw;   
-   //   kb   = kbw;   
-   //   ksw  = neighborFX[ksw];  
-   //   kbw  = neighborFX[kbw];  
-   //   kbs  = kbsw;  
-   //   kbsw = neighborFX[kbsw]; 
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_NET = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_NET  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NET);
-	  ////vx2_NET  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NET);
-	  ////vx3_NET  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NET);
-
-   //   //kxyFromfcNEQ_NET    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NET) - ((vx1_NET*vx2_NET)));
-   //   //kyzFromfcNEQ_NET    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NET) - ((vx2_NET*vx3_NET)));
-   //   //kxzFromfcNEQ_NET    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NET) - ((vx1_NET*vx3_NET)));
-   //   //kxxMyyFromfcNEQ_NET = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_NET) - ((vx1_NET*vx1_NET-vx2_NET*vx2_NET)));
-   //   //kxxMzzFromfcNEQ_NET = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_NET) - ((vx1_NET*vx1_NET-vx3_NET*vx3_NET)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_NET += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_NET  += vx1_tmp;
-	  //vx2_NET  += vx2_tmp;
-	  //vx3_NET  += vx3_tmp;
-
-   //   drho_NET *= c1o2;
-   //   vx1_NET  *= c1o2;
-	  //vx2_NET  *= c1o2;
-	  //vx3_NET  *= c1o2;
-
-   //   kxyFromfcNEQ_NET    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_NET    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_NET    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_NET += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_NET += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_NET    *= c1o2;
-	  //kyzFromfcNEQ_NET    *= c1o2;
-	  //kxzFromfcNEQ_NET    *= c1o2;
-	  //kxxMyyFromfcNEQ_NET *= c1o2;
-	  //kxxMzzFromfcNEQ_NET *= c1o2;
-
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //NEB//
-   //   //////////////////////////////////////////////////////////////////////////
-   //   //index 
-   //   kb   = kzero;   
-   //   kbw  = kw;  
-   //   kbs  = ks;  
-   //   kbsw = ksw; 
-   //   kzero= k0w;
-   //   kw   = neighborFX[k0w];   
-   //   ks   = k0sw;   
-   //   ksw  = neighborFX[k0sw];  
-   //   ////////////////////////////////////////////////////////////////////////////////
-   //   f_E    = fwF[kw];
-   //   f_W    = feF[kzero];
-   //   f_N    = fsF[ks];
-   //   f_S    = fnF[kzero];
-   //   f_T    = fbF[kb];
-   //   f_B    = ftF[kzero];
-   //   f_NE   = fswF[ksw];
-   //   f_SW   = fneF[kzero];
-   //   f_SE   = fnwF[kw];
-   //   f_NW   = fseF[ks];
-   //   f_TE   = fbwF[kbw];
-   //   f_BW   = fteF[kzero];
-   //   f_BE   = ftwF[kw];
-   //   f_TW   = fbeF[kb];
-   //   f_TN   = fbsF[kbs];
-   //   f_BS   = ftnF[kzero];
-   //   f_BN   = ftsF[ks];
-   //   f_TS   = fbnF[kb];
-   //   f_ZERO = fzeroF[kzero];
-   //   f_TNE  = fbswF[kbsw];
-   //   f_TSW  = fbneF[kb];
-   //   f_TSE  = fbnwF[kbw];
-   //   f_TNW  = fbseF[kbs];
-   //   f_BNE  = ftswF[ksw];
-   //   f_BSW  = ftneF[kzero];
-   //   f_BSE  = ftnwF[kw];
-   //   f_BNW  = ftseF[ks];
-
-   //   //drho_NEB = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-   //   //vx1_NEB  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + drho_NEB);
-	  ////vx2_NEB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + drho_NEB);
-	  ////vx3_NEB  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + drho_NEB);
-
-   //   //kxyFromfcNEQ_NEB    = -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + drho_NEB) - ((vx1_NEB*vx2_NEB)));
-   //   //kyzFromfcNEQ_NEB    = -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + drho_NEB) - ((vx2_NEB*vx3_NEB)));
-   //   //kxzFromfcNEQ_NEB    = -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + drho_NEB) - ((vx1_NEB*vx3_NEB)));
-   //   //kxxMyyFromfcNEQ_NEB = -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + drho_NEB) - ((vx1_NEB*vx1_NEB-vx2_NEB*vx2_NEB)));
-   //   //kxxMzzFromfcNEQ_NEB = -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + drho_NEB) - ((vx1_NEB*vx1_NEB-vx3_NEB*vx3_NEB)));
-
-	  //rho_tmp = f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+f_ZERO+f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
-	  //
-	  //drho_NEB += rho_tmp;
-
-	  //vx1_tmp  = (((f_TNE-f_BSW)+(f_TSE-f_BNW)+(f_BNE-f_TSW)+(f_BSE-f_TNW)) + (((f_NE-f_SW)+(f_TE-f_BW))+((f_SE-f_NW)+(f_BE-f_TW))) + (f_E-f_W))/(one + rho_tmp);
-	  //vx2_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_BNE-f_TSW)+(f_BNW-f_TSE)) + (((f_NE-f_SW)+(f_TN-f_BS))+((f_BN-f_TS)+(f_NW-f_SE))) + (f_N-f_S))/(one + rho_tmp);
-	  //vx3_tmp  = (((f_TNE-f_BSW)+(f_TNW-f_BSE)+(f_TSE-f_BNW)+(f_TSW-f_BNE)) + (((f_TE-f_BW)+(f_TN-f_BS))+((f_TW-f_BE)+(f_TS-f_BN))) + (f_T-f_B))/(one + rho_tmp);
-
-   //   vx1_NEB  += vx1_tmp;
-	  //vx2_NEB  += vx2_tmp;
-	  //vx3_NEB  += vx3_tmp;
-
-   //   drho_NEB *= c1o2;
-   //   vx1_NEB  *= c1o2;
-	  //vx2_NEB  *= c1o2;
-	  //vx3_NEB  *= c1o2;
-
-   //   kxyFromfcNEQ_NEB    += -three*omegaS/(one-omegaS)*((f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx2_tmp)));
-   //   kyzFromfcNEQ_NEB    += -three*omegaS/(one-omegaS)*((f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW ) / (one + rho_tmp) - ((vx2_tmp*vx3_tmp)));
-   //   kxzFromfcNEQ_NEB    += -three*omegaS/(one-omegaS)*((f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE ) / (one + rho_tmp) - ((vx1_tmp*vx3_tmp)));
-   //   kxxMyyFromfcNEQ_NEB += -c3o2*omegaS/(one-omegaS) *((f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx2_tmp*vx2_tmp)));
-   //   kxxMzzFromfcNEQ_NEB += -c3o2*omegaS/(one-omegaS) *((f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE             ) / (one + rho_tmp) - ((vx1_tmp*vx1_tmp-vx3_tmp*vx3_tmp)));
-
-	  //kxyFromfcNEQ_NEB    *= c1o2;
-	  //kyzFromfcNEQ_NEB    *= c1o2;
-	  //kxzFromfcNEQ_NEB    *= c1o2;
-	  //kxxMyyFromfcNEQ_NEB *= c1o2;
-	  //kxxMzzFromfcNEQ_NEB *= c1o2;
-	  //
-	  //
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  
-	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //kxyFromfcNEQ_SWB    = zero;
-	  //kyzFromfcNEQ_SWB    = zero;
-	  //kxzFromfcNEQ_SWB    = zero;
-	  //kxxMyyFromfcNEQ_SWB = zero;
-	  //kxxMzzFromfcNEQ_SWB = zero;
-	  //kxyFromfcNEQ_SWT    = zero;
-	  //kyzFromfcNEQ_SWT    = zero;
-	  //kxzFromfcNEQ_SWT    = zero;
-	  //kxxMyyFromfcNEQ_SWT = zero;
-	  //kxxMzzFromfcNEQ_SWT = zero;
-	  //kxyFromfcNEQ_SET    = zero;
-	  //kyzFromfcNEQ_SET    = zero;
-	  //kxzFromfcNEQ_SET    = zero;
-	  //kxxMyyFromfcNEQ_SET = zero;
-	  //kxxMzzFromfcNEQ_SET = zero;
-	  //kxyFromfcNEQ_SEB    = zero;
-	  //kyzFromfcNEQ_SEB    = zero;
-	  //kxzFromfcNEQ_SEB    = zero;
-	  //kxxMyyFromfcNEQ_SEB = zero;
-	  //kxxMzzFromfcNEQ_SEB = zero;
-	  //kxyFromfcNEQ_NWB    = zero;
-	  //kyzFromfcNEQ_NWB    = zero;
-	  //kxzFromfcNEQ_NWB    = zero;
-	  //kxxMyyFromfcNEQ_NWB = zero;
-	  //kxxMzzFromfcNEQ_NWB = zero;
-	  //kxyFromfcNEQ_NWT    = zero;
-	  //kyzFromfcNEQ_NWT    = zero;
-	  //kxzFromfcNEQ_NWT    = zero;
-	  //kxxMyyFromfcNEQ_NWT = zero;
-	  //kxxMzzFromfcNEQ_NWT = zero;
-	  //kxyFromfcNEQ_NET    = zero;
-	  //kyzFromfcNEQ_NET    = zero;
-	  //kxzFromfcNEQ_NET    = zero;
-	  //kxxMyyFromfcNEQ_NET = zero;
-	  //kxxMzzFromfcNEQ_NET = zero;
-	  //kxyFromfcNEQ_NEB    = zero;
-	  //kyzFromfcNEQ_NEB    = zero;
-	  //kxzFromfcNEQ_NEB    = zero;
-	  //kxxMyyFromfcNEQ_NEB = zero;
-	  //kxxMzzFromfcNEQ_NEB = zero;
       //////////////////////////////////////////////////////////////////////////
       //3
       //////////////////////////////////////////////////////////////////////////
@@ -13278,8 +12474,8 @@ __global__ void scaleFC_Fix_comp_27(  real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -13291,101 +12487,101 @@ __global__ void scaleFC_Fix_comp_27(  real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffFC offFC)
+												 ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -13435,9 +12631,9 @@ __global__ void scaleFC_Fix_comp_27(  real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -15138,8 +14334,8 @@ __global__ void scaleFC_NSPress_27(   real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -15151,101 +14347,101 @@ __global__ void scaleFC_NSPress_27(   real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffFC offFC)
+												 ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -15292,9 +14488,9 @@ __global__ void scaleFC_NSPress_27(   real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -16344,8 +15540,8 @@ __global__ void scaleFC_Fix_27(   real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posC, 
                                              unsigned int* posFSWB, 
@@ -16357,101 +15553,101 @@ __global__ void scaleFC_Fix_27(   real* DC,
                                              unsigned int nyC, 
                                              unsigned int nxF, 
                                              unsigned int nyF,
-                                             OffFC offFC)
+                                             ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -16499,9 +15695,9 @@ __global__ void scaleFC_Fix_27(   real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -17704,8 +16900,8 @@ __global__ void scaleFCpress27(real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posC, 
                                           unsigned int* posFSWB, 
@@ -17717,101 +16913,101 @@ __global__ void scaleFCpress27(real* DC,
                                           unsigned int nyC, 
                                           unsigned int nxF, 
                                           unsigned int nyF,
-                                          OffFC offFC)
+                                          ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -17861,9 +17057,9 @@ __global__ void scaleFCpress27(real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -18629,8 +17825,8 @@ __global__ void scaleFCLast27( real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posC, 
                                           unsigned int* posFSWB, 
@@ -18642,101 +17838,101 @@ __global__ void scaleFCLast27( real* DC,
                                           unsigned int nyC, 
                                           unsigned int nxF, 
                                           unsigned int nyF,
-                                          OffFC offFC)
+                                          ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -18786,9 +17982,9 @@ __global__ void scaleFCLast27( real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -20027,140 +19223,137 @@ __global__ void scaleFCThSMG7(    real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posC, 
                                              unsigned int* posFSWB, 
                                              unsigned int kFC, 
                                              real nu,
                                              real diffusivity_coarse,
-                                             OffFC offFC)
+                                             ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    Distributions7 D7F;
-   D7F.f[0] = &DD7F[0*size_MatF];
-   D7F.f[1] = &DD7F[1*size_MatF];
-   D7F.f[2] = &DD7F[2*size_MatF];
-   D7F.f[3] = &DD7F[3*size_MatF];
-   D7F.f[4] = &DD7F[4*size_MatF];
-   D7F.f[5] = &DD7F[5*size_MatF];
-   D7F.f[6] = &DD7F[6*size_MatF];
+   D7F.f[0] = &DD7F[0*numberOfLBnodesFine];
+   D7F.f[1] = &DD7F[1*numberOfLBnodesFine];
+   D7F.f[2] = &DD7F[2*numberOfLBnodesFine];
+   D7F.f[3] = &DD7F[3*numberOfLBnodesFine];
+   D7F.f[4] = &DD7F[4*numberOfLBnodesFine];
+   D7F.f[5] = &DD7F[5*numberOfLBnodesFine];
+   D7F.f[6] = &DD7F[6*numberOfLBnodesFine];
 
    Distributions7 D7C;
    if (isEvenTimestep==true)
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[1] = &DD7C[1*size_MatC];
-      D7C.f[2] = &DD7C[2*size_MatC];
-      D7C.f[3] = &DD7C[3*size_MatC];
-      D7C.f[4] = &DD7C[4*size_MatC];
-      D7C.f[5] = &DD7C[5*size_MatC];
-      D7C.f[6] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse];
    }
    else
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[2] = &DD7C[1*size_MatC];
-      D7C.f[1] = &DD7C[2*size_MatC];
-      D7C.f[4] = &DD7C[3*size_MatC];
-      D7C.f[3] = &DD7C[4*size_MatC];
-      D7C.f[6] = &DD7C[5*size_MatC];
-      D7C.f[5] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -20199,9 +19392,9 @@ __global__ void scaleFCThSMG7(    real* DC,
 
    if(k<kFC){
       //////////////////////////////////////////////////////////////////////////
-      real xoff = offFC.xOffFC[k];
-      real yoff = offFC.yOffFC[k];
-      real zoff = offFC.zOffFC[k];      
+      real xoff = offFC.x[k];
+      real yoff = offFC.y[k];
+      real zoff = offFC.z[k];      
       real xoff_sq = xoff * xoff;
       real yoff_sq = yoff * yoff;
       real zoff_sq = zoff * zoff;
@@ -20900,8 +20093,8 @@ __global__ void scaleFCThS7(   real* DC,
                                           unsigned int* neighborFX,
                                           unsigned int* neighborFY,
                                           unsigned int* neighborFZ,
-                                          unsigned int size_MatC, 
-                                          unsigned int size_MatF, 
+                                          unsigned long long numberOfLBnodesCoarse, 
+                                          unsigned long long numberOfLBnodesFine, 
                                           bool isEvenTimestep,
                                           unsigned int* posC, 
                                           unsigned int* posFSWB, 
@@ -20912,127 +20105,124 @@ __global__ void scaleFCThS7(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    Distributions7 D7F;
-   D7F.f[0] = &DD7F[0*size_MatF];
-   D7F.f[1] = &DD7F[1*size_MatF];
-   D7F.f[2] = &DD7F[2*size_MatF];
-   D7F.f[3] = &DD7F[3*size_MatF];
-   D7F.f[4] = &DD7F[4*size_MatF];
-   D7F.f[5] = &DD7F[5*size_MatF];
-   D7F.f[6] = &DD7F[6*size_MatF];
+   D7F.f[0] = &DD7F[0*numberOfLBnodesFine];
+   D7F.f[1] = &DD7F[1*numberOfLBnodesFine];
+   D7F.f[2] = &DD7F[2*numberOfLBnodesFine];
+   D7F.f[3] = &DD7F[3*numberOfLBnodesFine];
+   D7F.f[4] = &DD7F[4*numberOfLBnodesFine];
+   D7F.f[5] = &DD7F[5*numberOfLBnodesFine];
+   D7F.f[6] = &DD7F[6*numberOfLBnodesFine];
 
    Distributions7 D7C;
    if (isEvenTimestep==true)
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[1] = &DD7C[1*size_MatC];
-      D7C.f[2] = &DD7C[2*size_MatC];
-      D7C.f[3] = &DD7C[3*size_MatC];
-      D7C.f[4] = &DD7C[4*size_MatC];
-      D7C.f[5] = &DD7C[5*size_MatC];
-      D7C.f[6] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[6*numberOfLBnodesCoarse];
    }
    else
    {
-      D7C.f[0] = &DD7C[0*size_MatC];
-      D7C.f[2] = &DD7C[1*size_MatC];
-      D7C.f[1] = &DD7C[2*size_MatC];
-      D7C.f[4] = &DD7C[3*size_MatC];
-      D7C.f[3] = &DD7C[4*size_MatC];
-      D7C.f[6] = &DD7C[5*size_MatC];
-      D7C.f[5] = &DD7C[6*size_MatC];
+      D7C.f[0] = &DD7C[0*numberOfLBnodesCoarse];
+      D7C.f[2] = &DD7C[1*numberOfLBnodesCoarse];
+      D7C.f[1] = &DD7C[2*numberOfLBnodesCoarse];
+      D7C.f[4] = &DD7C[3*numberOfLBnodesCoarse];
+      D7C.f[3] = &DD7C[4*numberOfLBnodesCoarse];
+      D7C.f[6] = &DD7C[5*numberOfLBnodesCoarse];
+      D7C.f[5] = &DD7C[6*numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -21691,200 +20881,197 @@ __global__ void scaleFCThS27(     real* DC,
                                              unsigned int* neighborFX,
                                              unsigned int* neighborFY,
                                              unsigned int* neighborFZ,
-                                             unsigned int size_MatC, 
-                                             unsigned int size_MatF, 
+                                             unsigned long long numberOfLBnodesCoarse, 
+                                             unsigned long long numberOfLBnodesFine, 
                                              bool isEvenTimestep,
                                              unsigned int* posC, 
                                              unsigned int* posFSWB, 
                                              unsigned int kFC, 
                                              real nu,
                                              real diffusivity_coarse,
-											 OffFC offFC)
+											 ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   //fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      //fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    Distributions27 D27F;
-   D27F.f[DIR_P00   ] = &DD27F[DIR_P00   *size_MatF];
-   D27F.f[DIR_M00   ] = &DD27F[DIR_M00   *size_MatF];
-   D27F.f[DIR_0P0   ] = &DD27F[DIR_0P0   *size_MatF];
-   D27F.f[DIR_0M0   ] = &DD27F[DIR_0M0   *size_MatF];
-   D27F.f[DIR_00P   ] = &DD27F[DIR_00P   *size_MatF];
-   D27F.f[DIR_00M   ] = &DD27F[DIR_00M   *size_MatF];
-   D27F.f[DIR_PP0  ] = &DD27F[DIR_PP0  *size_MatF];
-   D27F.f[DIR_MM0  ] = &DD27F[DIR_MM0  *size_MatF];
-   D27F.f[DIR_PM0  ] = &DD27F[DIR_PM0  *size_MatF];
-   D27F.f[DIR_MP0  ] = &DD27F[DIR_MP0  *size_MatF];
-   D27F.f[DIR_P0P  ] = &DD27F[DIR_P0P  *size_MatF];
-   D27F.f[DIR_M0M  ] = &DD27F[DIR_M0M  *size_MatF];
-   D27F.f[DIR_P0M  ] = &DD27F[DIR_P0M  *size_MatF];
-   D27F.f[DIR_M0P  ] = &DD27F[DIR_M0P  *size_MatF];
-   D27F.f[DIR_0PP  ] = &DD27F[DIR_0PP  *size_MatF];
-   D27F.f[DIR_0MM  ] = &DD27F[DIR_0MM  *size_MatF];
-   D27F.f[DIR_0PM  ] = &DD27F[DIR_0PM  *size_MatF];
-   D27F.f[DIR_0MP  ] = &DD27F[DIR_0MP  *size_MatF];
-   D27F.f[DIR_000] = &DD27F[DIR_000*size_MatF];
-   D27F.f[DIR_PPP ] = &DD27F[DIR_PPP *size_MatF];
-   D27F.f[DIR_MMP ] = &DD27F[DIR_MMP *size_MatF];
-   D27F.f[DIR_PMP ] = &DD27F[DIR_PMP *size_MatF];
-   D27F.f[DIR_MPP ] = &DD27F[DIR_MPP *size_MatF];
-   D27F.f[DIR_PPM ] = &DD27F[DIR_PPM *size_MatF];
-   D27F.f[DIR_MMM ] = &DD27F[DIR_MMM *size_MatF];
-   D27F.f[DIR_PMM ] = &DD27F[DIR_PMM *size_MatF];
-   D27F.f[DIR_MPM ] = &DD27F[DIR_MPM *size_MatF];
+   D27F.f[DIR_P00] = &DD27F[DIR_P00 * numberOfLBnodesFine];
+   D27F.f[DIR_M00] = &DD27F[DIR_M00 * numberOfLBnodesFine];
+   D27F.f[DIR_0P0] = &DD27F[DIR_0P0 * numberOfLBnodesFine];
+   D27F.f[DIR_0M0] = &DD27F[DIR_0M0 * numberOfLBnodesFine];
+   D27F.f[DIR_00P] = &DD27F[DIR_00P * numberOfLBnodesFine];
+   D27F.f[DIR_00M] = &DD27F[DIR_00M * numberOfLBnodesFine];
+   D27F.f[DIR_PP0] = &DD27F[DIR_PP0 * numberOfLBnodesFine];
+   D27F.f[DIR_MM0] = &DD27F[DIR_MM0 * numberOfLBnodesFine];
+   D27F.f[DIR_PM0] = &DD27F[DIR_PM0 * numberOfLBnodesFine];
+   D27F.f[DIR_MP0] = &DD27F[DIR_MP0 * numberOfLBnodesFine];
+   D27F.f[DIR_P0P] = &DD27F[DIR_P0P * numberOfLBnodesFine];
+   D27F.f[DIR_M0M] = &DD27F[DIR_M0M * numberOfLBnodesFine];
+   D27F.f[DIR_P0M] = &DD27F[DIR_P0M * numberOfLBnodesFine];
+   D27F.f[DIR_M0P] = &DD27F[DIR_M0P * numberOfLBnodesFine];
+   D27F.f[DIR_0PP] = &DD27F[DIR_0PP * numberOfLBnodesFine];
+   D27F.f[DIR_0MM] = &DD27F[DIR_0MM * numberOfLBnodesFine];
+   D27F.f[DIR_0PM] = &DD27F[DIR_0PM * numberOfLBnodesFine];
+   D27F.f[DIR_0MP] = &DD27F[DIR_0MP * numberOfLBnodesFine];
+   D27F.f[DIR_000] = &DD27F[DIR_000 * numberOfLBnodesFine];
+   D27F.f[DIR_PPP] = &DD27F[DIR_PPP * numberOfLBnodesFine];
+   D27F.f[DIR_MMP] = &DD27F[DIR_MMP * numberOfLBnodesFine];
+   D27F.f[DIR_PMP] = &DD27F[DIR_PMP * numberOfLBnodesFine];
+   D27F.f[DIR_MPP] = &DD27F[DIR_MPP * numberOfLBnodesFine];
+   D27F.f[DIR_PPM] = &DD27F[DIR_PPM * numberOfLBnodesFine];
+   D27F.f[DIR_MMM] = &DD27F[DIR_MMM * numberOfLBnodesFine];
+   D27F.f[DIR_PMM] = &DD27F[DIR_PMM * numberOfLBnodesFine];
+   D27F.f[DIR_MPM] = &DD27F[DIR_MPM * numberOfLBnodesFine];
 
    Distributions27 D27C;
    if (isEvenTimestep==true)
    {
-      D27C.f[DIR_P00   ] = &DD27C[DIR_P00   *size_MatC];
-      D27C.f[DIR_M00   ] = &DD27C[DIR_M00   *size_MatC];
-      D27C.f[DIR_0P0   ] = &DD27C[DIR_0P0   *size_MatC];
-      D27C.f[DIR_0M0   ] = &DD27C[DIR_0M0   *size_MatC];
-      D27C.f[DIR_00P   ] = &DD27C[DIR_00P   *size_MatC];
-      D27C.f[DIR_00M   ] = &DD27C[DIR_00M   *size_MatC];
-      D27C.f[DIR_PP0  ] = &DD27C[DIR_PP0  *size_MatC];
-      D27C.f[DIR_MM0  ] = &DD27C[DIR_MM0  *size_MatC];
-      D27C.f[DIR_PM0  ] = &DD27C[DIR_PM0  *size_MatC];
-      D27C.f[DIR_MP0  ] = &DD27C[DIR_MP0  *size_MatC];
-      D27C.f[DIR_P0P  ] = &DD27C[DIR_P0P  *size_MatC];
-      D27C.f[DIR_M0M  ] = &DD27C[DIR_M0M  *size_MatC];
-      D27C.f[DIR_P0M  ] = &DD27C[DIR_P0M  *size_MatC];
-      D27C.f[DIR_M0P  ] = &DD27C[DIR_M0P  *size_MatC];
-      D27C.f[DIR_0PP  ] = &DD27C[DIR_0PP  *size_MatC];
-      D27C.f[DIR_0MM  ] = &DD27C[DIR_0MM  *size_MatC];
-      D27C.f[DIR_0PM  ] = &DD27C[DIR_0PM  *size_MatC];
-      D27C.f[DIR_0MP  ] = &DD27C[DIR_0MP  *size_MatC];
-      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
-      D27C.f[DIR_PPP ] = &DD27C[DIR_PPP *size_MatC];
-      D27C.f[DIR_MMP ] = &DD27C[DIR_MMP *size_MatC];
-      D27C.f[DIR_PMP ] = &DD27C[DIR_PMP *size_MatC];
-      D27C.f[DIR_MPP ] = &DD27C[DIR_MPP *size_MatC];
-      D27C.f[DIR_PPM ] = &DD27C[DIR_PPM *size_MatC];
-      D27C.f[DIR_MMM ] = &DD27C[DIR_MMM *size_MatC];
-      D27C.f[DIR_PMM ] = &DD27C[DIR_PMM *size_MatC];
-      D27C.f[DIR_MPM ] = &DD27C[DIR_MPM *size_MatC];
+      D27C.f[DIR_P00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_M00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0P0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0M0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_00P] = &DD27C[DIR_00P * numberOfLBnodesCoarse];
+      D27C.f[DIR_00M] = &DD27C[DIR_00M * numberOfLBnodesCoarse];
+      D27C.f[DIR_PP0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MM0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PM0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MP0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0P] = &DD27C[DIR_P0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0M] = &DD27C[DIR_M0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0M] = &DD27C[DIR_P0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0P] = &DD27C[DIR_M0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PP] = &DD27C[DIR_0PP * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MM] = &DD27C[DIR_0MM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PM] = &DD27C[DIR_0PM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MP] = &DD27C[DIR_0MP * numberOfLBnodesCoarse];
+      D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPP] = &DD27C[DIR_PPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMP] = &DD27C[DIR_MMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMP] = &DD27C[DIR_PMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPP] = &DD27C[DIR_MPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPM] = &DD27C[DIR_PPM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMM] = &DD27C[DIR_MMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMM] = &DD27C[DIR_PMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPM] = &DD27C[DIR_MPM * numberOfLBnodesCoarse];
    }
    else
    {
-      D27C.f[DIR_M00   ] = &DD27C[DIR_P00   *size_MatC];
-      D27C.f[DIR_P00   ] = &DD27C[DIR_M00   *size_MatC];
-      D27C.f[DIR_0M0   ] = &DD27C[DIR_0P0   *size_MatC];
-      D27C.f[DIR_0P0   ] = &DD27C[DIR_0M0   *size_MatC];
-      D27C.f[DIR_00M   ] = &DD27C[DIR_00P   *size_MatC];
-      D27C.f[DIR_00P   ] = &DD27C[DIR_00M   *size_MatC];
-      D27C.f[DIR_MM0  ] = &DD27C[DIR_PP0  *size_MatC];
-      D27C.f[DIR_PP0  ] = &DD27C[DIR_MM0  *size_MatC];
-      D27C.f[DIR_MP0  ] = &DD27C[DIR_PM0  *size_MatC];
-      D27C.f[DIR_PM0  ] = &DD27C[DIR_MP0  *size_MatC];
-      D27C.f[DIR_M0M  ] = &DD27C[DIR_P0P  *size_MatC];
-      D27C.f[DIR_P0P  ] = &DD27C[DIR_M0M  *size_MatC];
-      D27C.f[DIR_M0P  ] = &DD27C[DIR_P0M  *size_MatC];
-      D27C.f[DIR_P0M  ] = &DD27C[DIR_M0P  *size_MatC];
-      D27C.f[DIR_0MM  ] = &DD27C[DIR_0PP  *size_MatC];
-      D27C.f[DIR_0PP  ] = &DD27C[DIR_0MM  *size_MatC];
-      D27C.f[DIR_0MP  ] = &DD27C[DIR_0PM  *size_MatC];
-      D27C.f[DIR_0PM  ] = &DD27C[DIR_0MP  *size_MatC];
-      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
-      D27C.f[DIR_MMM ] = &DD27C[DIR_PPP *size_MatC];
-      D27C.f[DIR_PPM ] = &DD27C[DIR_MMP *size_MatC];
-      D27C.f[DIR_MPM ] = &DD27C[DIR_PMP *size_MatC];
-      D27C.f[DIR_PMM ] = &DD27C[DIR_MPP *size_MatC];
-      D27C.f[DIR_MMP ] = &DD27C[DIR_PPM *size_MatC];
-      D27C.f[DIR_PPP ] = &DD27C[DIR_MMM *size_MatC];
-      D27C.f[DIR_MPP ] = &DD27C[DIR_PMM *size_MatC];
-      D27C.f[DIR_PMP ] = &DD27C[DIR_MPM *size_MatC];
+      D27C.f[DIR_M00] = &DD27C[DIR_P00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_P00] = &DD27C[DIR_M00 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0M0] = &DD27C[DIR_0P0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_0P0] = &DD27C[DIR_0M0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_00M] = &DD27C[DIR_00P * numberOfLBnodesCoarse];
+      D27C.f[DIR_00P] = &DD27C[DIR_00M * numberOfLBnodesCoarse];
+      D27C.f[DIR_MM0] = &DD27C[DIR_PP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PP0] = &DD27C[DIR_MM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MP0] = &DD27C[DIR_PM0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_PM0] = &DD27C[DIR_MP0 * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0M] = &DD27C[DIR_P0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0P] = &DD27C[DIR_M0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_M0P] = &DD27C[DIR_P0M * numberOfLBnodesCoarse];
+      D27C.f[DIR_P0M] = &DD27C[DIR_M0P * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MM] = &DD27C[DIR_0PP * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PP] = &DD27C[DIR_0MM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0MP] = &DD27C[DIR_0PM * numberOfLBnodesCoarse];
+      D27C.f[DIR_0PM] = &DD27C[DIR_0MP * numberOfLBnodesCoarse];
+      D27C.f[DIR_000] = &DD27C[DIR_000 * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMM] = &DD27C[DIR_PPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPM] = &DD27C[DIR_MMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPM] = &DD27C[DIR_PMP * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMM] = &DD27C[DIR_MPP * numberOfLBnodesCoarse];
+      D27C.f[DIR_MMP] = &DD27C[DIR_PPM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PPP] = &DD27C[DIR_MMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_MPP] = &DD27C[DIR_PMM * numberOfLBnodesCoarse];
+      D27C.f[DIR_PMP] = &DD27C[DIR_MPM * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -21923,9 +21110,9 @@ __global__ void scaleFCThS27(     real* DC,
 
    if(k<kFC){
       //////////////////////////////////////////////////////////////////////////
-      xoff    = offFC.xOffFC[k];
-      yoff    = offFC.yOffFC[k];
-      zoff    = offFC.zOffFC[k];
+      xoff    = offFC.x[k];
+      yoff    = offFC.y[k];
+      zoff    = offFC.z[k];
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -21980,33 +21167,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_SWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22067,33 +21254,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_SWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22154,33 +21341,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_SET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22241,33 +21428,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_SEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22338,33 +21525,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_NWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22425,33 +21612,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_NWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22512,33 +21699,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_NET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22599,33 +21786,33 @@ __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
-      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
-      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
-      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
-      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
-      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
-      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
-      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
-      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
-      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
-      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
-      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
-      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
-      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
-      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27E    =  (D27F.f[DIR_P00])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0])[ks   ];
+      f27T    =  (D27F.f[DIR_00P])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP])[ks   ];//kts
       f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
-      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
-      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
-      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
-      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
-      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
+      f27TNE   = (D27F.f[DIR_PPP])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM])[kbw  ];//kbnw
 
       Conc_F_NEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22739,32 +21926,32 @@ __global__ void scaleFCThS27(     real* DC,
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D27C.f[DIR_000])[kzero] =   c8o27* Conc_C*(c1o1-cu_sq);
-      (D27C.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_C*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27C.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_C*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27C.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_C*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27C.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_C*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27C.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_C*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27C.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_C*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27C.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_C*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27C.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_C*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27C.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_C*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27C.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_C*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27C.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_C*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27C.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_C*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27C.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_C*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27C.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_C*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27C.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_C*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27C.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_C*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27C.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_C*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27C.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_C*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27C.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27C.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27C.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27C.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27C.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27C.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27C.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27C.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27C.f[DIR_P00])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_C*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27C.f[DIR_M00])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_C*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27C.f[DIR_0P0])[kzero] =   c2o27* (c3o1*(     My    )+Conc_C*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27C.f[DIR_0M0])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_C*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27C.f[DIR_00P])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_C*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27C.f[DIR_00M])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_C*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27C.f[DIR_PP0])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_C*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27C.f[DIR_MM0])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_C*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27C.f[DIR_PM0])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_C*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27C.f[DIR_MP0])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_C*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27C.f[DIR_P0P])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_C*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27C.f[DIR_M0M])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_C*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27C.f[DIR_P0M])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_C*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27C.f[DIR_M0P])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_C*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27C.f[DIR_0PP])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_C*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27C.f[DIR_0MM])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_C*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27C.f[DIR_0PM])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_C*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27C.f[DIR_0MP])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_C*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27C.f[DIR_PPP])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27C.f[DIR_MMM])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27C.f[DIR_PPM])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27C.f[DIR_MMP])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27C.f[DIR_PMP])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27C.f[DIR_MPM])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27C.f[DIR_PMM])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27C.f[DIR_MPP])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
    }
 }
@@ -22812,8 +21999,8 @@ __global__ void scaleFCEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-                                        unsigned int size_MatC, 
-                                        unsigned int size_MatF, 
+                                        unsigned long long numberOfLBnodesCoarse, 
+                                        unsigned long long numberOfLBnodesFine, 
                                         bool isEvenTimestep,
                                         unsigned int* posC, 
                                         unsigned int* posFSWB, 
@@ -22825,101 +22012,101 @@ __global__ void scaleFCEff27(real* DC,
                                         unsigned int nyC, 
                                         unsigned int nxF, 
                                         unsigned int nyF,
-                                        OffFC offFC)
+                                        ICellNeigh offFC)
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -22969,9 +22156,9 @@ __global__ void scaleFCEff27(real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -23791,8 +22978,8 @@ __global__ void scaleFC27(real* DC,
                                      unsigned int* neighborFX,
                                      unsigned int* neighborFY,
                                      unsigned int* neighborFZ,
-										       unsigned int size_MatC, 
-										       unsigned int size_MatF, 
+										       unsigned long long numberOfLBnodesCoarse, 
+										       unsigned long long numberOfLBnodesFine, 
 										       bool isEvenTimestep,
                                      unsigned int* posC, 
                                      unsigned int* posFSWB, 
@@ -23808,96 +22995,96 @@ __global__ void scaleFC27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
          *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[DIR_P00   *size_MatF];
-   fwF    = &DF[DIR_M00   *size_MatF];
-   fnF    = &DF[DIR_0P0   *size_MatF];
-   fsF    = &DF[DIR_0M0   *size_MatF];
-   ftF    = &DF[DIR_00P   *size_MatF];
-   fbF    = &DF[DIR_00M   *size_MatF];
-   fneF   = &DF[DIR_PP0  *size_MatF];
-   fswF   = &DF[DIR_MM0  *size_MatF];
-   fseF   = &DF[DIR_PM0  *size_MatF];
-   fnwF   = &DF[DIR_MP0  *size_MatF];
-   fteF   = &DF[DIR_P0P  *size_MatF];
-   fbwF   = &DF[DIR_M0M  *size_MatF];
-   fbeF   = &DF[DIR_P0M  *size_MatF];
-   ftwF   = &DF[DIR_M0P  *size_MatF];
-   ftnF   = &DF[DIR_0PP  *size_MatF];
-   fbsF   = &DF[DIR_0MM  *size_MatF];
-   fbnF   = &DF[DIR_0PM  *size_MatF];
-   ftsF   = &DF[DIR_0MP  *size_MatF];
-   fzeroF = &DF[DIR_000*size_MatF];
-   ftneF  = &DF[DIR_PPP *size_MatF];
-   ftswF  = &DF[DIR_MMP *size_MatF];
-   ftseF  = &DF[DIR_PMP *size_MatF];
-   ftnwF  = &DF[DIR_MPP *size_MatF];
-   fbneF  = &DF[DIR_PPM *size_MatF];
-   fbswF  = &DF[DIR_MMM *size_MatF];
-   fbseF  = &DF[DIR_PMM *size_MatF];
-   fbnwF  = &DF[DIR_MPM *size_MatF];
+   feF    = &DF[DIR_P00 * numberOfLBnodesFine];
+   fwF    = &DF[DIR_M00 * numberOfLBnodesFine];
+   fnF    = &DF[DIR_0P0 * numberOfLBnodesFine];
+   fsF    = &DF[DIR_0M0 * numberOfLBnodesFine];
+   ftF    = &DF[DIR_00P * numberOfLBnodesFine];
+   fbF    = &DF[DIR_00M * numberOfLBnodesFine];
+   fneF   = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fswF   = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fseF   = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fnwF   = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fteF   = &DF[DIR_P0P * numberOfLBnodesFine];
+   fbwF   = &DF[DIR_M0M * numberOfLBnodesFine];
+   fbeF   = &DF[DIR_P0M * numberOfLBnodesFine];
+   ftwF   = &DF[DIR_M0P * numberOfLBnodesFine];
+   ftnF   = &DF[DIR_0PP * numberOfLBnodesFine];
+   fbsF   = &DF[DIR_0MM * numberOfLBnodesFine];
+   fbnF   = &DF[DIR_0PM * numberOfLBnodesFine];
+   ftsF   = &DF[DIR_0MP * numberOfLBnodesFine];
+   fzeroF = &DF[DIR_000 * numberOfLBnodesFine];
+   ftneF  = &DF[DIR_PPP * numberOfLBnodesFine];
+   ftswF  = &DF[DIR_MMP * numberOfLBnodesFine];
+   ftseF  = &DF[DIR_PMP * numberOfLBnodesFine];
+   ftnwF  = &DF[DIR_MPP * numberOfLBnodesFine];
+   fbneF  = &DF[DIR_PPM * numberOfLBnodesFine];
+   fbswF  = &DF[DIR_MMM * numberOfLBnodesFine];
+   fbseF  = &DF[DIR_PMM * numberOfLBnodesFine];
+   fbnwF  = &DF[DIR_MPM * numberOfLBnodesFine];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
          *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[DIR_P00   *size_MatC];
-      fwC    = &DC[DIR_M00   *size_MatC];
-      fnC    = &DC[DIR_0P0   *size_MatC];
-      fsC    = &DC[DIR_0M0   *size_MatC];
-      ftC    = &DC[DIR_00P   *size_MatC];
-      fbC    = &DC[DIR_00M   *size_MatC];
-      fneC   = &DC[DIR_PP0  *size_MatC];
-      fswC   = &DC[DIR_MM0  *size_MatC];
-      fseC   = &DC[DIR_PM0  *size_MatC];
-      fnwC   = &DC[DIR_MP0  *size_MatC];
-      fteC   = &DC[DIR_P0P  *size_MatC];
-      fbwC   = &DC[DIR_M0M  *size_MatC];
-      fbeC   = &DC[DIR_P0M  *size_MatC];
-      ftwC   = &DC[DIR_M0P  *size_MatC];
-      ftnC   = &DC[DIR_0PP  *size_MatC];
-      fbsC   = &DC[DIR_0MM  *size_MatC];
-      fbnC   = &DC[DIR_0PM  *size_MatC];
-      ftsC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      ftneC  = &DC[DIR_PPP *size_MatC];
-      ftswC  = &DC[DIR_MMP *size_MatC];
-      ftseC  = &DC[DIR_PMP *size_MatC];
-      ftnwC  = &DC[DIR_MPP *size_MatC];
-      fbneC  = &DC[DIR_PPM *size_MatC];
-      fbswC  = &DC[DIR_MMM *size_MatC];
-      fbseC  = &DC[DIR_PMM *size_MatC];
-      fbnwC  = &DC[DIR_MPM *size_MatC];
+      feC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      fwC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    } 
    else
    {
-      fwC    = &DC[DIR_P00   *size_MatC];
-      feC    = &DC[DIR_M00   *size_MatC];
-      fsC    = &DC[DIR_0P0   *size_MatC];
-      fnC    = &DC[DIR_0M0   *size_MatC];
-      fbC    = &DC[DIR_00P   *size_MatC];
-      ftC    = &DC[DIR_00M   *size_MatC];
-      fswC   = &DC[DIR_PP0  *size_MatC];
-      fneC   = &DC[DIR_MM0  *size_MatC];
-      fnwC   = &DC[DIR_PM0  *size_MatC];
-      fseC   = &DC[DIR_MP0  *size_MatC];
-      fbwC   = &DC[DIR_P0P  *size_MatC];
-      fteC   = &DC[DIR_M0M  *size_MatC];
-      ftwC   = &DC[DIR_P0M  *size_MatC];
-      fbeC   = &DC[DIR_M0P  *size_MatC];
-      fbsC   = &DC[DIR_0PP  *size_MatC];
-      ftnC   = &DC[DIR_0MM  *size_MatC];
-      ftsC   = &DC[DIR_0PM  *size_MatC];
-      fbnC   = &DC[DIR_0MP  *size_MatC];
-      fzeroC = &DC[DIR_000*size_MatC];
-      fbswC  = &DC[DIR_PPP *size_MatC];
-      fbneC  = &DC[DIR_MMP *size_MatC];
-      fbnwC  = &DC[DIR_PMP *size_MatC];
-      fbseC  = &DC[DIR_MPP *size_MatC];
-      ftswC  = &DC[DIR_PPM *size_MatC];
-      ftneC  = &DC[DIR_MMM *size_MatC];
-      ftnwC  = &DC[DIR_PMM *size_MatC];
-      ftseC  = &DC[DIR_MPM *size_MatC];
+      fwC    = &DC[DIR_P00 * numberOfLBnodesCoarse];
+      feC    = &DC[DIR_M00 * numberOfLBnodesCoarse];
+      fsC    = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+      fnC    = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+      fbC    = &DC[DIR_00P * numberOfLBnodesCoarse];
+      ftC    = &DC[DIR_00M * numberOfLBnodesCoarse];
+      fswC   = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+      fneC   = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+      fnwC   = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+      fseC   = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+      fbwC   = &DC[DIR_P0P * numberOfLBnodesCoarse];
+      fteC   = &DC[DIR_M0M * numberOfLBnodesCoarse];
+      ftwC   = &DC[DIR_P0M * numberOfLBnodesCoarse];
+      fbeC   = &DC[DIR_M0P * numberOfLBnodesCoarse];
+      fbsC   = &DC[DIR_0PP * numberOfLBnodesCoarse];
+      ftnC   = &DC[DIR_0MM * numberOfLBnodesCoarse];
+      ftsC   = &DC[DIR_0PM * numberOfLBnodesCoarse];
+      fbnC   = &DC[DIR_0MP * numberOfLBnodesCoarse];
+      fzeroC = &DC[DIR_000 * numberOfLBnodesCoarse];
+      fbswC  = &DC[DIR_PPP * numberOfLBnodesCoarse];
+      fbneC  = &DC[DIR_MMP * numberOfLBnodesCoarse];
+      fbnwC  = &DC[DIR_PMP * numberOfLBnodesCoarse];
+      fbseC  = &DC[DIR_MPP * numberOfLBnodesCoarse];
+      ftswC  = &DC[DIR_PPM * numberOfLBnodesCoarse];
+      ftneC  = &DC[DIR_MMM * numberOfLBnodesCoarse];
+      ftnwC  = &DC[DIR_PMM * numberOfLBnodesCoarse];
+      ftseC  = &DC[DIR_MPM * numberOfLBnodesCoarse];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu
index e7fe8b50637e97b9c8cc34025216f4d02e684c55..7de51b3b0aec87e3e8773c08435c3ada445e9a41 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/ScaleFC_F3_27.cu
@@ -8,9 +8,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
@@ -23,8 +23,8 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 													 unsigned int* neighborFX,
 													 unsigned int* neighborFY,
 													 unsigned int* neighborFZ,
-													 unsigned int size_MatC, 
-													 unsigned int size_MatF, 
+													 unsigned long long numberOfLBnodesCoarse, 
+													 unsigned long long numberOfLBnodesFine, 
 													 bool isEvenTimestep,
 													 unsigned int* posC, 
 													 unsigned int* posFSWB, 
@@ -36,7 +36,7 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 													 unsigned int nyC, 
 													 unsigned int nxF, 
 													 unsigned int nyF,
-													 OffFC offFC)
+													 ICellNeigh offFC)
 {
    real 
 	   *fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -44,33 +44,33 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[DIR_P00   *size_MatF];
-   fM00source = &DF[DIR_M00   *size_MatF];
-   f0P0source = &DF[DIR_0P0   *size_MatF];
-   f0M0source = &DF[DIR_0M0   *size_MatF];
-   f00Psource = &DF[DIR_00P   *size_MatF];
-   f00Msource = &DF[DIR_00M   *size_MatF];
-   fPP0source = &DF[DIR_PP0  *size_MatF];
-   fMM0source = &DF[DIR_MM0  *size_MatF];
-   fPM0source = &DF[DIR_PM0  *size_MatF];
-   fMP0source = &DF[DIR_MP0  *size_MatF];
-   fP0Psource = &DF[DIR_P0P  *size_MatF];
-   fM0Msource = &DF[DIR_M0M  *size_MatF];
-   fP0Msource = &DF[DIR_P0M  *size_MatF];
-   fM0Psource = &DF[DIR_M0P  *size_MatF];
-   f0PPsource = &DF[DIR_0PP  *size_MatF];
-   f0MMsource = &DF[DIR_0MM  *size_MatF];
-   f0PMsource = &DF[DIR_0PM  *size_MatF];
-   f0MPsource = &DF[DIR_0MP  *size_MatF];
-   f000source = &DF[DIR_000*size_MatF];
-   fMMMsource = &DF[DIR_MMM *size_MatF];
-   fMMPsource = &DF[DIR_MMP *size_MatF];
-   fMPPsource = &DF[DIR_MPP *size_MatF];
-   fMPMsource = &DF[DIR_MPM *size_MatF];
-   fPPMsource = &DF[DIR_PPM *size_MatF];
-   fPPPsource = &DF[DIR_PPP *size_MatF];
-   fPMPsource = &DF[DIR_PMP *size_MatF];
-   fPMMsource = &DF[DIR_PMM *size_MatF];
+   fP00source = &DF[DIR_P00 * numberOfLBnodesFine];
+   fM00source = &DF[DIR_M00 * numberOfLBnodesFine];
+   f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine];
+   f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine];
+   f00Psource = &DF[DIR_00P * numberOfLBnodesFine];
+   f00Msource = &DF[DIR_00M * numberOfLBnodesFine];
+   fPP0source = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine];
+   fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine];
+   fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine];
+   fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine];
+   f0PPsource = &DF[DIR_0PP * numberOfLBnodesFine];
+   f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine];
+   f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine];
+   f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine];
+   f000source = &DF[DIR_000 * numberOfLBnodesFine];
+   fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine];
+   fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine];
+   fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine];
+   fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine];
+   fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine];
+   fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine];
+   fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine];
+   fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -79,83 +79,83 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[DIR_P00   *size_MatC];
-	   fM00dest = &DC[DIR_M00   *size_MatC];
-	   f0P0dest = &DC[DIR_0P0   *size_MatC];
-	   f0M0dest = &DC[DIR_0M0   *size_MatC];
-	   f00Pdest = &DC[DIR_00P   *size_MatC];
-	   f00Mdest = &DC[DIR_00M   *size_MatC];
-	   fPP0dest = &DC[DIR_PP0  *size_MatC];
-	   fMM0dest = &DC[DIR_MM0  *size_MatC];
-	   fPM0dest = &DC[DIR_PM0  *size_MatC];
-	   fMP0dest = &DC[DIR_MP0  *size_MatC];
-	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
-	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
-	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
-	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
-	   f0PPdest = &DC[DIR_0PP  *size_MatC];
-	   f0MMdest = &DC[DIR_0MM  *size_MatC];
-	   f0PMdest = &DC[DIR_0PM  *size_MatC];
-	   f0MPdest = &DC[DIR_0MP  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_MMM *size_MatC];
-	   fMMPdest = &DC[DIR_MMP *size_MatC];
-	   fMPPdest = &DC[DIR_MPP *size_MatC];
-	   fMPMdest = &DC[DIR_MPM *size_MatC];
-	   fPPMdest = &DC[DIR_PPM *size_MatC];
-	   fPPPdest = &DC[DIR_PPP *size_MatC];
-	   fPMPdest = &DC[DIR_PMP *size_MatC];
-	   fPMMdest = &DC[DIR_PMM *size_MatC];
+	   fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
    } 
    else
    {
-	   fP00dest = &DC[DIR_M00   *size_MatC];
-	   fM00dest = &DC[DIR_P00   *size_MatC];
-	   f0P0dest = &DC[DIR_0M0   *size_MatC];
-	   f0M0dest = &DC[DIR_0P0   *size_MatC];
-	   f00Pdest = &DC[DIR_00M   *size_MatC];
-	   f00Mdest = &DC[DIR_00P   *size_MatC];
-	   fPP0dest = &DC[DIR_MM0  *size_MatC];
-	   fMM0dest = &DC[DIR_PP0  *size_MatC];
-	   fPM0dest = &DC[DIR_MP0  *size_MatC];
-	   fMP0dest = &DC[DIR_PM0  *size_MatC];
-	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
-	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
-	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
-	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
-	   f0PPdest = &DC[DIR_0MM  *size_MatC];
-	   f0MMdest = &DC[DIR_0PP  *size_MatC];
-	   f0PMdest = &DC[DIR_0MP  *size_MatC];
-	   f0MPdest = &DC[DIR_0PM  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_PPP *size_MatC];
-	   fMMPdest = &DC[DIR_PPM *size_MatC];
-	   fMPPdest = &DC[DIR_PMM *size_MatC];
-	   fMPMdest = &DC[DIR_PMP *size_MatC];
-	   fPPMdest = &DC[DIR_MMP *size_MatC];
-	   fPPPdest = &DC[DIR_MMM *size_MatC];
-	   fPMPdest = &DC[DIR_MPM *size_MatC];
-	   fPMMdest = &DC[DIR_MPP *size_MatC];
+	   fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
    }
 
    Distributions6 G;
    if (isEvenTimestep == true)
    {
-	   G.g[DIR_P00] = &G6[DIR_P00   *size_MatC];
-	   G.g[DIR_M00] = &G6[DIR_M00   *size_MatC];
-	   G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatC];
-	   G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatC];
-	   G.g[DIR_00P] = &G6[DIR_00P   *size_MatC];
-	   G.g[DIR_00M] = &G6[DIR_00M   *size_MatC];
+	   G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesCoarse];
+	   G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesCoarse];
+	   G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesCoarse];
+	   G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesCoarse];
+	   G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesCoarse];
+	   G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesCoarse];
    }
    else
    {
-	   G.g[DIR_M00] = &G6[DIR_P00   *size_MatC];
-	   G.g[DIR_P00] = &G6[DIR_M00   *size_MatC];
-	   G.g[DIR_0M0] = &G6[DIR_0P0   *size_MatC];
-	   G.g[DIR_0P0] = &G6[DIR_0M0   *size_MatC];
-	   G.g[DIR_00M] = &G6[DIR_00P   *size_MatC];
-	   G.g[DIR_00P] = &G6[DIR_00M   *size_MatC];
+	   G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodesCoarse];
+	   G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodesCoarse];
+	   G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodesCoarse];
+	   G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodesCoarse];
+	   G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodesCoarse];
+	   G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -205,9 +205,9 @@ __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
@@ -1270,8 +1270,8 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 unsigned int* neighborFX,
 												 unsigned int* neighborFY,
 												 unsigned int* neighborFZ,
-												 unsigned int size_MatC, 
-												 unsigned int size_MatF, 
+												 unsigned long long numberOfLBnodesCoarse, 
+												 unsigned long long numberOfLBnodesFine, 
 												 bool isEvenTimestep,
 												 unsigned int* posC, 
 												 unsigned int* posFSWB, 
@@ -1283,7 +1283,7 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 unsigned int nyC, 
 												 unsigned int nxF, 
 												 unsigned int nyF,
-												 OffFC offFC)
+												 ICellNeigh offFC)
 {
    real 
 	   *fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -1291,33 +1291,33 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[DIR_P00   *size_MatF];
-   fM00source = &DF[DIR_M00   *size_MatF];
-   f0P0source = &DF[DIR_0P0   *size_MatF];
-   f0M0source = &DF[DIR_0M0   *size_MatF];
-   f00Psource = &DF[DIR_00P   *size_MatF];
-   f00Msource = &DF[DIR_00M   *size_MatF];
-   fPP0source = &DF[DIR_PP0  *size_MatF];
-   fMM0source = &DF[DIR_MM0  *size_MatF];
-   fPM0source = &DF[DIR_PM0  *size_MatF];
-   fMP0source = &DF[DIR_MP0  *size_MatF];
-   fP0Psource = &DF[DIR_P0P  *size_MatF];
-   fM0Msource = &DF[DIR_M0M  *size_MatF];
-   fP0Msource = &DF[DIR_P0M  *size_MatF];
-   fM0Psource = &DF[DIR_M0P  *size_MatF];
-   f0PPsource = &DF[DIR_0PP  *size_MatF];
-   f0MMsource = &DF[DIR_0MM  *size_MatF];
-   f0PMsource = &DF[DIR_0PM  *size_MatF];
-   f0MPsource = &DF[DIR_0MP  *size_MatF];
-   f000source = &DF[DIR_000*size_MatF];
-   fMMMsource = &DF[DIR_MMM *size_MatF];
-   fMMPsource = &DF[DIR_MMP *size_MatF];
-   fMPPsource = &DF[DIR_MPP *size_MatF];
-   fMPMsource = &DF[DIR_MPM *size_MatF];
-   fPPMsource = &DF[DIR_PPM *size_MatF];
-   fPPPsource = &DF[DIR_PPP *size_MatF];
-   fPMPsource = &DF[DIR_PMP *size_MatF];
-   fPMMsource = &DF[DIR_PMM *size_MatF];
+   fP00source = &DF[DIR_P00 * numberOfLBnodesFine];
+   fM00source = &DF[DIR_M00 * numberOfLBnodesFine];
+   f0P0source = &DF[DIR_0P0 * numberOfLBnodesFine];
+   f0M0source = &DF[DIR_0M0 * numberOfLBnodesFine];
+   f00Psource = &DF[DIR_00P * numberOfLBnodesFine];
+   f00Msource = &DF[DIR_00M * numberOfLBnodesFine];
+   fPP0source = &DF[DIR_PP0 * numberOfLBnodesFine];
+   fMM0source = &DF[DIR_MM0 * numberOfLBnodesFine];
+   fPM0source = &DF[DIR_PM0 * numberOfLBnodesFine];
+   fMP0source = &DF[DIR_MP0 * numberOfLBnodesFine];
+   fP0Psource = &DF[DIR_P0P * numberOfLBnodesFine];
+   fM0Msource = &DF[DIR_M0M * numberOfLBnodesFine];
+   fP0Msource = &DF[DIR_P0M * numberOfLBnodesFine];
+   fM0Psource = &DF[DIR_M0P * numberOfLBnodesFine];
+   f0PPsource = &DF[DIR_0PP * numberOfLBnodesFine];
+   f0MMsource = &DF[DIR_0MM * numberOfLBnodesFine];
+   f0PMsource = &DF[DIR_0PM * numberOfLBnodesFine];
+   f0MPsource = &DF[DIR_0MP * numberOfLBnodesFine];
+   f000source = &DF[DIR_000 * numberOfLBnodesFine];
+   fMMMsource = &DF[DIR_MMM * numberOfLBnodesFine];
+   fMMPsource = &DF[DIR_MMP * numberOfLBnodesFine];
+   fMPPsource = &DF[DIR_MPP * numberOfLBnodesFine];
+   fMPMsource = &DF[DIR_MPM * numberOfLBnodesFine];
+   fPPMsource = &DF[DIR_PPM * numberOfLBnodesFine];
+   fPPPsource = &DF[DIR_PPP * numberOfLBnodesFine];
+   fPMPsource = &DF[DIR_PMP * numberOfLBnodesFine];
+   fPMMsource = &DF[DIR_PMM * numberOfLBnodesFine];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -1326,83 +1326,83 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[DIR_P00   *size_MatC];
-	   fM00dest = &DC[DIR_M00   *size_MatC];
-	   f0P0dest = &DC[DIR_0P0   *size_MatC];
-	   f0M0dest = &DC[DIR_0M0   *size_MatC];
-	   f00Pdest = &DC[DIR_00P   *size_MatC];
-	   f00Mdest = &DC[DIR_00M   *size_MatC];
-	   fPP0dest = &DC[DIR_PP0  *size_MatC];
-	   fMM0dest = &DC[DIR_MM0  *size_MatC];
-	   fPM0dest = &DC[DIR_PM0  *size_MatC];
-	   fMP0dest = &DC[DIR_MP0  *size_MatC];
-	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
-	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
-	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
-	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
-	   f0PPdest = &DC[DIR_0PP  *size_MatC];
-	   f0MMdest = &DC[DIR_0MM  *size_MatC];
-	   f0PMdest = &DC[DIR_0PM  *size_MatC];
-	   f0MPdest = &DC[DIR_0MP  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_MMM *size_MatC];
-	   fMMPdest = &DC[DIR_MMP *size_MatC];
-	   fMPPdest = &DC[DIR_MPP *size_MatC];
-	   fMPMdest = &DC[DIR_MPM *size_MatC];
-	   fPPMdest = &DC[DIR_PPM *size_MatC];
-	   fPPPdest = &DC[DIR_PPP *size_MatC];
-	   fPMPdest = &DC[DIR_PMP *size_MatC];
-	   fPMMdest = &DC[DIR_PMM *size_MatC];
+	   fP00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
    } 
    else
    {
-	   fP00dest = &DC[DIR_M00   *size_MatC];
-	   fM00dest = &DC[DIR_P00   *size_MatC];
-	   f0P0dest = &DC[DIR_0M0   *size_MatC];
-	   f0M0dest = &DC[DIR_0P0   *size_MatC];
-	   f00Pdest = &DC[DIR_00M   *size_MatC];
-	   f00Mdest = &DC[DIR_00P   *size_MatC];
-	   fPP0dest = &DC[DIR_MM0  *size_MatC];
-	   fMM0dest = &DC[DIR_PP0  *size_MatC];
-	   fPM0dest = &DC[DIR_MP0  *size_MatC];
-	   fMP0dest = &DC[DIR_PM0  *size_MatC];
-	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
-	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
-	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
-	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
-	   f0PPdest = &DC[DIR_0MM  *size_MatC];
-	   f0MMdest = &DC[DIR_0PP  *size_MatC];
-	   f0PMdest = &DC[DIR_0MP  *size_MatC];
-	   f0MPdest = &DC[DIR_0PM  *size_MatC];
-	   f000dest = &DC[DIR_000*size_MatC];
-	   fMMMdest = &DC[DIR_PPP *size_MatC];
-	   fMMPdest = &DC[DIR_PPM *size_MatC];
-	   fMPPdest = &DC[DIR_PMM *size_MatC];
-	   fMPMdest = &DC[DIR_PMP *size_MatC];
-	   fPPMdest = &DC[DIR_MMP *size_MatC];
-	   fPPPdest = &DC[DIR_MMM *size_MatC];
-	   fPMPdest = &DC[DIR_MPM *size_MatC];
-	   fPMMdest = &DC[DIR_MPP *size_MatC];
+	   fP00dest = &DC[DIR_M00 * numberOfLBnodesCoarse];
+	   fM00dest = &DC[DIR_P00 * numberOfLBnodesCoarse];
+	   f0P0dest = &DC[DIR_0M0 * numberOfLBnodesCoarse];
+	   f0M0dest = &DC[DIR_0P0 * numberOfLBnodesCoarse];
+	   f00Pdest = &DC[DIR_00M * numberOfLBnodesCoarse];
+	   f00Mdest = &DC[DIR_00P * numberOfLBnodesCoarse];
+	   fPP0dest = &DC[DIR_MM0 * numberOfLBnodesCoarse];
+	   fMM0dest = &DC[DIR_PP0 * numberOfLBnodesCoarse];
+	   fPM0dest = &DC[DIR_MP0 * numberOfLBnodesCoarse];
+	   fMP0dest = &DC[DIR_PM0 * numberOfLBnodesCoarse];
+	   fP0Pdest = &DC[DIR_M0M * numberOfLBnodesCoarse];
+	   fM0Mdest = &DC[DIR_P0P * numberOfLBnodesCoarse];
+	   fP0Mdest = &DC[DIR_M0P * numberOfLBnodesCoarse];
+	   fM0Pdest = &DC[DIR_P0M * numberOfLBnodesCoarse];
+	   f0PPdest = &DC[DIR_0MM * numberOfLBnodesCoarse];
+	   f0MMdest = &DC[DIR_0PP * numberOfLBnodesCoarse];
+	   f0PMdest = &DC[DIR_0MP * numberOfLBnodesCoarse];
+	   f0MPdest = &DC[DIR_0PM * numberOfLBnodesCoarse];
+	   f000dest = &DC[DIR_000 * numberOfLBnodesCoarse];
+	   fMMMdest = &DC[DIR_PPP * numberOfLBnodesCoarse];
+	   fMMPdest = &DC[DIR_PPM * numberOfLBnodesCoarse];
+	   fMPPdest = &DC[DIR_PMM * numberOfLBnodesCoarse];
+	   fMPMdest = &DC[DIR_PMP * numberOfLBnodesCoarse];
+	   fPPMdest = &DC[DIR_MMP * numberOfLBnodesCoarse];
+	   fPPPdest = &DC[DIR_MMM * numberOfLBnodesCoarse];
+	   fPMPdest = &DC[DIR_MPM * numberOfLBnodesCoarse];
+	   fPMMdest = &DC[DIR_MPP * numberOfLBnodesCoarse];
    }
 
    Distributions6 G;
    if (isEvenTimestep == true)
    {
-	   G.g[DIR_P00] = &G6[DIR_P00   *size_MatC];
-	   G.g[DIR_M00] = &G6[DIR_M00   *size_MatC];
-	   G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatC];
-	   G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatC];
-	   G.g[DIR_00P] = &G6[DIR_00P   *size_MatC];
-	   G.g[DIR_00M] = &G6[DIR_00M   *size_MatC];
+	   G.g[DIR_P00] = &G6[DIR_P00 * numberOfLBnodesCoarse];
+	   G.g[DIR_M00] = &G6[DIR_M00 * numberOfLBnodesCoarse];
+	   G.g[DIR_0P0] = &G6[DIR_0P0 * numberOfLBnodesCoarse];
+	   G.g[DIR_0M0] = &G6[DIR_0M0 * numberOfLBnodesCoarse];
+	   G.g[DIR_00P] = &G6[DIR_00P * numberOfLBnodesCoarse];
+	   G.g[DIR_00M] = &G6[DIR_00M * numberOfLBnodesCoarse];
    }
    else
    {
-	   G.g[DIR_M00] = &G6[DIR_P00   *size_MatC];
-	   G.g[DIR_P00] = &G6[DIR_M00   *size_MatC];
-	   G.g[DIR_0M0] = &G6[DIR_0P0   *size_MatC];
-	   G.g[DIR_0P0] = &G6[DIR_0M0   *size_MatC];
-	   G.g[DIR_00M] = &G6[DIR_00P   *size_MatC];
-	   G.g[DIR_00P] = &G6[DIR_00M   *size_MatC];
+	   G.g[DIR_M00] = &G6[DIR_P00 * numberOfLBnodesCoarse];
+	   G.g[DIR_P00] = &G6[DIR_M00 * numberOfLBnodesCoarse];
+	   G.g[DIR_0M0] = &G6[DIR_0P0 * numberOfLBnodesCoarse];
+	   G.g[DIR_0P0] = &G6[DIR_0M0 * numberOfLBnodesCoarse];
+	   G.g[DIR_00M] = &G6[DIR_00P * numberOfLBnodesCoarse];
+	   G.g[DIR_00P] = &G6[DIR_00M * numberOfLBnodesCoarse];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -1452,9 +1452,9 @@ __global__ void scaleFC_comp_D3Q27F3( real* DC,
    if(k<kFC)
    {
       //////////////////////////////////////////////////////////////////////////
-      xoff = offFC.xOffFC[k];
-      yoff = offFC.yOffFC[k];
-      zoff = offFC.zOffFC[k];      
+      xoff = offFC.x[k];
+      yoff = offFC.y[k];
+      zoff = offFC.z[k];      
       xoff_sq = xoff * xoff;
       yoff_sq = yoff * yoff;
       zoff_sq = zoff * zoff;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
index f4160b89c047a7e6244a5579baae03d30b3c89cb..ec1c8207bdd38666f4222270be81b91960142e62 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
@@ -32,12 +32,13 @@
 //=======================================================================================
 
 #include "DataTypes.h"
-#include "Kernel/Utilities/DistributionHelper.cuh"
-#include "Kernel/Utilities/ChimeraTransformation.h"
-#include "Kernel/Utilities/ScalingHelperFunctions.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/ChimeraTransformation.h"
+#include "LBM/GPUHelperFunctions/ScalingUtilities.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief Calculate the interpolated distributions on the fine destination nodes
@@ -217,7 +218,7 @@ __device__ __inline__ void interpolateDistributions(
 //!
 
 // based on scaleCF_RhoSq_comp_27
-__global__ void scaleCF_compressible(
+template<bool hasTurbulentViscosity> __global__ void scaleCF_compressible(
     real* distributionsCoarse, 
     real* distributionsFine, 
     unsigned int* neighborXcoarse,
@@ -226,24 +227,26 @@ __global__ void scaleCF_compressible(
     unsigned int* neighborXfine,
     unsigned int* neighborYfine,
     unsigned int* neighborZfine,
-    unsigned int numberOfLBnodesCoarse, 
-    unsigned int numberOfLBnodesFine, 
+    unsigned long long numberOfLBnodesCoarse, 
+    unsigned long long numberOfLBnodesFine, 
     bool isEvenTimestep,
     unsigned int* indicesCoarseMMM, 
     unsigned int* indicesFineMMM, 
     unsigned int numberOfInterfaceNodes, 
     real omegaCoarse, 
     real omegaFine, 
-    OffCF offsetCF)
+    real* turbulentViscosityCoarse,
+    real* turbulentViscosityFine,
+    ICellNeigh neighborCoarseToFine)
 {
     ////////////////////////////////////////////////////////////////////////////////
-    //! - Get the thread index coordinates from threadId_100, blockId_100, blockDim and gridDim.
+    //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
     //!
-    const unsigned k_thread = vf::gpu::getNodeIndex();
+    const unsigned nodeIndex = getNodeIndex();
 
     //////////////////////////////////////////////////////////////////////////
     //! - Return for non-interface node
-    if (k_thread >= numberOfInterfaceNodes)
+    if (nodeIndex >= numberOfInterfaceNodes)
         return;
 
     //////////////////////////////////////////////////////////////////////////
@@ -252,8 +255,9 @@ __global__ void scaleCF_compressible(
     //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    Distributions27 distFine   = vf::gpu::getDistributionReferences27(distributionsFine,   numberOfLBnodesFine,   true);
-    Distributions27 distCoarse = vf::gpu::getDistributionReferences27(distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep);
+    Distributions27 distFine, distCoarse;
+    getPointersToDistributions(distFine, distributionsFine, numberOfLBnodesFine, true);
+    getPointersToDistributions(distCoarse, distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep);
 
     ////////////////////////////////////////////////////////////////////////////////
     //! - declare local variables for source nodes
@@ -289,7 +293,7 @@ __global__ void scaleCF_compressible(
     // source node BSW = MMM
     ////////////////////////////////////////////////////////////////////////////////
     // index of the base node and its neighbors
-    unsigned int k_base_000 = indicesCoarseMMM[k_thread];
+    unsigned int k_base_000 = indicesCoarseMMM[nodeIndex];
     unsigned int k_base_M00 = neighborXcoarse [k_base_000];
     unsigned int k_base_0M0 = neighborYcoarse [k_base_000];
     unsigned int k_base_00M = neighborZcoarse [k_base_000];
@@ -308,6 +312,8 @@ __global__ void scaleCF_compressible(
     unsigned int k_0MM = k_base_0MM;
     unsigned int k_MMM = k_base_MMM;
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMM, vx1_MMM, vx2_MMM, vx3_MMM,
         kxyFromfcNEQ_MMM, kyzFromfcNEQ_MMM, kxzFromfcNEQ_MMM, kxxMyyFromfcNEQ_MMM, kxxMzzFromfcNEQ_MMM);
@@ -325,6 +331,8 @@ __global__ void scaleCF_compressible(
     k_0MM = neighborZcoarse[k_0MM];
     k_MMM = neighborZcoarse[k_MMM];
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMP, vx1_MMP, vx2_MMP, vx3_MMP,
         kxyFromfcNEQ_MMP, kyzFromfcNEQ_MMP, kxzFromfcNEQ_MMP, kxxMyyFromfcNEQ_MMP, kxxMzzFromfcNEQ_MMP);
@@ -342,6 +350,8 @@ __global__ void scaleCF_compressible(
     k_0MM = k_MMM;
     k_MMM = neighborXcoarse[k_MMM];
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMP, vx1_PMP, vx2_PMP, vx3_PMP,
         kxyFromfcNEQ_PMP, kyzFromfcNEQ_PMP, kxzFromfcNEQ_PMP, kxxMyyFromfcNEQ_PMP, kxxMzzFromfcNEQ_PMP);
@@ -359,6 +369,8 @@ __global__ void scaleCF_compressible(
     k_0M0 = k_base_MM0;
     k_MM0 = neighborXcoarse[k_base_MM0];
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMM, vx1_PMM, vx2_PMM, vx3_PMM,
         kxyFromfcNEQ_PMM, kyzFromfcNEQ_PMM, kxzFromfcNEQ_PMM, kxxMyyFromfcNEQ_PMM, kxxMzzFromfcNEQ_PMM);
@@ -386,6 +398,8 @@ __global__ void scaleCF_compressible(
     k_0MM = k_base_0MM;
     k_MMM = k_base_MMM;
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPM, vx1_MPM, vx2_MPM, vx3_MPM,
         kxyFromfcNEQ_MPM, kyzFromfcNEQ_MPM, kxzFromfcNEQ_MPM, kxxMyyFromfcNEQ_MPM, kxxMzzFromfcNEQ_MPM);
@@ -402,6 +416,8 @@ __global__ void scaleCF_compressible(
     k_M0M = neighborZcoarse[k_M0M];
     k_0MM = neighborZcoarse[k_0MM];
     k_MMM = neighborZcoarse[k_MMM];
+
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
     
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPP, vx1_MPP, vx2_MPP, vx3_MPP,
@@ -421,11 +437,12 @@ __global__ void scaleCF_compressible(
     k_0MM = k_MMM;
     k_MMM = neighborXcoarse[k_MMM];
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPP, vx1_PPP, vx2_PPP, vx3_PPP,
         kxyFromfcNEQ_PPP, kyzFromfcNEQ_PPP, kxzFromfcNEQ_PPP, kxxMyyFromfcNEQ_PPP, kxxMzzFromfcNEQ_PPP);
 
-
     //////////////////////////////////////////////////////////////////////////
     // source node BNE = PPM
     //////////////////////////////////////////////////////////////////////////
@@ -438,6 +455,8 @@ __global__ void scaleCF_compressible(
     k_M00 = neighborXcoarse[k_base_M00];
     k_0M0 = k_base_MM0;
     k_MM0 = neighborXcoarse[k_base_MM0];
+
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
     
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPM, vx1_PPM, vx2_PPM, vx3_PPM,
@@ -452,119 +471,240 @@ __global__ void scaleCF_compressible(
     real c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011, c_111;
     real d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111;
 
-    a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
-            kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP -
-            kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP -
-            kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP -
-            c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP +
-            c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP +
-            c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP +
-            c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP +
-            c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP +
-            c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP -
-            c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP +
-            c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c64o1;
-    b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM +
-            c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP -
-            c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP -
-            kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP +
-            kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP +
-            c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP +
-            c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP +
-            c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP +
-            c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
-            c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM +
-            c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM +
-            c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM -
-            c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c64o1;
-    c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
-            kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP -
-            c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM +
-            c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP -
-            c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM -
-            c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM -
-            c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM -
-            c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM +
-            c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM +
-            c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM -
-            c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM -
-            c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM +
-            c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) /
-            c64o1;
-    a_100  = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
-    b_100  = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
-    c_100  = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
-    a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
-            kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP +
-            kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP +
-            kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM +
-            c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM +
-            c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM +
-            c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c16o1;
-    b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM +
-            kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP +
-            c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) /
-            c8o1;
-    c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM +
-            kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP -
-            c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) /
-            c8o1;
-    a_010  = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
-    b_010  = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
-    c_010  = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
-    a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM -
-            kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP +
-            c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
-            c8o1;
-    b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM -
-            c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP +
-            c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP +
-            kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP -
-            kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
-            c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM +
-            c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM -
-            c2o1 * vx3_MMP) /
-            c16o1;
-    c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM -
-            kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP +
-            c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) /
-            c8o1;
-    a_001  = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1;
-    b_001  = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1;
-    c_001  = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / c4o1;
-    a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM +
-            kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP -
-            c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
-            c8o1;
-    b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM +
-            kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP +
-            c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
-            c8o1;
-    c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
-            kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP +
-            c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM -
-            c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP +
-            c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP +
-            c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP -
-            c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP +
-            c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
-            c16o1;
-    a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1;
-    b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1;
-    c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1;
-    a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
-    b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
-    c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
-    a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
-    b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
-    c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
-
-    a_111 = -vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP + vx1_PMM - vx1_PMP - vx1_MMM + vx1_MMP;
-    b_111 = -vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP + vx2_PMM - vx2_PMP - vx2_MMM + vx2_MMP;
-    c_111 = -vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP + vx3_PMM - vx3_PMP - vx3_MMM + vx3_MMP;
+    // a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
+    //         kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP -
+    //         kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP -
+    //         kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP -
+    //         c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP +
+    //         c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP +
+    //         c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP +
+    //         c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP +
+    //         c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP +
+    //         c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP -
+    //         c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP +
+    //         c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
+    //         c64o1;
+    a_000 =
+        c1o64 * (c2o1 * (((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) +
+                         ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)) +
+                         ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) +
+                         ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) +
+                         ((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) - ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP)) +
+                         ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_PMP + vx3_MPM) - (vx3_MPP + vx3_PMM))) +
+                 c8o1 * (((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM))) +
+                 ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+                 ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) +
+                 ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) +
+                 ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP)));
+
+    // b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM +
+    //         c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP -
+    //         c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP -
+    //         kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP +
+    //         kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP +
+    //         c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP +
+    //         c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP +
+    //         c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP +
+    //         c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
+    //         c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM +
+    //         c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM +
+    //         c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM -
+    //         c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
+    //         c64o1;
+    b_000 =
+        c1o64 * (c2o1 * (((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) +
+                         ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) +
+                         ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) +
+                         ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)) +
+                         ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) +
+                         ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)) +
+                         ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPM + vx1_MPP) + (vx1_PMM + vx1_PMP)) +
+                         ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) +
+                 c8o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) +
+                 ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) +
+                 ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)));
+
+    // c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
+    //         kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP -
+    //         c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM +
+    //         c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP -
+    //         c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM -
+    //         c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM -
+    //         c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM -
+    //         c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM +
+    //         c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM +
+    //         c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM -
+    //         c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM -
+    //         c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM +
+    //         c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) /
+    //         c64o1;
+    c_000 =
+        c1o64 * (c2o1 * (((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) +
+                         ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)) +
+                         ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) +
+                         ((kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) +
+                         ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) +
+                         ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM)) +
+                         ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_MPP + vx1_PMM)) +
+                         ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) - (vx2_MPM + vx2_PMP))) +
+                 c8o1 * (((vx3_PPP + vx3_MMM) + (vx3_PPM + vx3_MMP)) + ((vx3_PMM + vx3_MPP) + (vx3_PMP + vx3_MPM))) +
+                 ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) +
+                 ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)));
+
+    // a_100  = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
+    a_100 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_PMM - vx1_MPP) + (vx1_PMP - vx1_MPM)));
+
+    // b_100  = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
+    b_100 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_PMM - vx2_MPP) + (vx2_PMP - vx2_MPM)));
+
+    // c_100  = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
+    c_100 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_PMM - vx3_MPP) + (vx3_PMP - vx3_MPM)));
+
+    // a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
+    //         kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP +
+    //         kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP +
+    //         kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM +
+    //         c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM +
+    //         c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM +
+    //         c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
+    //         c16o1;
+    a_200 =
+        c1o16 * (c2o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM - vx2_MPP)) + ((vx2_MMP - vx2_PMM) - (vx2_MPM + vx2_PMP)) +
+                         ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MPP)) + ((vx3_MPM + vx3_PMP) - (vx3_MMP + vx3_PMM))) +
+                 ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) +
+                 ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) +
+                 ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) +
+                 ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)));
+
+    // b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM +
+    //         kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP +
+    //         c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) /
+    //         c8o1;
+    b_200 =
+        c1o8 * (c2o1 * (-((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_MPM + vx1_PMP))) +
+                ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) +
+                ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)));
+
+    // c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM +
+    //          kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP - c2o1 *
+    //          vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) /
+    //         c8o1;
+    c_200 = c1o8 * (c2o1 * (((vx1_PPM + vx1_MMP) - (vx1_PPP + vx1_MMM)) + ((vx1_MPP + vx1_PMM) - (vx1_MPM + vx1_PMP))) +
+                    ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) +
+                    ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM)));
+
+    // a_010 = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
+    a_010 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_MPP - vx1_PMM) + (vx1_MPM - vx1_PMP)));
+
+    // b_010 = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
+    b_010 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_MPP - vx2_PMM) + (vx2_MPM - vx2_PMP)));
+
+    // c_010 = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
+    c_010 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_MPP - vx3_PMM) + (vx3_MPM - vx3_PMP)));
+
+    // a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM -
+    //         kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP +
+    //         c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
+    //         c8o1;
+    a_020 =
+        c1o8 * (c2o1 * (-((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) +
+                ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) +
+                ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)));
+
+    // b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM -
+    //         c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP +
+    //         c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP +
+    //         kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP -
+    //         kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
+    //         c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM +
+    //         c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM -
+    //         c2o1 * vx3_MMP) /
+    //         c16o1;
+    b_020 =
+        c1o16 * (c2o1 * (((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+                         ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) +
+                         ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM)) +
+                         ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) +
+                 ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) +
+                 ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP)));
+
+    // c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM -
+    //          kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP + c2o1 *
+    //          vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) /
+    //         c8o1;
+    c_020 = c1o8 * (c2o1 * (((vx2_MMP + vx2_PPM) - (vx2_PPP + vx2_MMM)) + ((vx2_PMP + vx2_MPM) - (vx2_MPP + vx2_PMM))) +
+                    ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) +
+                    ((kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)));
+
+    // a_001  = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1;
+    a_001 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPP - vx1_PMM) + (vx1_PMP - vx1_MPM)));
+
+    // b_001  = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1;
+    b_001 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPP - vx2_PMM) + (vx2_PMP - vx2_MPM)));
+
+    // c_001  = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / c4o1;
+    c_001 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPP - vx3_PMM) + (vx3_PMP - vx3_MPM)));
+
+    // a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM +
+    //         kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP -
+    //         c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
+    //         c8o1;
+    a_002 = c1o8 * (c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPP + vx3_PMM) - (vx3_PMP + vx3_MPM))) +
+                    ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) +
+                    ((kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM) + (kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM)));
+
+    // b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM +
+    //          kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP + c2o1 *
+    //          vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
+    //         c8o1;
+    b_002 = c1o8 * (c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))) +
+                    ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) +
+                    ((kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM) + (kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM)));
+
+    // c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
+    //         kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP +
+    //         c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM -
+    //         c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP +
+    //         c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP +
+    //         c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP -
+    //         c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP +
+    //         c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
+    //         c16o1;
+    c_002 =
+        c1o16 * (c2o1 * (((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) +
+                         ((kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP) + (kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP)) +
+                         ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)) +
+                         ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))) +
+                 ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+                 ((kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM) + (kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM)));
+
+    // a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1;
+    // b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1;
+    // c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1;
+    a_110 = c1o2 * (((vx1_PPP + vx1_MMM) + (vx1_MMP + vx1_PPM)) - ((vx1_MPM + vx1_PMP) + (vx1_PMM + vx1_MPP)));
+    b_110 = c1o2 * (((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) - ((vx2_MPM + vx2_PMP) + (vx2_PMM + vx2_MPP)));
+    c_110 = c1o2 * (((vx3_PPP + vx3_MMM) + (vx3_MMP + vx3_PPM)) - ((vx3_MPM + vx3_PMP) + (vx3_PMM + vx3_MPP)));
+
+    // a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
+    // b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
+    // c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
+    a_101 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)));
+    b_101 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPM + vx2_PMP) - (vx2_PMM + vx2_MPP)));
+    c_101 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP)));
+
+    // a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
+    // b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
+    // c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
+    a_011 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_PMM + vx1_MPP) - (vx1_MPM + vx1_PMP)));
+    b_011 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP)));
+    c_011 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_PMM + vx3_MPP) - (vx3_MPM + vx3_PMP)));
+
+    // a_111 = -vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP + vx1_PMM - vx1_PMP - vx1_MMM + vx1_MMP;
+    // b_111 = -vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP + vx2_PMM - vx2_PMP - vx2_MMM + vx2_MMP;
+    // c_111 = -vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP + vx3_PMM - vx3_PMP - vx3_MMM + vx3_MMP;
+    a_111 = ((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPM - vx1_PMP) + (vx1_PMM - vx1_MPP));
+    b_111 = ((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPM - vx2_PMP) + (vx2_PMM - vx2_MPP));
+    c_111 = ((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPM - vx3_PMP) + (vx3_PMM - vx3_MPP));
 
     //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -618,9 +758,9 @@ __global__ void scaleCF_compressible(
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set the relative position of the offset cell {-1, 0, 1}
     //!
-    real xoff    = offsetCF.xOffCF[k_thread];
-    real yoff    = offsetCF.yOffCF[k_thread];
-    real zoff    = offsetCF.zOffCF[k_thread];
+    real xoff    = neighborCoarseToFine.x[nodeIndex];
+    real yoff    = neighborCoarseToFine.y[nodeIndex];
+    real zoff    = neighborCoarseToFine.z[nodeIndex];
 
     real xoff_sq = xoff * xoff;
     real yoff_sq = yoff * yoff;
@@ -632,14 +772,29 @@ __global__ void scaleCF_compressible(
         ((xoff != c0o1) || (yoff != c0o1) || (zoff != c0o1))
         ? c0o1
         : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001);
-    d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP) * c1o8;
-    d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4;
-    d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4;
-    d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4;
-    d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2;
-    d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2;
-    d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2;
-    d_111 =  -drho_PPM + drho_PPP + drho_MPM - drho_MPP + drho_PMM - drho_PMP - drho_MMM + drho_MMP;
+    // d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP) * c1o8;
+    d_000 = c1o8 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM)));
+
+    // d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4;
+    d_100 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_PMM - drho_MPP) + (drho_PMP - drho_MPM)));
+
+    // d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4;
+    d_010 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_MPP - drho_PMM) + (drho_MPM - drho_PMP)));
+
+    // d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4;
+    d_001 = c1o4 * (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_MPP - drho_PMM) + (drho_PMP - drho_MPM)));
+
+    // d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2;
+    d_110 = c1o2 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) - ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM)));
+
+    // d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2;
+    d_101 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMP + drho_MPM) - (drho_PMM + drho_MPP)));
+
+    // d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2;
+    d_011 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) - (drho_PMP + drho_MPM)));
+
+    // d_111 =  -drho_PPM + drho_PPP + drho_MPM - drho_MPP + drho_PMM - drho_PMP - drho_MMM + drho_MMP;
+    d_111 = (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_PMM - drho_MPP) + (drho_MPM - drho_PMP)));
 
     //////////////////////////////////////////////////////////////////////////
     //! - Extrapolation for refinement in to the wall (polynomial coefficients)
@@ -745,10 +900,31 @@ __global__ void scaleCF_compressible(
     real y = -c1o4;
     real z = -c1o4;
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
+    // index of the base node and its neighbors
+    k_base_000 = indicesFineMMM[nodeIndex];
+    k_base_M00 = neighborXfine [k_base_000];
+    k_base_0M0 = neighborYfine [k_base_000];
+    k_base_00M = neighborZfine [k_base_000];
+    k_base_MM0 = neighborYfine [k_base_M00];
+    k_base_M0M = neighborZfine [k_base_M00];
+    k_base_0MM = neighborZfine [k_base_0M0];
+    k_base_MMM = neighborZfine [k_base_MM0];
+    //////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_base_000;
+    k_M00 = k_base_M00;
+    k_0M0 = k_base_0M0;
+    k_00M = k_base_00M;
+    k_MM0 = k_base_MM0;
+    k_M0M = k_base_M0M;
+    k_0MM = k_base_0MM;
+    k_MMM = k_base_MMM;
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set moments (zeroth to sixth order) on destination node
     //!
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -766,27 +942,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    //////////////////////////////////////////////////////////////////////////
-    // index of the base node and its neighbors
-    k_base_000 = indicesFineMMM[k_thread];
-    k_base_M00 = neighborXfine [k_base_000];
-    k_base_0M0 = neighborYfine [k_base_000];
-    k_base_00M = neighborZfine [k_base_000];
-    k_base_MM0 = neighborYfine [k_base_M00];
-    k_base_M0M = neighborZfine [k_base_M00];
-    k_base_0MM = neighborZfine [k_base_0M0];
-    k_base_MMM = neighborZfine [k_base_MM0];
-    //////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_base_000;
-    k_M00 = k_base_M00;
-    k_0M0 = k_base_0M0;
-    k_00M = k_base_00M;
-    k_MM0 = k_base_MM0;
-    k_M0M = k_base_M0M;
-    k_0MM = k_base_0MM;
-    k_MMM = k_base_MMM;
-
     //////////////////////////////////////////////////////////////////////////
     //! - Write distributions: style of reading and writing the distributions from/to
     //! stored arrays dependent on timestep is based on the esoteric twist algorithm
@@ -830,9 +985,22 @@ __global__ void scaleCF_compressible(
     x = -c1o4;
     y = -c1o4;
     z =  c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_00M;
+    k_M00 = k_M0M;
+    k_0M0 = k_0MM;
+    k_00M = neighborZfine[k_00M];
+    k_MM0 = k_MMM;
+    k_M0M = neighborZfine[k_M0M];
+    k_0MM = neighborZfine[k_0MM];
+    k_MMM = neighborZfine[k_MMM];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -850,17 +1018,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_00M;
-    k_M00 = k_M0M;
-    k_0M0 = k_0MM;
-    k_00M = neighborZfine[k_00M];
-    k_MM0 = k_MMM;
-    k_M0M = neighborZfine[k_M0M];
-    k_0MM = neighborZfine[k_0MM];
-    k_MMM = neighborZfine[k_MMM];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -900,9 +1057,21 @@ __global__ void scaleCF_compressible(
     y = -c1o4;
     z =  c1o4;
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_M00;
+    k_M00 = neighborXfine[k_M00];
+    k_0M0 = k_MM0;
+    k_00M = k_M0M;
+    k_MM0 = neighborXfine[k_MM0];
+    k_M0M = neighborXfine[k_M0M];
+    k_0MM = k_MMM;
+    k_MMM = neighborXfine[k_MMM];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -920,17 +1089,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_M00;
-    k_M00 = neighborXfine[k_M00];
-    k_0M0 = k_MM0;
-    k_00M = k_M0M;
-    k_MM0 = neighborXfine[k_MM0];
-    k_M0M = neighborXfine[k_M0M];
-    k_0MM = k_MMM;
-    k_MMM = neighborXfine[k_MMM];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -969,9 +1127,22 @@ __global__ void scaleCF_compressible(
     x =  c1o4;
     y = -c1o4;
     z = -c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_00M = k_000;
+    k_M0M = k_M00;
+    k_0MM = k_0M0;
+    k_MMM = k_MM0;
+    k_000 = k_base_M00;
+    k_M00 = neighborXfine[k_base_M00];
+    k_0M0 = k_base_MM0;
+    k_MM0 = neighborXfine[k_base_MM0];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -989,17 +1160,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_00M = k_000;
-    k_M0M = k_M00;
-    k_0MM = k_0M0;
-    k_MMM = k_MM0;
-    k_000 = k_base_M00;
-    k_M00 = neighborXfine[k_base_M00];
-    k_0M0 = k_base_MM0;
-    k_MM0 = neighborXfine[k_base_MM0];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1039,25 +1199,6 @@ __global__ void scaleCF_compressible(
     y =  c1o4;
     z = -c1o4;
     
-    ////////////////////////////////////////////////////////////////////////////////
-    // Set moments (zeroth to sixth orders) on destination node
-    interpolateDistributions(
-        x, y, z,
-        m_000, 
-        m_100, m_010, m_001,
-        m_011, m_101, m_110, m_200, m_020, m_002,
-        m_111, m_210, m_012, m_201, m_021, m_120, m_102,
-        m_022, m_202, m_220, m_211, m_121, m_112,
-        m_122, m_212, m_221,
-        m_222,
-        a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110,  a_101, a_011, a_111,
-        b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110,  b_101, b_011, b_111,
-        c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110,  c_101, c_011, c_111,
-        d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111,
-        LaplaceRho, eps_new, omegaF, 
-        kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
-    );
-
     //////////////////////////////////////////////////////////////////////////
     // index of the base node and its neighbors
     k_base_000 = k_base_0M0;
@@ -1080,6 +1221,28 @@ __global__ void scaleCF_compressible(
     k_0MM = k_base_0MM;
     k_MMM = k_base_MMM;
 
+    ////////////////////////////////////////////////////////////////////////////////
+    // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
+    interpolateDistributions(
+        x, y, z,
+        m_000, 
+        m_100, m_010, m_001,
+        m_011, m_101, m_110, m_200, m_020, m_002,
+        m_111, m_210, m_012, m_201, m_021, m_120, m_102,
+        m_022, m_202, m_220, m_211, m_121, m_112,
+        m_122, m_212, m_221,
+        m_222,
+        a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110,  a_101, a_011, a_111,
+        b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110,  b_101, b_011, b_111,
+        c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110,  c_101, c_011, c_111,
+        d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111,
+        LaplaceRho, eps_new, omegaF, 
+        kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
+    );
+
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1118,9 +1281,22 @@ __global__ void scaleCF_compressible(
     x = -c1o4;
     y =  c1o4;
     z =  c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_00M;
+    k_M00 = k_M0M;
+    k_0M0 = k_0MM;
+    k_00M = neighborZfine[k_00M];
+    k_MM0 = k_MMM;
+    k_M0M = neighborZfine[k_M0M];
+    k_0MM = neighborZfine[k_0MM];
+    k_MMM = neighborZfine[k_MMM];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -1138,17 +1314,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_00M;
-    k_M00 = k_M0M;
-    k_0M0 = k_0MM;
-    k_00M = neighborZfine[k_00M];
-    k_MM0 = k_MMM;
-    k_M0M = neighborZfine[k_M0M];
-    k_0MM = neighborZfine[k_0MM];
-    k_MMM = neighborZfine[k_MMM];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1187,9 +1352,22 @@ __global__ void scaleCF_compressible(
     x = c1o4;
     y = c1o4;
     z = c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_M00;
+    k_M00 = neighborXfine[k_M00];
+    k_0M0 = k_MM0;
+    k_00M = k_M0M;
+    k_MM0 = neighborXfine[k_MM0];
+    k_M0M = neighborXfine[k_M0M];
+    k_0MM = k_MMM;
+    k_MMM = neighborXfine[k_MMM];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -1207,17 +1385,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_M00;
-    k_M00 = neighborXfine[k_M00];
-    k_0M0 = k_MM0;
-    k_00M = k_M0M;
-    k_MM0 = neighborXfine[k_MM0];
-    k_M0M = neighborXfine[k_M0M];
-    k_0MM = k_MMM;
-    k_MMM = neighborXfine[k_MMM];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1256,9 +1423,22 @@ __global__ void scaleCF_compressible(
     x =  c1o4;
     y =  c1o4;
     z = -c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_00M = k_000;
+    k_M0M = k_M00;
+    k_0MM = k_0M0;
+    k_MMM = k_MM0;
+    k_000 = k_base_M00;
+    k_M00 = neighborXfine[k_base_M00];
+    k_0M0 = k_base_MM0;
+    k_MM0 = neighborXfine[k_base_MM0];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -1276,17 +1456,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_00M = k_000;
-    k_M0M = k_M00;
-    k_0MM = k_0M0;
-    k_MMM = k_MM0;
-    k_000 = k_base_M00;
-    k_M00 = neighborXfine[k_base_M00];
-    k_0M0 = k_base_MM0;
-    k_MM0 = neighborXfine[k_base_MM0];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1317,3 +1486,7 @@ __global__ void scaleCF_compressible(
     (distFine.f[DIR_PMM])[k_0MM] = f_PMM;
     (distFine.f[DIR_MMM])[k_MMM] = f_MMM;
 }
+// Explicit instantiation definition of the kernel with the boolean template argument (presumably hasTurbulentViscosity) set to true, so this variant is compiled in this translation unit.
+template __global__ void scaleCF_compressible<true>( real* distributionsCoarse, real* distributionsFine, unsigned int* neighborXcoarse, unsigned int* neighborYcoarse, unsigned int* neighborZcoarse, unsigned int* neighborXfine, unsigned int* neighborYfine, unsigned int* neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* indicesCoarseMMM, unsigned int* indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, ICellNeigh offsetCF);
+// Explicit instantiation definition of the same kernel with the boolean template argument set to false.
+template __global__ void scaleCF_compressible<false>( real* distributionsCoarse, real* distributionsFine, unsigned int* neighborXcoarse, unsigned int* neighborYcoarse, unsigned int* neighborZcoarse, unsigned int* neighborXfine, unsigned int* neighborYfine, unsigned int* neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* indicesCoarseMMM, unsigned int* indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, ICellNeigh offsetCF);
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
index 3ab8b9d20279eff341ca42d20cee9fe7550a2039..5776ba476e3537360b32ee85f32514324946ff75 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
@@ -31,12 +31,13 @@
 //! \author Martin Schoenherr, Anna Wellmann
 //=======================================================================================
 
-#include "Kernel/Utilities/DistributionHelper.cuh"
-#include "Kernel/Utilities/ChimeraTransformation.h"
-#include "Kernel/Utilities/ScalingHelperFunctions.h"
+#include "LBM/GPUHelperFunctions/ChimeraTransformation.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/ScalingUtilities.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief Interpolate from fine to coarse
@@ -45,7 +46,7 @@ using namespace vf::lbm::dir;
 //!
 
 // based on scaleFC_RhoSq_comp_27
-__global__ void scaleFC_compressible(
+template<bool hasTurbulentViscosity> __global__ void scaleFC_compressible(
     real *distributionsCoarse,
     real *distributionsFine,
     unsigned int *neighborXcoarse,
@@ -54,24 +55,26 @@ __global__ void scaleFC_compressible(
     unsigned int *neighborXfine,
     unsigned int *neighborYfine,
     unsigned int *neighborZfine,
-    unsigned int numberOfLBnodesCoarse,
-    unsigned int numberOfLBnodesFine,
+    unsigned long long numberOfLBnodesCoarse,
+    unsigned long long numberOfLBnodesFine,
     bool isEvenTimestep,
     unsigned int *indicesCoarse000,
     unsigned int *indicesFineMMM,
     unsigned int numberOfInterfaceNodes,
     real omegaCoarse,
     real omegaFine,
-    OffFC offsetFC)
+    real* turbulentViscosityCoarse,
+    real* turbulentViscosityFine,
+    ICellNeigh neighborFineToCoarse)
 {
     ////////////////////////////////////////////////////////////////////////////////
-    //! - Get the thread index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
     //!
-    const unsigned k_thread = vf::gpu::getNodeIndex();
+    const unsigned nodeIndex = getNodeIndex();
 
     //////////////////////////////////////////////////////////////////////////
     //! - Return for non-interface node
-    if (k_thread >= numberOfInterfaceNodes)
+    if (nodeIndex >= numberOfInterfaceNodes)
         return;
 
     //////////////////////////////////////////////////////////////////////////
@@ -80,8 +83,9 @@ __global__ void scaleFC_compressible(
     //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    Distributions27 distFine   = vf::gpu::getDistributionReferences27(distributionsFine,   numberOfLBnodesFine,   true);
-    Distributions27 distCoarse = vf::gpu::getDistributionReferences27(distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep);
+    Distributions27 distFine, distCoarse;
+    getPointersToDistributions(distFine, distributionsFine, numberOfLBnodesFine, true);
+    getPointersToDistributions(distCoarse, distributionsCoarse, numberOfLBnodesCoarse, isEvenTimestep);
 
     ////////////////////////////////////////////////////////////////////////////////
     //! - declare local variables for source nodes
@@ -117,7 +121,7 @@ __global__ void scaleFC_compressible(
     // source node BSW = MMM
     //////////////////////////////////////////////////////////////////////////
     // index of the base node and its neighbors
-    unsigned int k_base_000 = indicesFineMMM[k_thread];
+    unsigned int k_base_000 = indicesFineMMM[nodeIndex];
     unsigned int k_base_M00 = neighborXfine [k_base_000];
     unsigned int k_base_0M0 = neighborYfine [k_base_000];
     unsigned int k_base_00M = neighborZfine [k_base_000];
@@ -136,6 +140,8 @@ __global__ void scaleFC_compressible(
     unsigned int k_0MM = k_base_0MM;
     unsigned int k_MMM = k_base_MMM;
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMM, vx1_MMM, vx2_MMM, vx3_MMM,
         kxyFromfcNEQ_MMM, kyzFromfcNEQ_MMM, kxzFromfcNEQ_MMM, kxxMyyFromfcNEQ_MMM, kxxMzzFromfcNEQ_MMM);
@@ -153,6 +159,8 @@ __global__ void scaleFC_compressible(
     k_0MM = neighborZfine[k_0MM];
     k_MMM = neighborZfine[k_MMM];
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMP, vx1_MMP, vx2_MMP, vx3_MMP,
         kxyFromfcNEQ_MMP, kyzFromfcNEQ_MMP, kxzFromfcNEQ_MMP, kxxMyyFromfcNEQ_MMP, kxxMzzFromfcNEQ_MMP);
@@ -170,6 +178,8 @@ __global__ void scaleFC_compressible(
     k_0MM = k_MMM;
     k_MMM = neighborXfine[k_MMM];
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMP, vx1_PMP, vx2_PMP, vx3_PMP,
         kxyFromfcNEQ_PMP, kyzFromfcNEQ_PMP, kxzFromfcNEQ_PMP, kxxMyyFromfcNEQ_PMP, kxxMzzFromfcNEQ_PMP);
@@ -187,6 +197,8 @@ __global__ void scaleFC_compressible(
     k_0M0 = k_base_MM0;
     k_MM0 = neighborXfine[k_base_MM0];
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMM, vx1_PMM, vx2_PMM, vx3_PMM,
         kxyFromfcNEQ_PMM, kyzFromfcNEQ_PMM, kxzFromfcNEQ_PMM, kxxMyyFromfcNEQ_PMM, kxxMzzFromfcNEQ_PMM);
@@ -214,6 +226,8 @@ __global__ void scaleFC_compressible(
     k_0MM = k_base_0MM;
     k_MMM = k_base_MMM;
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPM, vx1_MPM, vx2_MPM, vx3_MPM,
         kxyFromfcNEQ_MPM, kyzFromfcNEQ_MPM, kxzFromfcNEQ_MPM, kxxMyyFromfcNEQ_MPM, kxxMzzFromfcNEQ_MPM);
@@ -230,6 +244,8 @@ __global__ void scaleFC_compressible(
     k_M0M = neighborZfine[k_M0M];
     k_0MM = neighborZfine[k_0MM];
     k_MMM = neighborZfine[k_MMM];
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
     
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPP, vx1_MPP, vx2_MPP, vx3_MPP,
@@ -248,6 +264,8 @@ __global__ void scaleFC_compressible(
     k_0MM = k_MMM;
     k_MMM = neighborXfine[k_MMM];
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPP, vx1_PPP, vx2_PPP, vx3_PPP,
         kxyFromfcNEQ_PPP, kyzFromfcNEQ_PPP, kxzFromfcNEQ_PPP, kxxMyyFromfcNEQ_PPP, kxxMzzFromfcNEQ_PPP);
@@ -265,6 +283,8 @@ __global__ void scaleFC_compressible(
     k_0M0 = k_base_MM0;
     k_MM0 = neighborXfine[k_base_MM0];
     
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPM, vx1_PPM, vx2_PPM, vx3_PPM,
         kxyFromfcNEQ_PPM, kyzFromfcNEQ_PPM, kxzFromfcNEQ_PPM, kxxMyyFromfcNEQ_PPM, kxxMzzFromfcNEQ_PPM);
@@ -278,115 +298,120 @@ __global__ void scaleFC_compressible(
     real c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110, c_101, c_011;
     real d_000, d_100, d_010, d_001, d_110, d_101, d_011;
 
-    a_000 = (-kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
-            kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP -
-            kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP + kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP -
-            kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP + kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP -
-            c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP - c2o1 * kxyFromfcNEQ_MPM - c2o1 * kxyFromfcNEQ_MPP +
-            c2o1 * kxyFromfcNEQ_PMM + c2o1 * kxyFromfcNEQ_PMP + c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP +
-            c2o1 * kxzFromfcNEQ_PPM - c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM - c2o1 * kxzFromfcNEQ_MPP +
-            c2o1 * kxzFromfcNEQ_PMM - c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM - c2o1 * kxzFromfcNEQ_MMP +
-            c8o1 * vx1_PPM + c8o1 * vx1_PPP + c8o1 * vx1_MPM + c8o1 * vx1_MPP + c8o1 * vx1_PMM + c8o1 * vx1_PMP +
-            c8o1 * vx1_MMM + c8o1 * vx1_MMP + c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP -
-            c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM + c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP +
-            c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c64o1;
-    b_000 = (c2o1 * kxxMyyFromfcNEQ_PPM + c2o1 * kxxMyyFromfcNEQ_PPP + c2o1 * kxxMyyFromfcNEQ_MPM +
-            c2o1 * kxxMyyFromfcNEQ_MPP - c2o1 * kxxMyyFromfcNEQ_PMM - c2o1 * kxxMyyFromfcNEQ_PMP -
-            c2o1 * kxxMyyFromfcNEQ_MMM - c2o1 * kxxMyyFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_PPP -
-            kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP + kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP +
-            kxxMzzFromfcNEQ_MMM + kxxMzzFromfcNEQ_MMP - c2o1 * kxyFromfcNEQ_PPM - c2o1 * kxyFromfcNEQ_PPP +
-            c2o1 * kxyFromfcNEQ_MPM + c2o1 * kxyFromfcNEQ_MPP - c2o1 * kxyFromfcNEQ_PMM - c2o1 * kxyFromfcNEQ_PMP +
-            c2o1 * kxyFromfcNEQ_MMM + c2o1 * kxyFromfcNEQ_MMP + c2o1 * kyzFromfcNEQ_PPM - c2o1 * kyzFromfcNEQ_PPP +
-            c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM - c2o1 * kyzFromfcNEQ_PMP +
-            c2o1 * kyzFromfcNEQ_MMM - c2o1 * kyzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
-            c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP + c8o1 * vx2_PPM +
-            c8o1 * vx2_PPP + c8o1 * vx2_MPM + c8o1 * vx2_MPP + c8o1 * vx2_PMM + c8o1 * vx2_PMP + c8o1 * vx2_MMM +
-            c8o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM -
-            c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c64o1;
-    c_000 = (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_PPP + kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
-            kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_PMP + kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP -
-            c2o1 * kxxMzzFromfcNEQ_PPM + c2o1 * kxxMzzFromfcNEQ_PPP - c2o1 * kxxMzzFromfcNEQ_MPM +
-            c2o1 * kxxMzzFromfcNEQ_MPP - c2o1 * kxxMzzFromfcNEQ_PMM + c2o1 * kxxMzzFromfcNEQ_PMP -
-            c2o1 * kxxMzzFromfcNEQ_MMM + c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * kxzFromfcNEQ_PPM -
-            c2o1 * kxzFromfcNEQ_PPP + c2o1 * kxzFromfcNEQ_MPM + c2o1 * kxzFromfcNEQ_MPP - c2o1 * kxzFromfcNEQ_PMM -
-            c2o1 * kxzFromfcNEQ_PMP + c2o1 * kxzFromfcNEQ_MMM + c2o1 * kxzFromfcNEQ_MMP - c2o1 * kyzFromfcNEQ_PPM -
-            c2o1 * kyzFromfcNEQ_PPP - c2o1 * kyzFromfcNEQ_MPM - c2o1 * kyzFromfcNEQ_MPP + c2o1 * kyzFromfcNEQ_PMM +
-            c2o1 * kyzFromfcNEQ_PMP + c2o1 * kyzFromfcNEQ_MMM + c2o1 * kyzFromfcNEQ_MMP - c2o1 * vx1_PPM +
-            c2o1 * vx1_PPP + c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM -
-            c2o1 * vx1_MMP - c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM -
-            c2o1 * vx2_PMP + c2o1 * vx2_MMM - c2o1 * vx2_MMP + c8o1 * vx3_PPM + c8o1 * vx3_PPP + c8o1 * vx3_MPM +
-            c8o1 * vx3_MPP + c8o1 * vx3_PMM + c8o1 * vx3_PMP + c8o1 * vx3_MMM + c8o1 * vx3_MMP) /
-            c64o1;
-    a_100  = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP + vx1_PMM + vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
-    b_100  = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP + vx2_PMM + vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
-    c_100  = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP + vx3_PMM + vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
-    a_200 = (kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_MPP +
-            kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_MMP +
-            kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_MPP +
-            kxxMzzFromfcNEQ_PMM + kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx2_PPM +
-            c2o1 * vx2_PPP - c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM - c2o1 * vx2_PMP + c2o1 * vx2_MMM +
-            c2o1 * vx2_MMP - c2o1 * vx3_PPM + c2o1 * vx3_PPP + c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM +
-            c2o1 * vx3_PMP + c2o1 * vx3_MMM - c2o1 * vx3_MMP) /
-            c16o1;
-    b_200 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP - kxyFromfcNEQ_MPM - kxyFromfcNEQ_MPP + kxyFromfcNEQ_PMM +
-            kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx1_PPM - c2o1 * vx1_PPP +
-            c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM + c2o1 * vx1_PMP - c2o1 * vx1_MMM - c2o1 * vx1_MMP) /
-            c8o1;
-    c_200 = (kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM - kxzFromfcNEQ_MPP + kxzFromfcNEQ_PMM +
-            kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM - kxzFromfcNEQ_MMP + c2o1 * vx1_PPM - c2o1 * vx1_PPP -
-            c2o1 * vx1_MPM + c2o1 * vx1_MPP + c2o1 * vx1_PMM - c2o1 * vx1_PMP - c2o1 * vx1_MMM + c2o1 * vx1_MMP) /
-            c8o1;
-    a_010  = (vx1_PPM + vx1_PPP + vx1_MPM + vx1_MPP - vx1_PMM - vx1_PMP - vx1_MMM - vx1_MMP) / c4o1;
-    b_010  = (vx2_PPM + vx2_PPP + vx2_MPM + vx2_MPP - vx2_PMM - vx2_PMP - vx2_MMM - vx2_MMP) / c4o1;
-    c_010  = (vx3_PPM + vx3_PPP + vx3_MPM + vx3_MPP - vx3_PMM - vx3_PMP - vx3_MMM - vx3_MMP) / c4o1;
-    a_020 = (kxyFromfcNEQ_PPM + kxyFromfcNEQ_PPP + kxyFromfcNEQ_MPM + kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM -
-            kxyFromfcNEQ_PMP - kxyFromfcNEQ_MMM - kxyFromfcNEQ_MMP - c2o1 * vx2_PPM - c2o1 * vx2_PPP +
-            c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
-            c8o1;
-    b_020 = (-c2o1 * kxxMyyFromfcNEQ_PPM - c2o1 * kxxMyyFromfcNEQ_PPP - c2o1 * kxxMyyFromfcNEQ_MPM -
-            c2o1 * kxxMyyFromfcNEQ_MPP + c2o1 * kxxMyyFromfcNEQ_PMM + c2o1 * kxxMyyFromfcNEQ_PMP +
-            c2o1 * kxxMyyFromfcNEQ_MMM + c2o1 * kxxMyyFromfcNEQ_MMP + kxxMzzFromfcNEQ_PPM + kxxMzzFromfcNEQ_PPP +
-            kxxMzzFromfcNEQ_MPM + kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_PMP -
-            kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_MMP + c2o1 * vx1_PPM + c2o1 * vx1_PPP - c2o1 * vx1_MPM -
-            c2o1 * vx1_MPP - c2o1 * vx1_PMM - c2o1 * vx1_PMP + c2o1 * vx1_MMM + c2o1 * vx1_MMP - c2o1 * vx3_PPM +
-            c2o1 * vx3_PPP - c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP + c2o1 * vx3_MMM -
-            c2o1 * vx3_MMP) /
-            c16o1;
-    c_020 = (kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP + kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM -
-            kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM - kyzFromfcNEQ_MMP + c2o1 * vx2_PPM - c2o1 * vx2_PPP +
-            c2o1 * vx2_MPM - c2o1 * vx2_MPP - c2o1 * vx2_PMM + c2o1 * vx2_PMP - c2o1 * vx2_MMM + c2o1 * vx2_MMP) /
-            c8o1;
-    a_001  = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP - vx1_PMM + vx1_PMP - vx1_MMM + vx1_MMP) / c4o1;
-    b_001  = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP - vx2_PMM + vx2_PMP - vx2_MMM + vx2_MMP) / c4o1;
-    c_001  = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP - vx3_PMM + vx3_PMP - vx3_MMM + vx3_MMP) / c4o1;
-    a_002 = (-kxzFromfcNEQ_PPM + kxzFromfcNEQ_PPP - kxzFromfcNEQ_MPM + kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM +
-            kxzFromfcNEQ_PMP - kxzFromfcNEQ_MMM + kxzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP -
-            c2o1 * vx3_MPM + c2o1 * vx3_MPP + c2o1 * vx3_PMM - c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
-            c8o1;
-    b_002 = (-kyzFromfcNEQ_PPM + kyzFromfcNEQ_PPP - kyzFromfcNEQ_MPM + kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM +
-            kyzFromfcNEQ_PMP - kyzFromfcNEQ_MMM + kyzFromfcNEQ_MMP + c2o1 * vx3_PPM - c2o1 * vx3_PPP +
-            c2o1 * vx3_MPM - c2o1 * vx3_MPP - c2o1 * vx3_PMM + c2o1 * vx3_PMP - c2o1 * vx3_MMM + c2o1 * vx3_MMP) /
-            c8o1;
-    c_002 = (-kxxMyyFromfcNEQ_PPM + kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MPM + kxxMyyFromfcNEQ_MPP -
-            kxxMyyFromfcNEQ_PMM + kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MMM + kxxMyyFromfcNEQ_MMP +
-            c2o1 * kxxMzzFromfcNEQ_PPM - c2o1 * kxxMzzFromfcNEQ_PPP + c2o1 * kxxMzzFromfcNEQ_MPM -
-            c2o1 * kxxMzzFromfcNEQ_MPP + c2o1 * kxxMzzFromfcNEQ_PMM - c2o1 * kxxMzzFromfcNEQ_PMP +
-            c2o1 * kxxMzzFromfcNEQ_MMM - c2o1 * kxxMzzFromfcNEQ_MMP - c2o1 * vx1_PPM + c2o1 * vx1_PPP +
-            c2o1 * vx1_MPM - c2o1 * vx1_MPP - c2o1 * vx1_PMM + c2o1 * vx1_PMP + c2o1 * vx1_MMM - c2o1 * vx1_MMP -
-            c2o1 * vx2_PPM + c2o1 * vx2_PPP - c2o1 * vx2_MPM + c2o1 * vx2_MPP + c2o1 * vx2_PMM - c2o1 * vx2_PMP +
-            c2o1 * vx2_MMM - c2o1 * vx2_MMP) /
-            c16o1;
-    a_110 = (vx1_PPM + vx1_PPP - vx1_MPM - vx1_MPP - vx1_PMM - vx1_PMP + vx1_MMM + vx1_MMP) / c2o1;
-    b_110 = (vx2_PPM + vx2_PPP - vx2_MPM - vx2_MPP - vx2_PMM - vx2_PMP + vx2_MMM + vx2_MMP) / c2o1;
-    c_110 = (vx3_PPM + vx3_PPP - vx3_MPM - vx3_MPP - vx3_PMM - vx3_PMP + vx3_MMM + vx3_MMP) / c2o1;
-    a_101 = (-vx1_PPM + vx1_PPP + vx1_MPM - vx1_MPP - vx1_PMM + vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
-    b_101 = (-vx2_PPM + vx2_PPP + vx2_MPM - vx2_MPP - vx2_PMM + vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
-    c_101 = (-vx3_PPM + vx3_PPP + vx3_MPM - vx3_MPP - vx3_PMM + vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
-    a_011 = (-vx1_PPM + vx1_PPP - vx1_MPM + vx1_MPP + vx1_PMM - vx1_PMP + vx1_MMM - vx1_MMP) / c2o1;
-    b_011 = (-vx2_PPM + vx2_PPP - vx2_MPM + vx2_MPP + vx2_PMM - vx2_PMP + vx2_MMM - vx2_MMP) / c2o1;
-    c_011 = (-vx3_PPM + vx3_PPP - vx3_MPM + vx3_MPP + vx3_PMM - vx3_PMP + vx3_MMM - vx3_MMP) / c2o1;
+    a_000 = c1o64 * (
+            c2o1 * (
+            ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)) + 
+            ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) + 
+            ((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) - ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP)) + 
+            ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_PMP + vx3_MPM) - (vx3_MPP + vx3_PMM))) + 
+            c8o1 * (((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM))) +
+            ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) + 
+            ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) +
+            ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + 
+            ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP)));
+    b_000 = c1o64 * (
+            c2o1 * (
+            ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + 
+            ((kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)) + 
+            ((kxyFromfcNEQ_MMM - kxyFromfcNEQ_PPP) + (kxyFromfcNEQ_MMP - kxyFromfcNEQ_PPM)) + 
+            ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)) + 
+            ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) + 
+            ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)) + 
+            ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPM + vx1_MPP) + (vx1_PMM + vx1_PMP)) + 
+            ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) + 
+            c8o1 * (((vx2_PPP + vx2_MMM) + (vx2_PPM + vx2_MMP)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) + 
+            ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) +
+            ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)));
+    c_000 = c1o64 * ( 
+            c2o1 * (
+            ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_MMP - kxxMzzFromfcNEQ_PPM)) + 
+            ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)) + 
+            ((kxzFromfcNEQ_MMM - kxzFromfcNEQ_PPP) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) + 
+            ((kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM) + (kxzFromfcNEQ_MPM - kxzFromfcNEQ_PMP)) + 
+            ((kyzFromfcNEQ_MMM - kyzFromfcNEQ_PPP) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + 
+            ((kyzFromfcNEQ_PMM - kyzFromfcNEQ_MPP) + (kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM)) + 
+            ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_MPP + vx1_PMM)) + 
+            ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) - (vx2_MPM + vx2_PMP))) + 
+            c8o1 * (((vx3_PPP + vx3_MMM) + (vx3_PPM + vx3_MMP)) + ((vx3_PMM + vx3_MPP) + (vx3_PMP + vx3_MPM))) +
+            ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + 
+            ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_MPM - kxxMyyFromfcNEQ_PMP)));
+
+    a_100 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_PMM - vx1_MPP) + (vx1_PMP - vx1_MPM)));
+    b_100 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_PMM - vx2_MPP) + (vx2_PMP - vx2_MPM)));
+    c_100 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_PMM - vx3_MPP) + (vx3_PMP - vx3_MPM)));
+
+    a_200 = c1o16 * ( 
+            c2o1 * (
+            ((vx2_PPP + vx2_MMM) + (vx2_PPM - vx2_MPP)) + ((vx2_MMP - vx2_PMM) - (vx2_MPM + vx2_PMP)) + 
+            ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MPP)) + ((vx3_MPM + vx3_PMP) - (vx3_MMP + vx3_PMM))) + 
+            ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_PPM - kxxMyyFromfcNEQ_MMP)) + 
+            ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) + 
+            ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + 
+            ((kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP) + (kxxMzzFromfcNEQ_PMP - kxxMzzFromfcNEQ_MPM)));
+    b_200 = c1o8 * (
+            c2o1 * (
+            -((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) + ((vx1_MPP + vx1_PMM) + (vx1_MPM + vx1_PMP))) +
+            ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + 
+            ((kxyFromfcNEQ_PMM - kxyFromfcNEQ_MPP) + (kxyFromfcNEQ_PMP - kxyFromfcNEQ_MPM)));
+    c_200 = c1o8 * (
+            c2o1 * (
+            ((vx1_PPM + vx1_MMP) - (vx1_PPP + vx1_MMM)) + ((vx1_MPP + vx1_PMM) - (vx1_MPM + vx1_PMP))) +
+            ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_PPM - kxzFromfcNEQ_MMP)) + 
+            ((kxzFromfcNEQ_PMM - kxzFromfcNEQ_MPP) + (kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM)));
+
+    a_010 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_PPM - vx1_MMP)) + ((vx1_MPP - vx1_PMM) + (vx1_MPM - vx1_PMP)));
+    b_010 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_PPM - vx2_MMP)) + ((vx2_MPP - vx2_PMM) + (vx2_MPM - vx2_PMP)));
+    c_010 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_PPM - vx3_MMP)) + ((vx3_MPP - vx3_PMM) + (vx3_MPM - vx3_PMP)));
+
+    a_020 = c1o8 * (
+            c2o1 * (-((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) + ((vx2_MPP + vx2_PMM) + (vx2_MPM + vx2_PMP))) +
+            ((kxyFromfcNEQ_PPP - kxyFromfcNEQ_MMM) + (kxyFromfcNEQ_PPM - kxyFromfcNEQ_MMP)) + 
+            ((kxyFromfcNEQ_MPP - kxyFromfcNEQ_PMM) + (kxyFromfcNEQ_MPM - kxyFromfcNEQ_PMP)));
+    b_020 = c1o16 * (
+            c2o1 * (
+            ((kxxMyyFromfcNEQ_MMM - kxxMyyFromfcNEQ_PPP) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+            ((kxxMyyFromfcNEQ_PMM - kxxMyyFromfcNEQ_MPP) + (kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM)) +
+            ((vx1_PPP + vx1_MMM) + (vx1_PPM + vx1_MMP)) - ((vx1_MPP + vx1_PMM) + (vx1_PMP + vx1_MPM)) + 
+            ((vx3_PPP + vx3_MMM) - (vx3_PPM + vx3_MMP)) + ((vx3_MPP + vx3_PMM) - (vx3_MPM + vx3_PMP))) +
+            ((kxxMzzFromfcNEQ_PPP - kxxMzzFromfcNEQ_MMM) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + 
+            ((kxxMzzFromfcNEQ_MPP - kxxMzzFromfcNEQ_PMM) + (kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP)));
+    c_020 = c1o8 * (
+            c2o1 * (((vx2_MMP + vx2_PPM) - (vx2_PPP + vx2_MMM)) + ((vx2_PMP + vx2_MPM) - (vx2_MPP + vx2_PMM))) +
+            ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_PPM - kyzFromfcNEQ_MMP)) +
+            ((kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM) + (kyzFromfcNEQ_MPM - kyzFromfcNEQ_PMP)));
+
+    a_001 = c1o4 * (((vx1_PPP - vx1_MMM) + (vx1_MMP - vx1_PPM)) + ((vx1_MPP - vx1_PMM) + (vx1_PMP - vx1_MPM)));
+    b_001 = c1o4 * (((vx2_PPP - vx2_MMM) + (vx2_MMP - vx2_PPM)) + ((vx2_MPP - vx2_PMM) + (vx2_PMP - vx2_MPM)));
+    c_001 = c1o4 * (((vx3_PPP - vx3_MMM) + (vx3_MMP - vx3_PPM)) + ((vx3_MPP - vx3_PMM) + (vx3_PMP - vx3_MPM)));
+
+    a_002 = c1o8 * (
+            c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPP + vx3_PMM) - (vx3_PMP + vx3_MPM))) +
+                    ((kxzFromfcNEQ_PPP - kxzFromfcNEQ_MMM) + (kxzFromfcNEQ_MMP - kxzFromfcNEQ_PPM)) +
+                    ((kxzFromfcNEQ_PMP - kxzFromfcNEQ_MPM) + (kxzFromfcNEQ_MPP - kxzFromfcNEQ_PMM)));
+    b_002 = c1o8 * (
+            c2o1 * (((vx3_PPM + vx3_MMP) - (vx3_PPP + vx3_MMM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP))) + 
+                    ((kyzFromfcNEQ_PPP - kyzFromfcNEQ_MMM) + (kyzFromfcNEQ_MMP - kyzFromfcNEQ_PPM)) + 
+                    ((kyzFromfcNEQ_PMP - kyzFromfcNEQ_MPM) + (kyzFromfcNEQ_MPP - kyzFromfcNEQ_PMM)));
+    c_002 = c1o16 * (
+            c2o1 * (
+            ((kxxMzzFromfcNEQ_MMM - kxxMzzFromfcNEQ_PPP) + (kxxMzzFromfcNEQ_PPM - kxxMzzFromfcNEQ_MMP)) + 
+            ((kxxMzzFromfcNEQ_MPM - kxxMzzFromfcNEQ_PMP) + (kxxMzzFromfcNEQ_PMM - kxxMzzFromfcNEQ_MPP)) + 
+            ((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)) + 
+            ((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP))) + 
+            ((kxxMyyFromfcNEQ_PPP - kxxMyyFromfcNEQ_MMM) + (kxxMyyFromfcNEQ_MMP - kxxMyyFromfcNEQ_PPM)) +
+            ((kxxMyyFromfcNEQ_PMP - kxxMyyFromfcNEQ_MPM) + (kxxMyyFromfcNEQ_MPP - kxxMyyFromfcNEQ_PMM)));
+
+    a_110 = c1o2 * (((vx1_PPP + vx1_MMM) + (vx1_MMP + vx1_PPM)) - ((vx1_MPM + vx1_PMP) + (vx1_PMM + vx1_MPP)));
+    b_110 = c1o2 * (((vx2_PPP + vx2_MMM) + (vx2_MMP + vx2_PPM)) - ((vx2_MPM + vx2_PMP) + (vx2_PMM + vx2_MPP)));
+    c_110 = c1o2 * (((vx3_PPP + vx3_MMM) + (vx3_MMP + vx3_PPM)) - ((vx3_MPM + vx3_PMP) + (vx3_PMM + vx3_MPP)));
+
+    a_101 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_MPM + vx1_PMP) - (vx1_PMM + vx1_MPP)));
+    b_101 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_MPM + vx2_PMP) - (vx2_PMM + vx2_MPP)));
+    c_101 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_MPM + vx3_PMP) - (vx3_PMM + vx3_MPP)));
+    
+    a_011 = c1o2 * (((vx1_PPP + vx1_MMM) - (vx1_MMP + vx1_PPM)) + ((vx1_PMM + vx1_MPP) - (vx1_MPM + vx1_PMP)));
+    b_011 = c1o2 * (((vx2_PPP + vx2_MMM) - (vx2_MMP + vx2_PPM)) + ((vx2_PMM + vx2_MPP) - (vx2_MPM + vx2_PMP)));
+    c_011 = c1o2 * (((vx3_PPP + vx3_MMM) - (vx3_MMP + vx3_PPM)) + ((vx3_PMM + vx3_MPP) - (vx3_MPM + vx3_PMP)));
 
     //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     
@@ -399,9 +424,9 @@ __global__ void scaleFC_compressible(
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set the relative position of the offset cell {-1, 0, 1}
     //!
-    real xoff    = offsetFC.xOffFC[k_thread];
-    real yoff    = offsetFC.yOffFC[k_thread];
-    real zoff    = offsetFC.zOffFC[k_thread];
+    real xoff    = neighborFineToCoarse.x[nodeIndex];
+    real yoff    = neighborFineToCoarse.y[nodeIndex];
+    real zoff    = neighborFineToCoarse.z[nodeIndex];
      
     real xoff_sq = xoff * xoff;
     real yoff_sq = yoff * yoff;
@@ -412,15 +437,14 @@ __global__ void scaleFC_compressible(
     //! 
     real LaplaceRho = 
         ((xoff != c0o1) || (yoff != c0o1) || (zoff != c0o1))
-        ? c0o1
-        : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001);
-    d_000 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP + drho_PMM + drho_PMP + drho_MMM + drho_MMP - c2o1 * LaplaceRho) * c1o8;
-    d_100 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP + drho_PMM + drho_PMP - drho_MMM - drho_MMP) * c1o4;
-    d_010 = ( drho_PPM + drho_PPP + drho_MPM + drho_MPP - drho_PMM - drho_PMP - drho_MMM - drho_MMP) * c1o4;
-    d_001 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP - drho_PMM + drho_PMP - drho_MMM + drho_MMP) * c1o4;
-    d_110 = ( drho_PPM + drho_PPP - drho_MPM - drho_MPP - drho_PMM - drho_PMP + drho_MMM + drho_MMP) * c1o2;
-    d_101 = (-drho_PPM + drho_PPP + drho_MPM - drho_MPP - drho_PMM + drho_PMP + drho_MMM - drho_MMP) * c1o2;
-    d_011 = (-drho_PPM + drho_PPP - drho_MPM + drho_MPP + drho_PMM - drho_PMP + drho_MMM - drho_MMP) * c1o2;
+        ? c0o1 : -c3o1 * (a_100 * a_100 + b_010 * b_010 + c_001 * c_001) - c6o1 * (b_100 * a_010 + c_100 * a_001 + c_010 * b_001);
+    d_000 =  c1o8 * ((((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM))) - c2o1 * LaplaceRho);
+    d_100 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_PMM - drho_MPP) + (drho_PMP - drho_MPM)));
+    d_010 = c1o4 * (((drho_PPP - drho_MMM) + (drho_PPM - drho_MMP)) + ((drho_MPP - drho_PMM) + (drho_MPM - drho_PMP)));
+    d_001 = c1o4 * (((drho_PPP - drho_MMM) + (drho_MMP - drho_PPM)) + ((drho_MPP - drho_PMM) + (drho_PMP - drho_MPM)));
+    d_110 = c1o2 * (((drho_PPP + drho_MMM) + (drho_PPM + drho_MMP)) - ((drho_PMM + drho_MPP) + (drho_PMP + drho_MPM)));
+    d_101 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMP + drho_MPM) - (drho_PMM + drho_MPP)));
+    d_011 = c1o2 * (((drho_PPP + drho_MMM) - (drho_PPM + drho_MMP)) + ((drho_PMM + drho_MPP) - (drho_PMP + drho_MPM)));
 
 
     //////////////////////////////////////////////////////////////////////////
@@ -534,6 +558,18 @@ __global__ void scaleFC_compressible(
     // y = 0.;
     // z = 0.;
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // index of the destination node and its neighbors
+    k_000 = indicesCoarse000[nodeIndex];
+    k_M00 = neighborXcoarse [k_000];
+    k_0M0 = neighborYcoarse [k_000];
+    k_00M = neighborZcoarse [k_000];
+    k_MM0 = neighborYcoarse [k_M00];
+    k_M0M = neighborZcoarse [k_M00];
+    k_0MM = neighborZcoarse [k_0M0];
+    k_MMM = neighborZcoarse [k_MM0];
+    ////////////////////////////////////////////////////////////////////////////////////
+
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
 
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set macroscopic values on destination node (zeroth and first order moments)
@@ -636,19 +672,6 @@ __global__ void scaleFC_compressible(
     backwardInverseChimeraWithK(m_210, m_211, m_212, vvz, vz_sq, c9o1,  c1o9);
     backwardInverseChimeraWithK(m_220, m_221, m_222, vvz, vz_sq, c36o1, c1o36);
 
-
-    ////////////////////////////////////////////////////////////////////////////////////
-    // index of the destination node and its neighbors
-    k_000 = indicesCoarse000[k_thread];
-    k_M00 = neighborXcoarse [k_000];
-    k_0M0 = neighborYcoarse [k_000];
-    k_00M = neighborZcoarse [k_000];
-    k_MM0 = neighborYcoarse [k_M00];
-    k_M0M = neighborZcoarse [k_M00];
-    k_0MM = neighborZcoarse [k_0M0];
-    k_MMM = neighborZcoarse [k_MM0];
-    ////////////////////////////////////////////////////////////////////////////////////
-
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Write distributions: style of reading and writing the distributions from/to
     //! stored arrays dependent on timestep is based on the esoteric twist algorithm
@@ -684,3 +707,7 @@ __global__ void scaleFC_compressible(
     (distCoarse.f[DIR_MMM])[k_MMM] = f_MMM;
     ////////////////////////////////////////////////////////////////////////////////////
 }
+
+template __global__ void scaleFC_compressible<true>( real *distributionsCoarse, real *distributionsFine, unsigned int *neighborXcoarse, unsigned int *neighborYcoarse, unsigned int *neighborZcoarse, unsigned int *neighborXfine, unsigned int *neighborYfine, unsigned int *neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int *indicesCoarse000, unsigned int *indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, ICellNeigh neighborFineToCoarse);
+
+template __global__ void scaleFC_compressible<false>( real *distributionsCoarse, real *distributionsFine, unsigned int *neighborXcoarse, unsigned int *neighborYcoarse, unsigned int *neighborZcoarse, unsigned int *neighborXfine, unsigned int *neighborYfine, unsigned int *neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int *indicesCoarse000, unsigned int *indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, ICellNeigh neighborFineToCoarse);
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
index 6d497d2a1ab7ec305bec4f1ad1ed2e2d63c4dc27..ff844cfecd9e4cad02e41879ddd68246d0fe9ab2 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -15,7 +15,7 @@ __global__ void LBInit27( int myid,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
                                      real* vParabel,
-                                     unsigned int size_Mat,
+                                     unsigned long long numberOfLBnodes,
                                      unsigned int grid_nx, 
                                      unsigned int grid_ny, 
                                      unsigned int grid_nz, 
@@ -24,33 +24,33 @@ __global__ void LBInit27( int myid,
                                      int maxlev)
 {
    Distributions27 D;
-   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+   D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+   D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+   D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+   D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+   D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+   D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+   D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+   D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+   D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+   D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+   D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+   D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+   D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+   D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+   D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+   D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+   D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+   D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+   D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+   D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+   D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+   D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+   D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+   D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    ////////////////////////////////////////////////////////////////////////////////
    unsigned int  k;                   // Zugriff auf arrays im device
    //
@@ -142,32 +142,32 @@ __global__ void LBInit27( int myid,
    real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
    (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
-   (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-   (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-   (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-   (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-   (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-   (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-   (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-   (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-   (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-   (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-   (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-   (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-   (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-   (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-   (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-   (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-   (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-   (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-   (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-   (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-   (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-   (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-   (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-   (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-   (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-   (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+   (D.f[DIR_P00])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+   (D.f[DIR_M00])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+   (D.f[DIR_0P0])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+   (D.f[DIR_0M0])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+   (D.f[DIR_00P])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+   (D.f[DIR_00M])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+   (D.f[DIR_PP0])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+   (D.f[DIR_MM0])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+   (D.f[DIR_PM0])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+   (D.f[DIR_MP0])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+   (D.f[DIR_P0P])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+   (D.f[DIR_M0M])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+   (D.f[DIR_P0M])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+   (D.f[DIR_M0P])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+   (D.f[DIR_0PP])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+   (D.f[DIR_0MM])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+   (D.f[DIR_0PM])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+   (D.f[DIR_0MP])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+   (D.f[DIR_PPP])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+   (D.f[DIR_MMM])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+   (D.f[DIR_PPM])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+   (D.f[DIR_MMP])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+   (D.f[DIR_PMP])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+   (D.f[DIR_MPM])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+   (D.f[DIR_PMM])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+   (D.f[DIR_MPP])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 
 }
 ////////////////////////////////////////////////////////////////////////////////
@@ -191,7 +191,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
                                                 real* ux,
                                                 real* uy,
                                                 real* uz,
-                                                unsigned int size_Mat,
+                                                unsigned long long numberOfLBnodes,
                                                 real* DD,
                                                 real omega,
                                                 bool EvenOrOdd)
@@ -207,7 +207,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
     const unsigned k = nx*(ny*z + y) + x;
     //////////////////////////////////////////////////////////////////////////
     
-    if(k<size_Mat)
+    if(k<numberOfLBnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         unsigned int BC;
@@ -218,63 +218,63 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
             Distributions27 D;
             if (EvenOrOdd==true)
             {
-                D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-                D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-                D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-                D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-                D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-                D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-                D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-                D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-                D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-                D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-                D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-                D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-                D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-                D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-                D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-                D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-                D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-                D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DD[DIR_000*size_Mat];
-                D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-                D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-                D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-                D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-                D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-                D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-                D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-                D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+                D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+                D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+                D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
             }
             else
             {
-                D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-                D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-                D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-                D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-                D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-                D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-                D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-                D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-                D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-                D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-                D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-                D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-                D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-                D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-                D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-                D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-                D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-                D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-                D.f[DIR_000] = &DD[DIR_000*size_Mat];
-                D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-                D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-                D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-                D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-                D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-                D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-                D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-                D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+                D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+                D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+                D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+                D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+                D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+                D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+                D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+                D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+                D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+                D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+                D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+                D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+                D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+                D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+                D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+                D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+                D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+                D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+                D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+                D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+                D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+                D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
+                D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+                D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+                D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+                D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+                D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
             }
             //////////////////////////////////////////////////////////////////////////
             real drho = rho[k];//0.0f;//
@@ -358,7 +358,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
 
             //////////////////////////////////////////////////////////////////////////
 
-            // the following code is copy and pasted from VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
+            // the following code is copied and pasted from Visitors/InitDistributionsBlockVisitor.cpp (formerly VirtualFluidsCore/Visitors/)
             // i.e. Konstantins code
 
             real ax = dvx1dx;
@@ -397,62 +397,62 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
             real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
             
             (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
-            (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-            (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-            (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-            (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-            (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-            (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-            (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-            (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-            (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-            (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-            (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-            (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-            (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-            (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-            (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-            (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-            (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-            (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-            (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-            (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-            (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-            (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-            (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-            (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-            (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-            (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+            (D.f[DIR_P00])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+            (D.f[DIR_M00])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+            (D.f[DIR_0P0])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+            (D.f[DIR_0M0])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+            (D.f[DIR_00P])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+            (D.f[DIR_00M])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+            (D.f[DIR_PP0])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+            (D.f[DIR_MM0])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+            (D.f[DIR_PM0])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+            (D.f[DIR_MP0])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+            (D.f[DIR_P0P])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+            (D.f[DIR_M0M])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+            (D.f[DIR_P0M])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+            (D.f[DIR_M0P])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+            (D.f[DIR_0PP])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+            (D.f[DIR_0MM])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+            (D.f[DIR_0PM])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+            (D.f[DIR_0MP])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+            (D.f[DIR_PPP])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+            (D.f[DIR_MMM])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+            (D.f[DIR_PPM])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+            (D.f[DIR_MMP])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+            (D.f[DIR_PMP])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+            (D.f[DIR_MPM])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+            (D.f[DIR_PMM])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+            (D.f[DIR_MPP])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 
             //////////////////////////////////////////////////////////////////////////
 
             (D.f[DIR_000])[kzero] += f_ZERO;
-            (D.f[DIR_P00   ])[ke   ] += f_E   ;
-            (D.f[DIR_M00   ])[kw   ] += f_E   ;
-            (D.f[DIR_0P0   ])[kn   ] += f_N   ;
-            (D.f[DIR_0M0   ])[ks   ] += f_N   ;
-            (D.f[DIR_00P   ])[kt   ] += f_T   ;
-            (D.f[DIR_00M   ])[kb   ] += f_T   ;
-            (D.f[DIR_PP0  ])[kne  ] += f_NE  ;
-            (D.f[DIR_MM0  ])[ksw  ] += f_NE  ;
-            (D.f[DIR_PM0  ])[kse  ] += f_SE  ;
-            (D.f[DIR_MP0  ])[knw  ] += f_SE  ;
-            (D.f[DIR_P0P  ])[kte  ] += f_TE  ;
-            (D.f[DIR_M0M  ])[kbw  ] += f_TE  ;
-            (D.f[DIR_P0M  ])[kbe  ] += f_BE  ;
-            (D.f[DIR_M0P  ])[ktw  ] += f_BE  ;
-            (D.f[DIR_0PP  ])[ktn  ] += f_TN  ;
-            (D.f[DIR_0MM  ])[kbs  ] += f_TN  ;
-            (D.f[DIR_0PM  ])[kbn  ] += f_BN  ;
-            (D.f[DIR_0MP  ])[kts  ] += f_BN  ;
-            (D.f[DIR_PPP ])[ktne ] += f_TNE ;
-            (D.f[DIR_MMM ])[kbsw ] += f_TNE ;
-            (D.f[DIR_PPM ])[kbne ] += f_TSW ;
-            (D.f[DIR_MMP ])[ktsw ] += f_TSW ;
-            (D.f[DIR_PMP ])[ktse ] += f_TSE ;
-            (D.f[DIR_MPM ])[kbnw ] += f_TSE ;
-            (D.f[DIR_PMM ])[kbse ] += f_TNW ;
-            (D.f[DIR_MPP ])[ktnw ] += f_TNW ;
+            (D.f[DIR_P00])[ke   ] += f_E   ;
+            (D.f[DIR_M00])[kw   ] += f_E   ;
+            (D.f[DIR_0P0])[kn   ] += f_N   ;
+            (D.f[DIR_0M0])[ks   ] += f_N   ;
+            (D.f[DIR_00P])[kt   ] += f_T   ;
+            (D.f[DIR_00M])[kb   ] += f_T   ;
+            (D.f[DIR_PP0])[kne  ] += f_NE  ;
+            (D.f[DIR_MM0])[ksw  ] += f_NE  ;
+            (D.f[DIR_PM0])[kse  ] += f_SE  ;
+            (D.f[DIR_MP0])[knw  ] += f_SE  ;
+            (D.f[DIR_P0P])[kte  ] += f_TE  ;
+            (D.f[DIR_M0M])[kbw  ] += f_TE  ;
+            (D.f[DIR_P0M])[kbe  ] += f_BE  ;
+            (D.f[DIR_M0P])[ktw  ] += f_BE  ;
+            (D.f[DIR_0PP])[ktn  ] += f_TN  ;
+            (D.f[DIR_0MM])[kbs  ] += f_TN  ;
+            (D.f[DIR_0PM])[kbn  ] += f_BN  ;
+            (D.f[DIR_0MP])[kts  ] += f_BN  ;
+            (D.f[DIR_PPP])[ktne ] += f_TNE ;
+            (D.f[DIR_MMM])[kbsw ] += f_TNE ;
+            (D.f[DIR_PPM])[kbne ] += f_TSW ;
+            (D.f[DIR_MMP])[ktsw ] += f_TSW ;
+            (D.f[DIR_PMP])[ktse ] += f_TSE ;
+            (D.f[DIR_MPM])[kbnw ] += f_TSE ;
+            (D.f[DIR_PMM])[kbse ] += f_TNW ;
+            (D.f[DIR_MPP])[ktnw ] += f_TNW ;
 
             //////////////////////////////////////////////////////////////////////////
         }
@@ -460,7 +460,7 @@ __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
 	    {
 		    //////////////////////////////////////////////////////////////////////////
 		    Distributions27 D;
-		    D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		    D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
 		    //////////////////////////////////////////////////////////////////////////
 		    (D.f[DIR_000])[k] = c96o1;
 		    //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
index c091aa8b9a29017ddc0f6ea6584e805d7afc4859..a4172403158adbd712e255676baa2616081e83b4 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
@@ -33,9 +33,9 @@
 /* Device code */
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 __global__ void InitAD27(
@@ -47,7 +47,7 @@ __global__ void InitAD27(
 	real* velocityX,
 	real* velocityY,
 	real* velocityZ,
-	uint size_Mat,
+	unsigned long long numberOfLBnodes,
 	real* distributionsAD,
 	bool isEvenTimestep)
 {
@@ -68,7 +68,7 @@ __global__ void InitAD27(
 
 	//////////////////////////////////////////////////////////////////////////
 	// run for all indices in size_Mat and fluid nodes
-	if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID))
+	if ((k < numberOfLBnodes) && (typeOfGridNode[k] == GEO_FLUID))
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -77,63 +77,63 @@ __global__ void InitAD27(
 		Distributions27 distAD;
 		if (isEvenTimestep)
 		{
-			distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
-			distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
-			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
-			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
-			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
-			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
-			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
-			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
-			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
-			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
-			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
-			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
-			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-			distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
-			distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
-			distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
-			distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
-			distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
-			distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
-			distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
-			distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
+			distAD.f[DIR_P00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+			distAD.f[DIR_M00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+			distAD.f[DIR_0P0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+			distAD.f[DIR_0M0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+			distAD.f[DIR_00P] = &distributionsAD[DIR_00P * numberOfLBnodes];
+			distAD.f[DIR_00M] = &distributionsAD[DIR_00M * numberOfLBnodes];
+			distAD.f[DIR_PP0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+			distAD.f[DIR_MM0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+			distAD.f[DIR_PM0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+			distAD.f[DIR_MP0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+			distAD.f[DIR_P0P] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+			distAD.f[DIR_M0M] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+			distAD.f[DIR_P0M] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+			distAD.f[DIR_M0P] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+			distAD.f[DIR_0PP] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+			distAD.f[DIR_0MM] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+			distAD.f[DIR_0PM] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+			distAD.f[DIR_0MP] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+			distAD.f[DIR_PPP] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+			distAD.f[DIR_MMP] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+			distAD.f[DIR_PMP] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+			distAD.f[DIR_MPP] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+			distAD.f[DIR_PPM] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+			distAD.f[DIR_MMM] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+			distAD.f[DIR_PMM] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+			distAD.f[DIR_MPM] = &distributionsAD[DIR_MPM * numberOfLBnodes];
 		}
 		else
 		{
-			distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
-			distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
-			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
-			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
-			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
-			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
-			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
-			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
-			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
-			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
-			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
-			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
-			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
-			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
-			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
-			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
-			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
-			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
-			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
-			distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
-			distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
-			distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
-			distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
-			distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
-			distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
-			distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
-			distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
+			distAD.f[DIR_M00] = &distributionsAD[DIR_P00 * numberOfLBnodes];
+			distAD.f[DIR_P00] = &distributionsAD[DIR_M00 * numberOfLBnodes];
+			distAD.f[DIR_0M0] = &distributionsAD[DIR_0P0 * numberOfLBnodes];
+			distAD.f[DIR_0P0] = &distributionsAD[DIR_0M0 * numberOfLBnodes];
+			distAD.f[DIR_00M] = &distributionsAD[DIR_00P * numberOfLBnodes];
+			distAD.f[DIR_00P] = &distributionsAD[DIR_00M * numberOfLBnodes];
+			distAD.f[DIR_MM0] = &distributionsAD[DIR_PP0 * numberOfLBnodes];
+			distAD.f[DIR_PP0] = &distributionsAD[DIR_MM0 * numberOfLBnodes];
+			distAD.f[DIR_MP0] = &distributionsAD[DIR_PM0 * numberOfLBnodes];
+			distAD.f[DIR_PM0] = &distributionsAD[DIR_MP0 * numberOfLBnodes];
+			distAD.f[DIR_M0M] = &distributionsAD[DIR_P0P * numberOfLBnodes];
+			distAD.f[DIR_P0P] = &distributionsAD[DIR_M0M * numberOfLBnodes];
+			distAD.f[DIR_M0P] = &distributionsAD[DIR_P0M * numberOfLBnodes];
+			distAD.f[DIR_P0M] = &distributionsAD[DIR_M0P * numberOfLBnodes];
+			distAD.f[DIR_0MM] = &distributionsAD[DIR_0PP * numberOfLBnodes];
+			distAD.f[DIR_0PP] = &distributionsAD[DIR_0MM * numberOfLBnodes];
+			distAD.f[DIR_0MP] = &distributionsAD[DIR_0PM * numberOfLBnodes];
+			distAD.f[DIR_0PM] = &distributionsAD[DIR_0MP * numberOfLBnodes];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000 * numberOfLBnodes];
+			distAD.f[DIR_MMM] = &distributionsAD[DIR_PPP * numberOfLBnodes];
+			distAD.f[DIR_PPM] = &distributionsAD[DIR_MMP * numberOfLBnodes];
+			distAD.f[DIR_MPM] = &distributionsAD[DIR_PMP * numberOfLBnodes];
+			distAD.f[DIR_PMM] = &distributionsAD[DIR_MPP * numberOfLBnodes];
+			distAD.f[DIR_MMP] = &distributionsAD[DIR_PPM * numberOfLBnodes];
+			distAD.f[DIR_PPP] = &distributionsAD[DIR_MMM * numberOfLBnodes];
+			distAD.f[DIR_MPP] = &distributionsAD[DIR_PMM * numberOfLBnodes];
+			distAD.f[DIR_PMP] = &distributionsAD[DIR_MPM * numberOfLBnodes];
 		}
 		//////////////////////////////////////////////////////////////////////////
 		//! - Set local velocities and concetration
@@ -178,32 +178,32 @@ __global__ void InitAD27(
 		real cu_sq = c3o2*(vx1*vx1 + vx2*vx2 + vx3*vx3);
 
 		(distAD.f[DIR_000])[kzero] = c8o27  * conc * (c1o1 - cu_sq);
-		(distAD.f[DIR_P00   ])[ke   ] = c2o27  * conc * (c1o1 + c3o1 * ( vx1            ) + c9o2 * ( vx1            ) * ( vx1            ) - cu_sq);
-		(distAD.f[DIR_M00   ])[kw   ] = c2o27  * conc * (c1o1 + c3o1 * (-vx1            ) + c9o2 * (-vx1            ) * (-vx1            ) - cu_sq);
-		(distAD.f[DIR_0P0   ])[kn   ] = c2o27  * conc * (c1o1 + c3o1 * (       vx2      ) + c9o2 * (       vx2      ) * (       vx2      ) - cu_sq);
-		(distAD.f[DIR_0M0   ])[ks   ] = c2o27  * conc * (c1o1 + c3o1 * (     - vx2      ) + c9o2 * (     - vx2      ) * (     - vx2      ) - cu_sq);
-		(distAD.f[DIR_00P   ])[kt   ] = c2o27  * conc * (c1o1 + c3o1 * (             vx3) + c9o2 * (             vx3) * (             vx3) - cu_sq);
-		(distAD.f[DIR_00M   ])[kb   ] = c2o27  * conc * (c1o1 + c3o1 * (           - vx3) + c9o2 * (           - vx3) * (           - vx3) - cu_sq);
-		(distAD.f[DIR_PP0  ])[kne  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 + vx2      ) + c9o2 * ( vx1 + vx2      ) * ( vx1 + vx2      ) - cu_sq);
-		(distAD.f[DIR_MM0  ])[ksw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 - vx2      ) + c9o2 * (-vx1 - vx2      ) * (-vx1 - vx2      ) - cu_sq);
-		(distAD.f[DIR_PM0  ])[kse  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 - vx2      ) + c9o2 * ( vx1 - vx2      ) * ( vx1 - vx2      ) - cu_sq);
-		(distAD.f[DIR_MP0  ])[knw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 + vx2      ) + c9o2 * (-vx1 + vx2      ) * (-vx1 + vx2      ) - cu_sq);
-		(distAD.f[DIR_P0P  ])[kte  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       + vx3) + c9o2 * ( vx1       + vx3) * ( vx1       + vx3) - cu_sq);
-		(distAD.f[DIR_M0M  ])[kbw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       - vx3) + c9o2 * (-vx1       - vx3) * (-vx1       - vx3) - cu_sq);
-		(distAD.f[DIR_P0M  ])[kbe  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       - vx3) + c9o2 * ( vx1       - vx3) * ( vx1       - vx3) - cu_sq);
-		(distAD.f[DIR_M0P  ])[ktw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       + vx3) + c9o2 * (-vx1       + vx3) * (-vx1       + vx3) - cu_sq);
-		(distAD.f[DIR_0PP  ])[ktn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 + vx3) + c9o2 * (       vx2 + vx3) * (       vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_0MM  ])[kbs  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 - vx3) + c9o2 * (     - vx2 - vx3) * (     - vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_0PM  ])[kbn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 - vx3) + c9o2 * (       vx2 - vx3) * (       vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_0MP  ])[kts  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 + vx3) + c9o2 * (     - vx2 + vx3) * (     - vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_PPP ])[ktne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 + vx3) + c9o2 * ( vx1 + vx2 + vx3) * ( vx1 + vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_MMM ])[kbsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_PPM ])[kbne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 - vx3) + c9o2 * ( vx1 + vx2 - vx3) * ( vx1 + vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_MMP ])[ktsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_PMP ])[ktse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 + vx3) + c9o2 * ( vx1 - vx2 + vx3) * ( vx1 - vx2 + vx3) - cu_sq);
-		(distAD.f[DIR_MPM ])[kbnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_PMM ])[kbse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 - vx3) + c9o2 * ( vx1 - vx2 - vx3) * ( vx1 - vx2 - vx3) - cu_sq);
-		(distAD.f[DIR_MPP ])[ktnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_P00])[ke   ] = c2o27  * conc * (c1o1 + c3o1 * ( vx1            ) + c9o2 * ( vx1            ) * ( vx1            ) - cu_sq);
+		(distAD.f[DIR_M00])[kw   ] = c2o27  * conc * (c1o1 + c3o1 * (-vx1            ) + c9o2 * (-vx1            ) * (-vx1            ) - cu_sq);
+		(distAD.f[DIR_0P0])[kn   ] = c2o27  * conc * (c1o1 + c3o1 * (       vx2      ) + c9o2 * (       vx2      ) * (       vx2      ) - cu_sq);
+		(distAD.f[DIR_0M0])[ks   ] = c2o27  * conc * (c1o1 + c3o1 * (     - vx2      ) + c9o2 * (     - vx2      ) * (     - vx2      ) - cu_sq);
+		(distAD.f[DIR_00P])[kt   ] = c2o27  * conc * (c1o1 + c3o1 * (             vx3) + c9o2 * (             vx3) * (             vx3) - cu_sq);
+		(distAD.f[DIR_00M])[kb   ] = c2o27  * conc * (c1o1 + c3o1 * (           - vx3) + c9o2 * (           - vx3) * (           - vx3) - cu_sq);
+		(distAD.f[DIR_PP0])[kne  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 + vx2      ) + c9o2 * ( vx1 + vx2      ) * ( vx1 + vx2      ) - cu_sq);
+		(distAD.f[DIR_MM0])[ksw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 - vx2      ) + c9o2 * (-vx1 - vx2      ) * (-vx1 - vx2      ) - cu_sq);
+		(distAD.f[DIR_PM0])[kse  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 - vx2      ) + c9o2 * ( vx1 - vx2      ) * ( vx1 - vx2      ) - cu_sq);
+		(distAD.f[DIR_MP0])[knw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 + vx2      ) + c9o2 * (-vx1 + vx2      ) * (-vx1 + vx2      ) - cu_sq);
+		(distAD.f[DIR_P0P])[kte  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       + vx3) + c9o2 * ( vx1       + vx3) * ( vx1       + vx3) - cu_sq);
+		(distAD.f[DIR_M0M])[kbw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       - vx3) + c9o2 * (-vx1       - vx3) * (-vx1       - vx3) - cu_sq);
+		(distAD.f[DIR_P0M])[kbe  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       - vx3) + c9o2 * ( vx1       - vx3) * ( vx1       - vx3) - cu_sq);
+		(distAD.f[DIR_M0P])[ktw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       + vx3) + c9o2 * (-vx1       + vx3) * (-vx1       + vx3) - cu_sq);
+		(distAD.f[DIR_0PP])[ktn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 + vx3) + c9o2 * (       vx2 + vx3) * (       vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_0MM])[kbs  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 - vx3) + c9o2 * (     - vx2 - vx3) * (     - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_0PM])[kbn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 - vx3) + c9o2 * (       vx2 - vx3) * (       vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_0MP])[kts  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 + vx3) + c9o2 * (     - vx2 + vx3) * (     - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_PPP])[ktne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 + vx3) + c9o2 * ( vx1 + vx2 + vx3) * ( vx1 + vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_MMM])[kbsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_PPM])[kbne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 - vx3) + c9o2 * ( vx1 + vx2 - vx3) * ( vx1 + vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_MMP])[ktsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_PMP])[ktse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 + vx3) + c9o2 * ( vx1 - vx2 + vx3) * ( vx1 - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_MPM])[kbnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_PMM])[kbse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 - vx3) + c9o2 * ( vx1 - vx2 - vx3) * ( vx1 - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_MPP])[ktnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
 	}
 }
 
@@ -263,63 +263,63 @@ __global__ void InitAD27(
 //          Distributions27 D27;
 //          if (EvenOrOdd==true)
 //          {
-//             D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
-//             D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
-//             D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
-//             D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
-//             D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
-//             D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
-//             D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
-//             D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
-//             D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
-//             D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
-//             D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
-//             D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
-//             D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
-//             D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
-//             D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
-//             D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
-//             D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
-//             D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
-//             D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-//             D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
-//             D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
-//             D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
-//             D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
-//             D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
-//             D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
-//             D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
-//             D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
+//             D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat];
+//             D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat];
+//             D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat];
+//             D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat];
+//             D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat];
+//             D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat];
+//             D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat];
+//             D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat];
+//             D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat];
+//             D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat];
+//             D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat];
+//             D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat];
+//             D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat];
+//             D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat];
+//             D27.f[DIR_0PP] = &DD27[DIR_0PP * size_Mat];
+//             D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat];
+//             D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat];
+//             D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat];
+//             D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+//             D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat];
+//             D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat];
+//             D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat];
+//             D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat];
+//             D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat];
+//             D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat];
+//             D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat];
+//             D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat];
 //          }
 //          else
 //          {
-//             D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
-//             D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
-//             D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
-//             D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
-//             D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
-//             D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
-//             D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
-//             D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
-//             D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
-//             D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
-//             D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
-//             D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
-//             D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
-//             D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
-//             D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
-//             D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
-//             D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
-//             D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
-//             D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-//             D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
-//             D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
-//             D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
-//             D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
-//             D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
-//             D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
-//             D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
-//             D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+//             D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat];
+//             D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat];
+//             D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat];
+//             D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat];
+//             D27.f[DIR_00M] = &DD27[DIR_00P * size_Mat];
+//             D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat];
+//             D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat];
+//             D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat];
+//             D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat];
+//             D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat];
+//             D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat];
+//             D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat];
+//             D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat];
+//             D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat];
+//             D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat];
+//             D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat];
+//             D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat];
+//             D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat];
+//             D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+//             D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat];
+//             D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat];
+//             D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat];
+//             D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat];
+//             D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat];
+//             D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat];
+//             D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat];
+//             D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat];
 //          }
 //          //////////////////////////////////////////////////////////////////////////
 //          real ConcD = Conc[k];
@@ -391,32 +391,32 @@ __global__ void InitAD27(
 //          real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
 //          (D27.f[DIR_000])[kzero] =   c8o27* ConcD*(c1o1-cu_sq);
-//          (D27.f[DIR_P00   ])[ke   ] =   c2o27* ConcD*(c1o1+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-//          (D27.f[DIR_M00   ])[kw   ] =   c2o27* ConcD*(c1o1+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-//          (D27.f[DIR_0P0   ])[kn   ] =   c2o27* ConcD*(c1o1+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-//          (D27.f[DIR_0M0   ])[ks   ] =   c2o27* ConcD*(c1o1+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-//          (D27.f[DIR_00P   ])[kt   ] =   c2o27* ConcD*(c1o1+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-//          (D27.f[DIR_00M   ])[kb   ] =   c2o27* ConcD*(c1o1+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-//          (D27.f[DIR_PP0  ])[kne  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-//          (D27.f[DIR_MM0  ])[ksw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-//          (D27.f[DIR_PM0  ])[kse  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-//          (D27.f[DIR_MP0  ])[knw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-//          (D27.f[DIR_P0P  ])[kte  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-//          (D27.f[DIR_M0M  ])[kbw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-//          (D27.f[DIR_P0M  ])[kbe  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-//          (D27.f[DIR_M0P  ])[ktw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-//          (D27.f[DIR_0PP  ])[ktn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-//          (D27.f[DIR_0MM  ])[kbs  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-//          (D27.f[DIR_0PM  ])[kbn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-//          (D27.f[DIR_0MP  ])[kts  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-//          (D27.f[DIR_PPP ])[ktne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-//          (D27.f[DIR_MMM ])[kbsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-//          (D27.f[DIR_PPM ])[kbne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-//          (D27.f[DIR_MMP ])[ktsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-//          (D27.f[DIR_PMP ])[ktse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-//          (D27.f[DIR_MPM ])[kbnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-//          (D27.f[DIR_PMM ])[kbse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-//          (D27.f[DIR_MPP ])[ktnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+//          (D27.f[DIR_P00])[ke   ] =   c2o27* ConcD*(c1o1+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+//          (D27.f[DIR_M00])[kw   ] =   c2o27* ConcD*(c1o1+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+//          (D27.f[DIR_0P0])[kn   ] =   c2o27* ConcD*(c1o1+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+//          (D27.f[DIR_0M0])[ks   ] =   c2o27* ConcD*(c1o1+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+//          (D27.f[DIR_00P])[kt   ] =   c2o27* ConcD*(c1o1+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+//          (D27.f[DIR_00M])[kb   ] =   c2o27* ConcD*(c1o1+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+//          (D27.f[DIR_PP0])[kne  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+//          (D27.f[DIR_MM0])[ksw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+//          (D27.f[DIR_PM0])[kse  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+//          (D27.f[DIR_MP0])[knw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+//          (D27.f[DIR_P0P])[kte  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+//          (D27.f[DIR_M0M])[kbw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+//          (D27.f[DIR_P0M])[kbe  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+//          (D27.f[DIR_M0P])[ktw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+//          (D27.f[DIR_0PP])[ktn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+//          (D27.f[DIR_0MM])[kbs  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+//          (D27.f[DIR_0PM])[kbn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+//          (D27.f[DIR_0MP])[kts  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+//          (D27.f[DIR_PPP])[ktne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+//          (D27.f[DIR_MMM])[kbsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+//          (D27.f[DIR_PPM])[kbne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+//          (D27.f[DIR_MMP])[ktsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+//          (D27.f[DIR_PMP])[ktse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+//          (D27.f[DIR_MPM])[kbnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+//          (D27.f[DIR_PMM])[kbse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+//          (D27.f[DIR_MPP])[ktnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 //          ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //       }
 //    }
@@ -448,7 +448,7 @@ __global__ void InitAD7( unsigned int* neighborX,
                                     real* ux,
                                     real* uy,
                                     real* uz,
-                                    unsigned int size_Mat,
+                                    unsigned long long numberOfLBnodes,
                                     real* DD7,
                                     bool EvenOrOdd)
 {
@@ -463,7 +463,7 @@ __global__ void InitAD7( unsigned int* neighborX,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<size_Mat)
+   if(k<numberOfLBnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       unsigned int BC;
@@ -474,23 +474,23 @@ __global__ void InitAD7( unsigned int* neighborX,
          Distributions7 D7;
          if (EvenOrOdd==true)
          {
-            D7.f[0] = &DD7[0*size_Mat];
-            D7.f[1] = &DD7[1*size_Mat];
-            D7.f[2] = &DD7[2*size_Mat];
-            D7.f[3] = &DD7[3*size_Mat];
-            D7.f[4] = &DD7[4*size_Mat];
-            D7.f[5] = &DD7[5*size_Mat];
-            D7.f[6] = &DD7[6*size_Mat];
+            D7.f[0] = &DD7[0*numberOfLBnodes];
+            D7.f[1] = &DD7[1*numberOfLBnodes];
+            D7.f[2] = &DD7[2*numberOfLBnodes];
+            D7.f[3] = &DD7[3*numberOfLBnodes];
+            D7.f[4] = &DD7[4*numberOfLBnodes];
+            D7.f[5] = &DD7[5*numberOfLBnodes];
+            D7.f[6] = &DD7[6*numberOfLBnodes];
          }
          else
          {
-            D7.f[0] = &DD7[0*size_Mat];
-            D7.f[2] = &DD7[1*size_Mat];
-            D7.f[1] = &DD7[2*size_Mat];
-            D7.f[4] = &DD7[3*size_Mat];
-            D7.f[3] = &DD7[4*size_Mat];
-            D7.f[6] = &DD7[5*size_Mat];
-            D7.f[5] = &DD7[6*size_Mat];
+            D7.f[0] = &DD7[0*numberOfLBnodes];
+            D7.f[2] = &DD7[1*numberOfLBnodes];
+            D7.f[1] = &DD7[2*numberOfLBnodes];
+            D7.f[4] = &DD7[3*numberOfLBnodes];
+            D7.f[3] = &DD7[4*numberOfLBnodes];
+            D7.f[6] = &DD7[5*numberOfLBnodes];
+            D7.f[5] = &DD7[6*numberOfLBnodes];
          }
          //////////////////////////////////////////////////////////////////////////
          real ConcD = Conc[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
deleted file mode 100644
index 2f6a11aa17398b65858508c3f94b241c16551b37..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
+++ /dev/null
@@ -1,177 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file KernelUtilities.h
-//! \ingroup GPU
-//! \author Martin Schoenherr, Anna Wellmann
-//======================================================================================
-#ifndef KERNELUTILS_H
-#define KERNELUTILS_H
-
-#include "LBM/LB.h"
-#include "lbm/constants/D3Q27.h"
-#include "lbm/constants/NumericConstants.h"
-
-using namespace vf::lbm::constant;
-using namespace vf::lbm::dir;
-
-__inline__ __device__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const uint numberOfLBnodes, const bool isEvenTimestep)
-{
-    if (isEvenTimestep)
-    {
-        dist.f[DIR_P00   ] = &distributionArray[DIR_P00   *numberOfLBnodes];
-        dist.f[DIR_M00   ] = &distributionArray[DIR_M00   *numberOfLBnodes];
-        dist.f[DIR_0P0   ] = &distributionArray[DIR_0P0   *numberOfLBnodes];
-        dist.f[DIR_0M0   ] = &distributionArray[DIR_0M0   *numberOfLBnodes];
-        dist.f[DIR_00P   ] = &distributionArray[DIR_00P   *numberOfLBnodes];
-        dist.f[DIR_00M   ] = &distributionArray[DIR_00M   *numberOfLBnodes];
-        dist.f[DIR_PP0  ] = &distributionArray[DIR_PP0  *numberOfLBnodes];
-        dist.f[DIR_MM0  ] = &distributionArray[DIR_MM0  *numberOfLBnodes];
-        dist.f[DIR_PM0  ] = &distributionArray[DIR_PM0  *numberOfLBnodes];
-        dist.f[DIR_MP0  ] = &distributionArray[DIR_MP0  *numberOfLBnodes];
-        dist.f[DIR_P0P  ] = &distributionArray[DIR_P0P  *numberOfLBnodes];
-        dist.f[DIR_M0M  ] = &distributionArray[DIR_M0M  *numberOfLBnodes];
-        dist.f[DIR_P0M  ] = &distributionArray[DIR_P0M  *numberOfLBnodes];
-        dist.f[DIR_M0P  ] = &distributionArray[DIR_M0P  *numberOfLBnodes];
-        dist.f[DIR_0PP  ] = &distributionArray[DIR_0PP  *numberOfLBnodes];
-        dist.f[DIR_0MM  ] = &distributionArray[DIR_0MM  *numberOfLBnodes];
-        dist.f[DIR_0PM  ] = &distributionArray[DIR_0PM  *numberOfLBnodes];
-        dist.f[DIR_0MP  ] = &distributionArray[DIR_0MP  *numberOfLBnodes];
-        dist.f[DIR_000] = &distributionArray[DIR_000*numberOfLBnodes];
-        dist.f[DIR_PPP ] = &distributionArray[DIR_PPP *numberOfLBnodes];
-        dist.f[DIR_MMP ] = &distributionArray[DIR_MMP *numberOfLBnodes];
-        dist.f[DIR_PMP ] = &distributionArray[DIR_PMP *numberOfLBnodes];
-        dist.f[DIR_MPP ] = &distributionArray[DIR_MPP *numberOfLBnodes];
-        dist.f[DIR_PPM ] = &distributionArray[DIR_PPM *numberOfLBnodes];
-        dist.f[DIR_MMM ] = &distributionArray[DIR_MMM *numberOfLBnodes];
-        dist.f[DIR_PMM ] = &distributionArray[DIR_PMM *numberOfLBnodes];
-        dist.f[DIR_MPM ] = &distributionArray[DIR_MPM *numberOfLBnodes];
-    }
-    else
-    {
-         dist.f[DIR_M00   ] = &distributionArray[DIR_P00   *numberOfLBnodes];
-         dist.f[DIR_P00   ] = &distributionArray[DIR_M00   *numberOfLBnodes];
-         dist.f[DIR_0M0   ] = &distributionArray[DIR_0P0   *numberOfLBnodes];
-         dist.f[DIR_0P0   ] = &distributionArray[DIR_0M0   *numberOfLBnodes];
-         dist.f[DIR_00M   ] = &distributionArray[DIR_00P   *numberOfLBnodes];
-         dist.f[DIR_00P   ] = &distributionArray[DIR_00M   *numberOfLBnodes];
-         dist.f[DIR_MM0  ] = &distributionArray[DIR_PP0  *numberOfLBnodes];
-         dist.f[DIR_PP0  ] = &distributionArray[DIR_MM0  *numberOfLBnodes];
-         dist.f[DIR_MP0  ] = &distributionArray[DIR_PM0  *numberOfLBnodes];
-         dist.f[DIR_PM0  ] = &distributionArray[DIR_MP0  *numberOfLBnodes];
-         dist.f[DIR_M0M  ] = &distributionArray[DIR_P0P  *numberOfLBnodes];
-         dist.f[DIR_P0P  ] = &distributionArray[DIR_M0M  *numberOfLBnodes];
-         dist.f[DIR_M0P  ] = &distributionArray[DIR_P0M  *numberOfLBnodes];
-         dist.f[DIR_P0M  ] = &distributionArray[DIR_M0P  *numberOfLBnodes];
-         dist.f[DIR_0MM  ] = &distributionArray[DIR_0PP  *numberOfLBnodes];
-         dist.f[DIR_0PP  ] = &distributionArray[DIR_0MM  *numberOfLBnodes];
-         dist.f[DIR_0MP  ] = &distributionArray[DIR_0PM  *numberOfLBnodes];
-         dist.f[DIR_0PM  ] = &distributionArray[DIR_0MP  *numberOfLBnodes];
-         dist.f[DIR_000] = &distributionArray[DIR_000*numberOfLBnodes];
-         dist.f[DIR_PPP ] = &distributionArray[DIR_MMM *numberOfLBnodes];
-         dist.f[DIR_MMP ] = &distributionArray[DIR_PPM *numberOfLBnodes];
-         dist.f[DIR_PMP ] = &distributionArray[DIR_MPM *numberOfLBnodes];
-         dist.f[DIR_MPP ] = &distributionArray[DIR_PMM *numberOfLBnodes];
-         dist.f[DIR_PPM ] = &distributionArray[DIR_MMP *numberOfLBnodes];
-         dist.f[DIR_MMM ] = &distributionArray[DIR_PPP *numberOfLBnodes];
-         dist.f[DIR_PMM ] = &distributionArray[DIR_MPP *numberOfLBnodes];
-         dist.f[DIR_MPM ] = &distributionArray[DIR_PMP *numberOfLBnodes];
-    }
-}
-
-__inline__ __device__ void getPointersToSubgridDistances(SubgridDistances27& subgridD, real* subgridDistances, const unsigned int numberOfSubgridIndices)
-{
-    subgridD.q[DIR_P00   ] = &subgridDistances[DIR_P00    *numberOfSubgridIndices];
-    subgridD.q[DIR_M00   ] = &subgridDistances[DIR_M00    *numberOfSubgridIndices];
-    subgridD.q[DIR_0P0   ] = &subgridDistances[DIR_0P0    *numberOfSubgridIndices];
-    subgridD.q[DIR_0M0   ] = &subgridDistances[DIR_0M0    *numberOfSubgridIndices];
-    subgridD.q[DIR_00P   ] = &subgridDistances[DIR_00P    *numberOfSubgridIndices];
-    subgridD.q[DIR_00M   ] = &subgridDistances[DIR_00M    *numberOfSubgridIndices];
-    subgridD.q[DIR_PP0  ] = &subgridDistances[DIR_PP0   *numberOfSubgridIndices];
-    subgridD.q[DIR_MM0  ] = &subgridDistances[DIR_MM0   *numberOfSubgridIndices];
-    subgridD.q[DIR_PM0  ] = &subgridDistances[DIR_PM0   *numberOfSubgridIndices];
-    subgridD.q[DIR_MP0  ] = &subgridDistances[DIR_MP0   *numberOfSubgridIndices];
-    subgridD.q[DIR_P0P  ] = &subgridDistances[DIR_P0P   *numberOfSubgridIndices];
-    subgridD.q[DIR_M0M  ] = &subgridDistances[DIR_M0M   *numberOfSubgridIndices];
-    subgridD.q[DIR_P0M  ] = &subgridDistances[DIR_P0M   *numberOfSubgridIndices];
-    subgridD.q[DIR_M0P  ] = &subgridDistances[DIR_M0P   *numberOfSubgridIndices];
-    subgridD.q[DIR_0PP  ] = &subgridDistances[DIR_0PP   *numberOfSubgridIndices];
-    subgridD.q[DIR_0MM  ] = &subgridDistances[DIR_0MM   *numberOfSubgridIndices];
-    subgridD.q[DIR_0PM  ] = &subgridDistances[DIR_0PM   *numberOfSubgridIndices];
-    subgridD.q[DIR_0MP  ] = &subgridDistances[DIR_0MP   *numberOfSubgridIndices];
-    subgridD.q[DIR_000] = &subgridDistances[DIR_000 *numberOfSubgridIndices];
-    subgridD.q[DIR_PPP ] = &subgridDistances[DIR_PPP  *numberOfSubgridIndices];
-    subgridD.q[DIR_MMP ] = &subgridDistances[DIR_MMP  *numberOfSubgridIndices];
-    subgridD.q[DIR_PMP ] = &subgridDistances[DIR_PMP  *numberOfSubgridIndices];
-    subgridD.q[DIR_MPP ] = &subgridDistances[DIR_MPP  *numberOfSubgridIndices];
-    subgridD.q[DIR_PPM ] = &subgridDistances[DIR_PPM  *numberOfSubgridIndices];
-    subgridD.q[DIR_MMM ] = &subgridDistances[DIR_MMM  *numberOfSubgridIndices];
-    subgridD.q[DIR_PMM ] = &subgridDistances[DIR_PMM  *numberOfSubgridIndices];
-    subgridD.q[DIR_MPM ] = &subgridDistances[DIR_MPM  *numberOfSubgridIndices];
-}
-
-__inline__ __device__ real getEquilibriumForBC(const real& drho, const real& velocity, const real& cu_sq, const real weight)
-{
-    return weight * (drho + c9o2 * velocity * velocity * (c1o1 + drho) - cu_sq);
-}
-
-__inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, const real& f, const real& fInverse, const real& feq, 
-                                                                const real& omega, const real& velocity, const real weight)
-{
-
-    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
-           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q);
-}
-
-__inline__ __device__ real getBounceBackDistributionForVeloBC(  const real& f, 
-                                                                const real& velocity, const real weight)
-{
-
-    return f - (c6o1 * weight * velocity);
-}
-
-__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq, 
-                                                                  const real& omega)
-{
-
-    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
-           + (q * (f + fInverse)) / (c1o1 + q);
-}
-
-
-__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, 
-                                                                            const real& omega, const real& drho, const real& velocity, const real weight)
-{
-
-    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
-           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q) - weight * drho;
-}
-
-
-
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
index 51368bbe09e6fc43a7a1ff6b8b15387417774964..df88632f8fd48b3ae8d50b444a65076ab0a0c12f 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
@@ -14,17 +14,17 @@
 
 #include <iomanip>
 
-//#include "Core/Logger/Logger.h"
+#include "cuda/CudaGrid.h"
 
 #include "Parameter/Parameter.h"
 // includes, kernels
 #include "GPU/GPU_Kernels.cuh"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
-__global__                 void kineticEnergyKernel  (real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint size_Mat);
+__global__                 void kineticEnergyKernel  (real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, unsigned long long numberOfLBnodes);
 
 __host__ __device__ inline void kineticEnergyFunction(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint index);
 
@@ -35,56 +35,42 @@ bool KineticEnergyAnalyzer::run(uint iter)
     if( iter % this->analyzeIter != 0 ) return false;
 
 	int lev = 0;
-	int size_Mat = this->para->getParD(lev)->numberOfNodes;
-
-    thrust::device_vector<real> kineticEnergy(size_Mat, c0o1);
-    thrust::device_vector<uint> isFluid      (size_Mat, 0);
-
-	unsigned int numberOfThreads = 128;
-    int Grid = (size_Mat / numberOfThreads)+1;
-    int Grid1, Grid2;
-    if (Grid>512)
-    {
-       Grid1 = 512;
-       Grid2 = (Grid/Grid1)+1;
-    } 
-    else
-    {
-       Grid1 = 1;
-       Grid2 = Grid;
-    }
-    dim3 grid(Grid1, Grid2);
-    dim3 threads(numberOfThreads, 1, 1 );
-
-    LBCalcMacCompSP27<<< grid, threads >>> (para->getParD(lev)->velocityX,
-											para->getParD(lev)->velocityY,
-											para->getParD(lev)->velocityZ,
-											para->getParD(lev)->rho,
-											para->getParD(lev)->pressure,
-											para->getParD(lev)->typeOfGridNode,
-											para->getParD(lev)->neighborX,
-											para->getParD(lev)->neighborY,
-											para->getParD(lev)->neighborZ,
-											para->getParD(lev)->numberOfNodes,
-											para->getParD(lev)->distributions.f[0],
-											para->getParD(lev)->isEvenTimestep); 
-    getLastCudaError("LBCalcMacSP27 execution failed"); 
-
-	kineticEnergyKernel <<< grid, threads >>> ( para->getParD(lev)->velocityX, 
-											    para->getParD(lev)->velocityY, 
-												para->getParD(lev)->velocityZ, 
-												para->getParD(lev)->rho, 
-											    para->getParD(lev)->neighborX,
-											    para->getParD(lev)->neighborY,
-											    para->getParD(lev)->neighborZ,
-											    para->getParD(lev)->neighborInverse,
-											    para->getParD(lev)->typeOfGridNode,
-												kineticEnergy.data().get(), 
-                                                isFluid.data().get(),
-												size_Mat);
-	cudaDeviceSynchronize();
-
-	 getLastCudaError("kineticEnergyKernel execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(lev)->numberofthreads, para->getParD(lev)->numberOfNodes);
+
+    thrust::device_vector<real> kineticEnergy( this->para->getParD(lev)->numberOfNodes, c0o1);
+    thrust::device_vector<uint> isFluid      ( this->para->getParD(lev)->numberOfNodes, 0);
+
+    LBCalcMacCompSP27<<< grid.grid, grid.threads >>>(
+        para->getParD(lev)->velocityX,
+        para->getParD(lev)->velocityY,
+        para->getParD(lev)->velocityZ,
+        para->getParD(lev)->rho,
+        para->getParD(lev)->pressure,
+        para->getParD(lev)->typeOfGridNode,
+        para->getParD(lev)->neighborX,
+        para->getParD(lev)->neighborY,
+        para->getParD(lev)->neighborZ,
+        para->getParD(lev)->numberOfNodes,
+        para->getParD(lev)->distributions.f[0],
+        para->getParD(lev)->isEvenTimestep); 
+    getLastCudaError("LBCalcMacCompSP27 execution failed"); 
+
+    kineticEnergyKernel<<< grid.grid, grid.threads >>>(
+        para->getParD(lev)->velocityX, 
+        para->getParD(lev)->velocityY, 
+        para->getParD(lev)->velocityZ, 
+        para->getParD(lev)->rho, 
+        para->getParD(lev)->neighborX,
+        para->getParD(lev)->neighborY,
+        para->getParD(lev)->neighborZ,
+        para->getParD(lev)->neighborInverse,
+        para->getParD(lev)->typeOfGridNode,
+        kineticEnergy.data().get(), 
+        isFluid.data().get(),
+        para->getParD(lev)->numberOfNodes);
+    cudaDeviceSynchronize();
+
+    getLastCudaError("kineticEnergyKernel execution failed");
 
 	 real EKin               = thrust::reduce(kineticEnergy.begin(), kineticEnergy.end(), c0o1, thrust::plus<real>());
      uint numberOfFluidNodes = thrust::reduce(isFluid.begin(),       isFluid.end(),       0,    thrust::plus<uint>());
@@ -99,7 +85,7 @@ bool KineticEnergyAnalyzer::run(uint iter)
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-__global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, uint size_Mat)
+__global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uint* neighborX, uint* neighborY, uint* neighborZ, uint* neighborWSB, uint* geo, real* kineticEnergy, uint* isFluid, unsigned long long numberOfLBnodes)
 {
     //////////////////////////////////////////////////////////////////////////
     const uint x = threadIdx.x;  // Globaler x-Index 
@@ -115,7 +101,7 @@ __global__ void kineticEnergyKernel(real* vx, real* vy, real* vz, real* rho, uin
 
     //if( index % 34 == 0 || index % 34 == 33 ) return;
 
-    if( index >= size_Mat) return;
+    if( index >= numberOfLBnodes) return; // compare in 64 bit: casting the node count to uint would truncate counts >= 2^32
 
 	unsigned int BC;
 	BC = geo[index];
@@ -153,7 +139,6 @@ KineticEnergyAnalyzer::KineticEnergyAnalyzer(SPtr<Parameter> para, uint analyzeI
 
 void KineticEnergyAnalyzer::writeToFile(std::string filename)
 {
-    //*logging::out << logging::Logger::INFO_INTERMEDIATE << "KineticEnergyAnalyzer::writeToFile( " << filename << " )" << "\n";
 	std::cout << "KineticEnergyAnalyzer::writeToFile( " << filename << " )" << "\n";
 
     std::ofstream file;
@@ -165,7 +150,6 @@ void KineticEnergyAnalyzer::writeToFile(std::string filename)
 
     file.close();
 
-    //*logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
 	std::cout << "done!\n";
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.h b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.h
index ca5a97aaef3432dfef78a1ce8822e85dca115207..11759c24460a1541d9aa66e325e6b04c15c1d488 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.h
@@ -7,7 +7,7 @@
 
 
 #include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "VirtualFluids_GPU_export.h"
 
 class Parameter;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index ebab2b3e8a38bc221017ffb604d6f1c142afe1d9..0a54db35bc4598702f3c3a3b194eb054a9ca478a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -18,2989 +18,3049 @@
 
 #include "Parameter/Parameter.h"
 //////////////////////////////////////////////////////////////////////////
-void KernelCas27( unsigned int grid_nx,
-                             unsigned int grid_ny,
-                             unsigned int grid_nz,
-                             real s9,
-                             unsigned int* bcMatD,
-                             unsigned int* neighborX,
-                             unsigned int* neighborY,
-                             unsigned int* neighborZ,
-                             real* DD,
-                             int size_Mat,
-                             bool EvenOrOdd)
+void KernelCas27(
+    unsigned int grid_nx,
+    unsigned int grid_ny,
+    unsigned int grid_nz,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
 {
-   dim3 threads       ( grid_nx, 1, 1 );
-   dim3 grid          ( grid_ny, grid_nz );   // Gitter fuer Kollision und Propagation
-
-      LB_Kernel_Casc27<<< grid, threads >>>( s9,
-                                             bcMatD,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             DD,
-                                             size_Mat,
-                                             EvenOrOdd);
-     getLastCudaError("LB_Kernel_Casc27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelCasSP27( unsigned int numberOfThreads,
-                               real s9,
-                               unsigned int* bcMatD,
-                               unsigned int* neighborX,
-                               unsigned int* neighborY,
-                               unsigned int* neighborZ,
-                               real* DD,
-                               int size_Mat,
-                               bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-      LB_Kernel_Casc_SP_27<<< grid.grid, grid.threads >>>(s9,
-                                                bcMatD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                DD,
-                                                size_Mat,
-                                                EvenOrOdd);
-      getLastCudaError("LB_Kernel_Casc_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelCasSPMS27( unsigned int numberOfThreads,
-                                 real s9,
-                                 unsigned int* bcMatD,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 real* DD,
-                                 int size_Mat,
-                                 bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-      LB_Kernel_Casc_SP_MS_27<<< grid.grid, grid.threads >>>(s9,
-                                                   bcMatD,
-                                                   neighborX,
-                                                   neighborY,
-                                                   neighborZ,
-                                                   DD,
-                                                   size_Mat,
-                                                   EvenOrOdd);
-      getLastCudaError("LB_Kernel_Casc_SP_MS_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelCasSPMSOHM27( unsigned int numberOfThreads,
-                                    real s9,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* DD,
-                                    int size_Mat,
-                                    bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-      LB_Kernel_Casc_SP_MS_OHM_27<<< grid.grid, grid.threads >>>(  s9,
-                                                         bcMatD,
-                                                         neighborX,
-                                                         neighborY,
-                                                         neighborZ,
-                                                         DD,
-                                                         size_Mat,
-                                                         EvenOrOdd);
-      getLastCudaError("LB_Kernel_Casc_SP_MS_OHM_27 execution failed");
+    dim3 threads       ( grid_nx, 1, 1 );
+    dim3 grid          ( grid_ny, grid_nz );   // grid for collision and propagation
+
+    LB_Kernel_Casc27<<< grid, threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Casc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void KernelKumCompSRTSP27(
+void KernelCasSP27( 
     unsigned int numberOfThreads,
-    real omega,
+    real s9,
     unsigned int* bcMatD,
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    real* DDStart,
-    int size_Mat,
-    int level,
-    real* forces,
+    real* DD,
+    unsigned long long numberOfLBnodes,
     bool EvenOrOdd)
 {
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-   LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid.grid, grid.threads >>>(
-       omega,
-       bcMatD,
-       neighborX,
-       neighborY,
-       neighborZ,
-       DDStart,
-       size_Mat,
-       level,
-       forces,
-       EvenOrOdd);
-      getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKum1hSP27(    unsigned int numberOfThreads,
-                                    real omega,
-                                    real deltaPhi,
-                                    real angularVelocity,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* coordX,
-                                    real* coordY,
-                                    real* coordZ,
-                                    real* DDStart,
-                                    int size_Mat,
-                                    bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-        LB_Kernel_Kum_1h_SP_27<<< grid.grid, grid.threads >>>(omega,
-                                                    deltaPhi,
-                                                    angularVelocity,
-                                                    bcMatD,
-                                                    neighborX,
-                                                    neighborY,
-                                                    neighborZ,
-                                                    coordX,
-                                                    coordY,
-                                                    coordZ,
-                                                    DDStart,
-                                                    size_Mat,
-                                                    EvenOrOdd);
-        getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelCascadeSP27(  unsigned int numberOfThreads,
-                                    real s9,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* DD,
-                                    int size_Mat,
-                                    bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-        LB_Kernel_Cascade_SP_27<<< grid.grid, grid.threads >>>(s9,
-                                                    bcMatD,
-                                                    neighborX,
-                                                    neighborY,
-                                                    neighborZ,
-                                                    DD,
-                                                    size_Mat,
-                                                    EvenOrOdd);
-        getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKumNewSP27(   unsigned int numberOfThreads,
-                                    real s9,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* DD,
-                                    int size_Mat,
-                                    bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-
-        LB_Kernel_Kum_New_SP_27<<< grid.grid, grid.threads >>>(s9,
-                                                    bcMatD,
-                                                    neighborX,
-                                                    neighborY,
-                                                    neighborZ,
-                                                    DD,
-                                                    size_Mat,
-                                                    EvenOrOdd);
-        getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKumNewCompSP27(unsigned int numberOfThreads,
-                                    real s9,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* DD,
-                                    int size_Mat,
-                                    int size_Array,
-                                    int level,
-                                    real* forces,
-                                    bool EvenOrOdd)
-{
-    //int Grid = size_Array / numberOfThreads;
-    //dim3 grid(Grid, 1, 1);
-    //dim3 threads(numberOfThreads, 1, 1 );
-
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-        //LB_Kernel_Kum_New_Comp_SP_27<<< grid.grid, grid.threads >>>(	s9,
-        //													bcMatD,
-        //													neighborX,
-        //													neighborY,
-        //													neighborZ,
-        //													DD,
-        //													size_Mat,
-        //													level,
-        //													forces,
-        //													EvenOrOdd);
-        //getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed");
-}
-
-//////////////////////////////////////////////////////////////////////////
-void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads,
-                                                                    real s9,
-                                                                    unsigned int* bcMatD,
-                                                                    unsigned int* neighborX,
-                                                                    unsigned int* neighborY,
-                                                                    unsigned int* neighborZ,
-                                                                    real* DD,
-                                                                    int size_Mat,
-                                                                    int size_Array,
-                                                                    int level,
-                                                                    real* forces,
-                                                                    bool EvenOrOdd)
-{
-    //int Grid = size_Array / numberOfThreads;
-    //dim3 grid(Grid, 1, 1);
-    //dim3 threads(numberOfThreads, 1, 1 );
-
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-
-    Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
-                                                                                        bcMatD,
-                                                                                        neighborX,
-                                                                                        neighborY,
-                                                                                        neighborZ,
-                                                                                        DD,
-                                                                                        size_Mat,
-                                                                                        level,
-                                                                                        forces,
-                                                                                        EvenOrOdd);
-        getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThreads,
-                                                        real s9,
-                                                        unsigned int* bcMatD,
-                                                        unsigned int* neighborX,
-                                                        unsigned int* neighborY,
-                                                        unsigned int* neighborZ,
-                                                        real* DD,
-                                                        int size_Mat,
-                                                        int size_Array,
-                                                        int level,
-                                                        real* forces,
-                                                        bool EvenOrOdd)
-{
-    //int Grid = size_Array / numberOfThreads;
-    //dim3 grid(Grid, 1, 1);
-    //dim3 threads(numberOfThreads, 1, 1 );
-
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-
-    Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
-                                                                        bcMatD,
-                                                                        neighborX,
-                                                                        neighborY,
-                                                                        neighborZ,
-                                                                        DD,
-                                                                        size_Mat,
-                                                                        level,
-                                                                        forces,
-                                                                        EvenOrOdd);
-        getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CumulantOneChimCompSP27(unsigned int numberOfThreads,
-                                        real s9,
-                                        unsigned int* bcMatD,
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        real* DD,
-                                        int size_Mat,
-                                        int size_Array,
-                                        int level,
-                                        real* forces,
-                                        bool EvenOrOdd)
-{
-    //int Grid = size_Array / numberOfThreads;
-    //dim3 grid(Grid, 1, 1);
-    //dim3 threads(numberOfThreads, 1, 1 );
-
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-
-    Cumulant_One_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
-                                                        bcMatD,
-                                                        neighborX,
-                                                        neighborY,
-                                                        neighborZ,
-                                                        DD,
-                                                        size_Mat,
-                                                        level,
-                                                        forces,
-                                                        EvenOrOdd);
-        getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKumIsoTestSP27(unsigned int numberOfThreads,
-                                     real s9,
-                                     unsigned int* bcMatD,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     real* DD,
-                                     real* dxxUx,
-                                     real* dyyUy,
-                                     real* dzzUz,
-                                     int size_Mat,
-                                     bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-
-    LB_Kernel_Kum_IsoTest_SP_27<<< grid.grid, grid.threads >>>(s9,
-                                                    bcMatD,
-                                                    neighborX,
-                                                    neighborY,
-                                                    neighborZ,
-                                                    DD,
-                                                    dxxUx,
-                                                    dyyUy,
-                                                    dzzUz,
-                                                    size_Mat,
-                                                    EvenOrOdd);
-    getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelKumCompSP27(  unsigned int numberOfThreads,
-                                    real s9,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* DD,
-                                    int size_Mat,
-                                    bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-
-        LB_Kernel_Kum_Comp_SP_27<<< grid.grid, grid.threads >>>(s9,
-                                                    bcMatD,
-                                                    neighborX,
-                                                    neighborY,
-                                                    neighborZ,
-                                                    DD,
-                                                    size_Mat,
-                                                    EvenOrOdd);
-        getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
-                                       real omega,
-                                       unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                       real* DD,
-                                       int size_Mat,
-                                       int level,
-                                       real* forces,
-                                       real porosity,
-                                       real darcy,
-                                       real forchheimer,
-                                       unsigned int sizeOfPorousMedia,
-                                       unsigned int* nodeIdsPorousMedia,
-                                       bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-
-    LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid.grid, grid.threads >>>(omega,
-                                                          neighborX,
-                                                          neighborY,
-                                                          neighborZ,
-                                                          DD,
-                                                          size_Mat,
-                                                          level,
-                                                          forces,
-                                                          porosity,
-                                                          darcy,
-                                                          forchheimer,
-                                                          sizeOfPorousMedia,
-                                                          nodeIdsPorousMedia,
-                                                          EvenOrOdd);
-    getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Casc_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Casc_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void KernelWaleBySoniMalavCumAA2016CompSP27(
+void KernelCasSPMS27(
     unsigned int numberOfThreads,
     real s9,
     unsigned int* bcMatD,
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    unsigned int* neighborWSB,
-    real* veloX,
-    real* veloY,
-    real* veloZ,
     real* DD,
-    real* turbulentViscosity,
-    int size_Mat,
-    int size_Array,
-    int level,
-    real* forces,
+    unsigned long long numberOfLBnodes,
     bool EvenOrOdd)
 {
-    //int Grid = size_Array / numberOfThreads;
-    //dim3 grid(Grid, 1, 1);
-    //dim3 threads(numberOfThreads, 1, 1 );
-
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
 
-
-    LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 << < grid.grid, grid.threads >> >(
+    LB_Kernel_Casc_SP_MS_27<<< grid.grid, grid.threads >>>(
         s9,
         bcMatD,
         neighborX,
         neighborY,
         neighborZ,
-        neighborWSB,
-        veloX,
-        veloY,
-        veloZ,
         DD,
-        turbulentViscosity,
-        size_Mat,
-        level,
-        forces,
+        numberOfLBnodes,
         EvenOrOdd);
-    getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelADincomp7(   unsigned int numberOfThreads,
-                                   real diffusivity,
-                                   unsigned int* bcMatD,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   real* DD,
-                                   real* DD7,
-                                   int size_Mat,
-                                   bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-      LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>( diffusivity,
-                                                  bcMatD,
-                                                  neighborX,
-                                                  neighborY,
-                                                  neighborZ,
-                                                  DD,
-                                                  DD7,
-                                                  size_Mat,
-                                                  EvenOrOdd);
-      getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void KernelADincomp27( unsigned int numberOfThreads,
-                                  real diffusivity,
-                                  unsigned int* bcMatD,
-                                  unsigned int* neighborX,
-                                  unsigned int* neighborY,
-                                  unsigned int* neighborZ,
-                                  real* DD,
-                                  real* DD27,
-                                  int size_Mat,
-                                  bool EvenOrOdd)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>( diffusivity,
-                                                    bcMatD,
-                                                    neighborX,
-                                                    neighborY,
-                                                    neighborZ,
-                                                    DD,
-                                                    DD27,
-                                                    size_Mat,
-                                                    EvenOrOdd);
-    getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
+    getLastCudaError("LB_Kernel_Casc_SP_MS_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void Init27( int myid,
-                        int numprocs,
-                        real u0,
-                        unsigned int* geoD,
-                        unsigned int* neighborX,
-                        unsigned int* neighborY,
-                        unsigned int* neighborZ,
-                        real* vParab,
-                        unsigned int size_Mat,
-                        unsigned int grid_nx,
-                        unsigned int grid_ny,
-                        unsigned int grid_nz,
-                        real* DD,
-                        int level,
-                        int maxlevel)
+void KernelCasSPMSOHM27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
 {
-   dim3 threads       ( grid_nx, 1, 1 );
-   dim3 grid          ( grid_ny, grid_nz );   // Gitter fuer Kollision und Propagation
-
-    LBInit27<<< grid, threads >>> (  myid,
-                                       numprocs,
-                                       u0,
-                                       geoD,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       vParab,
-                                       size_Mat,
-                                       grid_nx,
-                                       grid_ny,
-                                       grid_nz,
-                                       DD,
-                                       level,
-                                       maxlevel);
-    getLastCudaError("LBInit27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void InitNonEqPartSP27( unsigned int numberOfThreads,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   unsigned int* neighborWSB,
-                                   unsigned int* geoD,
-                                   real* rho,
-                                   real* ux,
-                                   real* uy,
-                                   real* uz,
-                                   unsigned int size_Mat,
-                                   real* DD,
-                                   real omega,
-                                   bool EvenOrOdd)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBInitNonEqPartSP27<<< grid.grid, grid.threads >>>( neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                neighborWSB,
-                                                geoD,
-                                                rho,
-                                                ux,
-                                                uy,
-                                                uz,
-                                                size_Mat,
-                                                DD,
-                                                omega,
-                                                EvenOrOdd);
-    getLastCudaError("LBInitNonEqPartSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void InitThS7(     unsigned int numberOfThreads,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int* geoD,
-                              real* Conc,
-                              real* ux,
-                              real* uy,
-                              real* uz,
-                              unsigned int size_Mat,
-                              real* DD7,
-                              bool EvenOrOdd)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    InitAD7<<< grid.grid, grid.threads >>>( neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       geoD,
-                                       Conc,
-                                       ux,
-                                       uy,
-                                       uz,
-                                       size_Mat,
-                                       DD7,
-                                       EvenOrOdd);
-    getLastCudaError("InitAD7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void InitADDev27( unsigned int numberOfThreads,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           unsigned int* geoD,
-                           real* Conc,
-                           real* ux,
-                           real* uy,
-                           real* uz,
-                           unsigned int size_Mat,
-                           real* DD27,
-                           bool EvenOrOdd)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    InitAD27<<< grid.grid, grid.threads >>>(neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       geoD,
-                                       Conc,
-                                       ux,
-                                       uy,
-                                       uz,
-                                       size_Mat,
-                                       DD27,
-                                       EvenOrOdd);
-    getLastCudaError("InitAD27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Casc_SP_MS_OHM_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Casc_SP_MS_OHM_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void PostProcessorF3_2018Fehlberg(
+void KernelKumCompSRTSP27(
     unsigned int numberOfThreads,
     real omega,
     unsigned int* bcMatD,
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    real* rhoOut,
-    real* vxOut,
-    real* vyOut,
-    real* vzOut,
     real* DDStart,
-    real* G6,
-    int size_Mat,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     bool EvenOrOdd)
 {
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LB_PostProcessor_F3_2018_Fehlberg <<< grid.grid, grid.threads >>> (   omega,
-                                                                  bcMatD,
-                                                                  neighborX,
-                                                                  neighborY,
-                                                                  neighborZ,
-                                                                  rhoOut,
-                                                                  vxOut,
-                                                                  vyOut,
-                                                                  vzOut,
-                                                                  DDStart,
-                                                                  G6,
-                                                                  size_Mat,
-                                                                  level,
-                                                                  forces,
-                                                                  EvenOrOdd);
-    getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
+
+    LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid.grid, grid.threads >>>(
+        omega,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DDStart,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+        getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void CalcMac27( real* vxD,
-                           real* vyD,
-                           real* vzD,
-                           real* rhoD,
-                           unsigned int* geoD,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           unsigned int size_Mat,
-                           unsigned int grid_nx,
-                           unsigned int grid_ny,
-                           unsigned int grid_nz,
-                           real* DD,
-                           bool isEvenTimestep)
+void KernelKum1hSP27(
+    unsigned int numberOfThreads,
+    real omega,
+    real deltaPhi,
+    real angularVelocity,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    real* DDStart,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
 {
-   dim3 threads       ( grid_nx, 1, 1 );
-   dim3 grid          ( grid_ny, grid_nz );
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
 
-    LBCalcMac27<<< grid, threads >>> (  vxD,
-                                          vyD,
-                                          vzD,
-                                          rhoD,
-                                          geoD,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          size_Mat,
-                                          DD,
-                                          isEvenTimestep);
-    getLastCudaError("LBCalcMac27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMacSP27( real* vxD,
-                             real* vyD,
-                             real* vzD,
-                             real* rhoD,
-                             real* pressD,
-                             unsigned int* geoD,
-                             unsigned int* neighborX,
-                             unsigned int* neighborY,
-                             unsigned int* neighborZ,
-                             unsigned int size_Mat,
-                             unsigned int numberOfThreads,
-                             real* DD,
-                             bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalcMacSP27<<< grid.grid, grid.threads >>> (   vxD,
-                                             vyD,
-                                             vzD,
-                                             rhoD,
-                                             pressD,
-                                             geoD,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             DD,
-                                             isEvenTimestep);
-    getLastCudaError("LBCalcMacSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMacCompSP27( real* vxD,
-                                 real* vyD,
-                                 real* vzD,
-                                 real* rhoD,
-                                 real* pressD,
-                                 unsigned int* geoD,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 unsigned int size_Mat,
-                                 unsigned int numberOfThreads,
-                                 real* DD,
-                                 bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalcMacCompSP27<<< grid.grid, grid.threads >>> (   vxD,
-                                                 vyD,
-                                                 vzD,
-                                                 rhoD,
-                                                 pressD,
-                                                 geoD,
-                                                 neighborX,
-                                                 neighborY,
-                                                 neighborZ,
-                                                 size_Mat,
-                                                 DD,
-                                                 isEvenTimestep);
-    getLastCudaError("LBCalcMacSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMacThS7(  real* Conc,
-                              unsigned int* geoD,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              unsigned int numberOfThreads,
-                              real* DD7,
-                              bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    CalcConc7<<< grid.grid, grid.threads >>> (Conc,
-                                          geoD,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          size_Mat,
-                                          DD7,
-                                          isEvenTimestep);
-    getLastCudaError("CalcConc7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void PlaneConcThS7(real* Conc,
-                              int* kPC,
-                              unsigned int numberOfPointskPC,
-                              unsigned int* geoD,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              unsigned int numberOfThreads,
-                              real* DD7,
-                              bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC);
-
-    GetPlaneConc7<<< grid.grid, grid.threads >>> (	Conc,
-                                                kPC,
-                                                numberOfPointskPC,
-                                                geoD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                DD7,
-                                                isEvenTimestep);
-    getLastCudaError("GetPlaneConc7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void PlaneConcThS27(real* Conc,
-                               int* kPC,
-                               unsigned int numberOfPointskPC,
-                               unsigned int* geoD,
-                               unsigned int* neighborX,
-                               unsigned int* neighborY,
-                               unsigned int* neighborZ,
-                               unsigned int size_Mat,
-                               unsigned int numberOfThreads,
-                               real* DD27,
-                               bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC);
-
-    GetPlaneConc27<<< grid.grid, grid.threads >>> (	Conc,
-                                                kPC,
-                                                numberOfPointskPC,
-                                                geoD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                DD27,
-                                                isEvenTimestep);
-    getLastCudaError("GetPlaneConc27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcConcentration27( unsigned int numberOfThreads,
-                                     real* Conc,
-                                     unsigned int* geoD,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int size_Mat,
-                                     real* DD27,
-                                     bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    CalcConc27<<< grid.grid, grid.threads >>> (  Conc,
-                                             geoD,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             DD27,
-                                             isEvenTimestep);
-    getLastCudaError("CalcConc27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMedSP27(  real* vxD,
-                              real* vyD,
-                              real* vzD,
-                              real* rhoD,
-                              real* pressD,
-                              unsigned int* geoD,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              unsigned int numberOfThreads,
-                              real* DD,
-                              bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalcMedSP27<<< grid.grid, grid.threads >>> (   vxD,
-                                             vyD,
-                                             vzD,
-                                             rhoD,
-                                             pressD,
-                                             geoD,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             DD,
-                                             isEvenTimestep);
-    getLastCudaError("LBCalcMedSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMedCompSP27(  real* vxD,
-                                  real* vyD,
-                                  real* vzD,
-                                  real* rhoD,
-                                  real* pressD,
-                                  unsigned int* geoD,
-                                  unsigned int* neighborX,
-                                  unsigned int* neighborY,
-                                  unsigned int* neighborZ,
-                                  unsigned int size_Mat,
-                                  unsigned int numberOfThreads,
-                                  real* DD,
-                                  bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalcMedCompSP27<<< grid.grid, grid.threads >>> (   vxD,
-                                                 vyD,
-                                                 vzD,
-                                                 rhoD,
-                                                 pressD,
-                                                 geoD,
-                                                 neighborX,
-                                                 neighborY,
-                                                 neighborZ,
-                                                 size_Mat,
-                                                 DD,
-                                                 isEvenTimestep);
-    getLastCudaError("LBCalcMedSP27 execution failed");
+    LB_Kernel_Kum_1h_SP_27<<< grid.grid, grid.threads >>>(
+        omega,
+        deltaPhi,
+        angularVelocity,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        coordX,
+        coordY,
+        coordZ,
+        DDStart,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_1h_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void CalcMedCompAD27(
-    real* vxD,
-    real* vyD,
-    real* vzD,
-    real* rhoD,
-    real* pressD,
-    real* concD,
-    unsigned int* geoD,
+void KernelCascadeSP27(
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    unsigned int size_Mat,
-    unsigned int numberOfThreads,
     real* DD,
-    real* DD_AD,
-    bool isEvenTimestep)
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
 {
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
 
-    LBCalcMedCompAD27 <<< grid.grid, grid.threads >>> (
-        vxD,
-        vyD,
-        vzD,
-        rhoD,
-        pressD,
-        concD,
-        geoD,
+    LB_Kernel_Cascade_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
         neighborX,
         neighborY,
         neighborZ,
-        size_Mat,
         DD,
-        DD_AD,
-        isEvenTimestep);
-    getLastCudaError("LBCalcMedAD27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcMacMedSP27(  real* vxD,
-                                 real* vyD,
-                                 real* vzD,
-                                 real* rhoD,
-                                 real* pressD,
-                                 unsigned int* geoD,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 unsigned int tdiff,
-                                 unsigned int size_Mat,
-                                 unsigned int numberOfThreads,
-                                 bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalcMacMedSP27<<< grid.grid, grid.threads >>> (   vxD,
-                                                vyD,
-                                                vzD,
-                                                rhoD,
-                                                pressD,
-                                                geoD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                tdiff,
-                                                size_Mat,
-                                                isEvenTimestep);
-    getLastCudaError("LBCalcMacMedSP27 execution failed");
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void ResetMedianValuesSP27(
-    real* vxD,
-    real* vyD,
-    real* vzD,
-    real* rhoD,
-    real* pressD,
-    unsigned int size_Mat,
+void KernelKumNewSP27(
     unsigned int numberOfThreads,
-    bool isEvenTimestep)
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
 {
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
 
 
-    LBResetMedianValuesSP27 << < grid.grid, grid.threads >> > (
-        vxD,
-        vyD,
-        vzD,
-        rhoD,
-        pressD,
-        size_Mat,
-        isEvenTimestep);
-    getLastCudaError("LBResetMedianValuesSP27 execution failed");
+    LB_Kernel_Kum_New_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void ResetMedianValuesAD27(
-    real* vxD,
-    real* vyD,
-    real* vzD,
-    real* rhoD,
-    real* pressD,
-    real* concD,
-    unsigned int size_Mat,
+void KernelKumNewCompSP27(
     unsigned int numberOfThreads,
-    bool isEvenTimestep)
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int size_Array,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
 {
-    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes);
 
-    LBResetMedianValuesAD27 << < grid.grid, grid.threads >> > (
-        vxD,
-        vyD,
-        vzD,
-        rhoD,
-        pressD,
-        concD,
-        size_Mat,
-        isEvenTimestep);
-    getLastCudaError("LBResetMedianValuesAD27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
-                                         real* kyzFromfcNEQ,
-                                         real* kxzFromfcNEQ,
-                                         real* kxxMyyFromfcNEQ,
-                                         real* kxxMzzFromfcNEQ,
-                                         unsigned int* geoD,
-                                         unsigned int* neighborX,
-                                         unsigned int* neighborY,
-                                         unsigned int* neighborZ,
-                                         unsigned int size_Mat,
-                                         unsigned int numberOfThreads,
-                                         real* DD,
-                                         bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalc2ndMomentsIncompSP27<<< grid.grid, grid.threads >>> (  kxyFromfcNEQ,
-                                                         kyzFromfcNEQ,
-                                                         kxzFromfcNEQ,
-                                                         kxxMyyFromfcNEQ,
-                                                         kxxMzzFromfcNEQ,
-                                                         geoD,
-                                                         neighborX,
-                                                         neighborY,
-                                                         neighborZ,
-                                                         size_Mat,
-                                                         DD,
-                                                         isEvenTimestep);
-    getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
-                                        real* kyzFromfcNEQ,
-                                        real* kxzFromfcNEQ,
-                                        real* kxxMyyFromfcNEQ,
-                                        real* kxxMzzFromfcNEQ,
-                                        unsigned int* geoD,
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        unsigned int size_Mat,
-                                        unsigned int numberOfThreads,
-                                        real* DD,
-                                        bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalc2ndMomentsCompSP27<<< grid.grid, grid.threads >>> (kxyFromfcNEQ,
-                                                     kyzFromfcNEQ,
-                                                     kxzFromfcNEQ,
-                                                     kxxMyyFromfcNEQ,
-                                                     kxxMzzFromfcNEQ,
-                                                     geoD,
-                                                     neighborX,
-                                                     neighborY,
-                                                     neighborZ,
-                                                     size_Mat,
-                                                     DD,
-                                                     isEvenTimestep);
-    getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed");
+    //LB_Kernel_Kum_New_Comp_SP_27<<< grid.grid, grid.threads >>>(	s9,
+    //													bcMatD,
+    //													neighborX,
+    //													neighborY,
+    //													neighborZ,
+    //													DD,
+    //													numberOfLBnodes,
+    //													level,
+    //													forces,
+    //													EvenOrOdd);
+    //getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed");
 }
+
 //////////////////////////////////////////////////////////////////////////
-void Calc3rdMomentsIncompSP27(real* CUMbbb,
-                                         real* CUMabc,
-                                         real* CUMbac,
-                                         real* CUMbca,
-                                         real* CUMcba,
-                                         real* CUMacb,
-                                         real* CUMcab,
-                                         unsigned int* geoD,
-                                         unsigned int* neighborX,
-                                         unsigned int* neighborY,
-                                         unsigned int* neighborZ,
-                                         unsigned int size_Mat,
-                                         unsigned int numberOfThreads,
-                                         real* DD,
-                                         bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalc3rdMomentsIncompSP27<<< grid.grid, grid.threads >>> (  CUMbbb,
-                                                         CUMabc,
-                                                         CUMbac,
-                                                         CUMbca,
-                                                         CUMcba,
-                                                         CUMacb,
-                                                         CUMcab,
-                                                         geoD,
-                                                         neighborX,
-                                                         neighborY,
-                                                         neighborZ,
-                                                         DD,
-                                                         size_Mat,
-                                                         isEvenTimestep);
-    getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed");
+void CumulantOnePreconditionedErrorDiffusionChimCompSP27( // host-side launcher for the kernel Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int size_Array, // accepted but not forwarded to the kernel
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 execution failed"); // message must name the kernel launched above
 }
 //////////////////////////////////////////////////////////////////////////
-void Calc3rdMomentsCompSP27( real* CUMbbb,
-                                        real* CUMabc,
-                                        real* CUMbac,
-                                        real* CUMbca,
-                                        real* CUMcba,
-                                        real* CUMacb,
-                                        real* CUMcab,
-                                        unsigned int* geoD,
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        unsigned int size_Mat,
-                                        unsigned int numberOfThreads,
-                                        real* DD,
-                                        bool isEvenTimestep)
-{
-    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalc3rdMomentsCompSP27<<< grid.grid, grid.threads >>> (CUMbbb,
-                                                     CUMabc,
-                                                     CUMbac,
-                                                     CUMbca,
-                                                     CUMcba,
-                                                     CUMacb,
-                                                     CUMcab,
-                                                     geoD,
-                                                     neighborX,
-                                                     neighborY,
-                                                     neighborZ,
-                                                     DD,
-                                                     size_Mat,
-                                                     isEvenTimestep);
-    getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed");
-}
+void CumulantOnePreconditionedChimCompSP27( // host-side launcher for the kernel Cumulant_One_preconditioned_chim_Comp_SP_27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int size_Array, // accepted but not forwarded to the kernel
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void CumulantOneChimCompSP27( // host-side launcher for the kernel Cumulant_One_chim_Comp_SP_27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int size_Array, // accepted but not forwarded to the kernel
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    Cumulant_One_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelKumIsoTestSP27( // host-side launcher for the kernel LB_Kernel_Kum_IsoTest_SP_27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    real* dxxUx, // dxxUx, dyyUy, dzzUz are forwarded unchanged to the kernel
+    real* dyyUy,
+    real* dzzUz,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LB_Kernel_Kum_IsoTest_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        dxxUx,
+        dyyUy,
+        dzzUz,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelKumCompSP27( // host-side launcher for the kernel LB_Kernel_Kum_Comp_SP_27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+
+    LB_Kernel_Kum_Comp_SP_27<<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelPMCumOneCompSP27( // host-side launcher for the kernel LB_Kernel_PM_Cum_One_Comp_SP_27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real omega,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    real porosity,
+    real darcy,
+    real forchheimer,
+    unsigned int sizeOfPorousMedia, // forwarded to the kernel; not used for the launch configuration
+    unsigned int* nodeIdsPorousMedia,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // NOTE(review): launch is sized by numberOfLBnodes, not sizeOfPorousMedia - confirm this is intended
+
+    LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        omega,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        level,
+        forces,
+        porosity,
+        darcy,
+        forchheimer,
+        sizeOfPorousMedia,
+        nodeIdsPorousMedia,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelWaleBySoniMalavCumAA2016CompSP27( // host-side launcher for the kernel LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    real* veloX,
+    real* veloY,
+    real* veloZ,
+    real* DD,
+    real* turbulentViscosity,
+    unsigned long long numberOfLBnodes,
+    int size_Array, // accepted but not forwarded to the kernel
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        veloX,
+        veloY,
+        veloZ,
+        DD,
+        turbulentViscosity,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelADincomp7( // host-side launcher for the kernel LB_Kernel_AD_Incomp_7
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real diffusivity,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    real* DD7, // forwarded to the kernel alongside DD
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>(
+        diffusivity,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        DD7,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void KernelADincomp27( // host-side launcher for the kernel LB_Kernel_AD_Incomp_27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real diffusivity,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    real* DD27, // forwarded to the kernel alongside DD
+    unsigned long long numberOfLBnodes,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>(
+        diffusivity,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        DD27,
+        numberOfLBnodes,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void Init27( // host-side launcher for the kernel LBInit27
+    int myid,
+    int numprocs,
+    real u0,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* vParab,
+    unsigned long long numberOfLBnodes,
+    unsigned int grid_nx, // grid_nx/ny/nz set the launch dimensions below and are also forwarded to the kernel
+    unsigned int grid_ny,
+    unsigned int grid_nz,
+    real* DD,
+    int level,
+    int maxlevel)
+{
+    dim3 threads       ( grid_nx, 1, 1 ); // block dimensions: grid_nx x 1 x 1 (no vf::cuda::CudaGrid here)
+    dim3 grid          ( grid_ny, grid_nz ); // grid dimensions: grid_ny x grid_nz
+
+    LBInit27<<< grid, threads >>> (
+        myid,
+        numprocs,
+        u0,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        vParab,
+        numberOfLBnodes,
+        grid_nx,
+        grid_ny,
+        grid_nz,
+        DD,
+        level,
+        maxlevel);
+    getLastCudaError("LBInit27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void InitNonEqPartSP27( // host-side launcher for the kernel LBInitNonEqPartSP27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    unsigned int* geoD,
+    real* rho,
+    real* ux,
+    real* uy,
+    real* uz,
+    unsigned long long numberOfLBnodes,
+    real* DD,
+    real omega,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LBInitNonEqPartSP27<<< grid.grid, grid.threads >>>(
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        geoD,
+        rho,
+        ux,
+        uy,
+        uz,
+        numberOfLBnodes,
+        DD,
+        omega,
+        EvenOrOdd);
+    getLastCudaError("LBInitNonEqPartSP27 execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void InitThS7( // host-side launcher; note the kernel it launches is named InitAD7, not InitThS7
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* geoD,
+    real* Conc,
+    real* ux,
+    real* uy,
+    real* uz,
+    unsigned long long numberOfLBnodes,
+    real* DD7,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    InitAD7<<< grid.grid, grid.threads >>>(
+        neighborX,
+        neighborY,
+        neighborZ,
+        geoD,
+        Conc,
+        ux,
+        uy,
+        uz,
+        numberOfLBnodes,
+        DD7,
+        EvenOrOdd);
+    getLastCudaError("InitAD7 execution failed"); // message uses the kernel name (InitAD7), not the wrapper name
+}
+//////////////////////////////////////////////////////////////////////////
+void InitADDev27( // host-side launcher; note the kernel it launches is named InitAD27
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* geoD,
+    real* Conc,
+    real* ux,
+    real* uy,
+    real* uz,
+    unsigned long long numberOfLBnodes,
+    real* DD27,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    InitAD27<<< grid.grid, grid.threads >>>(
+        neighborX,
+        neighborY,
+        neighborZ,
+        geoD,
+        Conc,
+        ux,
+        uy,
+        uz,
+        numberOfLBnodes,
+        DD27,
+        EvenOrOdd);
+    getLastCudaError("InitAD27 execution failed"); // message uses the kernel name (InitAD27), not the wrapper name
+}
+//////////////////////////////////////////////////////////////////////////
+void PostProcessorF3_2018Fehlberg( // host-side launcher for the kernel LB_PostProcessor_F3_2018_Fehlberg
+    unsigned int numberOfThreads, // passed to vf::cuda::CudaGrid together with numberOfLBnodes
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rhoOut, // rhoOut/vxOut/vyOut/vzOut: presumably output buffers written by the kernel - TODO confirm against the kernel
+    real* vxOut,
+    real* vyOut,
+    real* vzOut,
+    real* DDStart,
+    real* G6,
+    unsigned long long numberOfLBnodes,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // supplies grid.grid / grid.threads for the launch below
+
+    LB_PostProcessor_F3_2018_Fehlberg <<< grid.grid, grid.threads >>> (
+        omega,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        rhoOut,
+        vxOut,
+        vyOut,
+        vzOut,
+        DDStart,
+        G6,
+        numberOfLBnodes,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed"); // called right after the launch with a kernel-specific message
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMac27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int grid_nx,
+    unsigned int grid_ny,
+    unsigned int grid_nz,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Explicit launch shape for LBCalcMac27: grid_nx threads per block on a grid_ny x grid_nz block grid.
+    const dim3 threadsPerBlock(grid_nx, 1, 1);
+    const dim3 blocks(grid_ny, grid_nz);
+    LBCalcMac27<<<blocks, threadsPerBlock>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMac27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMacSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalcMacSP27; CudaGrid derives the launch shape from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalcMacSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMacSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMacCompSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalcMacCompSP27; launch shape comes from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalcMacCompSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMacCompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMacThS7(
+    real* Conc,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD7,
+    bool isEvenTimestep)
+{
+    // Host launcher for CalcConc7; CudaGrid derives the launch shape from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    CalcConc7<<<launchConfig.grid, launchConfig.threads>>>(
+        Conc,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD7,
+        isEvenTimestep);
+    getLastCudaError("CalcConc7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void PlaneConcThS7(
+    real* Conc,
+    int* kPC,
+    unsigned int numberOfPointskPC,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD7,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfPointskPC (not numberOfLBnodes, which is only forwarded to GetPlaneConc7).
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfPointskPC);
+    GetPlaneConc7<<<launchConfig.grid, launchConfig.threads>>>(
+        Conc,
+        kPC,
+        numberOfPointskPC,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD7,
+        isEvenTimestep);
+    getLastCudaError("GetPlaneConc7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void PlaneConcThS27(
+    real* Conc,
+    int* kPC,
+    unsigned int numberOfPointskPC,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD27,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfPointskPC (not numberOfLBnodes, which is only forwarded to GetPlaneConc27).
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfPointskPC);
+    GetPlaneConc27<<<launchConfig.grid, launchConfig.threads>>>(
+        Conc,
+        kPC,
+        numberOfPointskPC,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD27,
+        isEvenTimestep);
+    getLastCudaError("GetPlaneConc27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcConcentration27(
+    unsigned int numberOfThreads,
+    real* Conc,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* DD27,
+    bool isEvenTimestep)
+{
+    // Host launcher for CalcConc27; CudaGrid derives the launch shape from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    CalcConc27<<<launchConfig.grid, launchConfig.threads>>>(
+        Conc,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD27,
+        isEvenTimestep);
+    getLastCudaError("CalcConc27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMedSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalcMedSP27; CudaGrid derives the launch shape from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalcMedSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMedSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMedCompSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalcMedCompSP27; launch shape comes from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalcMedCompSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMedCompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMedCompAD27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD_AD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalcMedCompAD27; launch shape comes from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalcMedCompAD27<<<launchConfig.grid, launchConfig.threads>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        concD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        DD_AD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMedCompAD27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcMacMedSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int tdiff,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalcMacMedSP27; launch shape comes from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalcMacMedSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        tdiff,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMacMedSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ResetMedianValuesSP27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBResetMedianValuesSP27; launch shape comes from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBResetMedianValuesSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBResetMedianValuesSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void ResetMedianValuesAD27(
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBResetMedianValuesAD27; launch shape comes from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBResetMedianValuesAD27<<<launchConfig.grid, launchConfig.threads>>>(
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        concD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBResetMedianValuesAD27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void Calc2ndMomentsIncompSP27(
+    real* kxyFromfcNEQ,
+    real* kyzFromfcNEQ,
+    real* kxzFromfcNEQ,
+    real* kxxMyyFromfcNEQ,
+    real* kxxMzzFromfcNEQ,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalc2ndMomentsIncompSP27; launch shape comes from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalc2ndMomentsIncompSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        kxyFromfcNEQ,
+        kyzFromfcNEQ,
+        kxzFromfcNEQ,
+        kxxMyyFromfcNEQ,
+        kxxMzzFromfcNEQ,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void Calc2ndMomentsCompSP27(
+    real* kxyFromfcNEQ,
+    real* kyzFromfcNEQ,
+    real* kxzFromfcNEQ,
+    real* kxxMyyFromfcNEQ,
+    real* kxxMzzFromfcNEQ,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalc2ndMomentsCompSP27; launch shape comes from numberOfThreads and numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalc2ndMomentsCompSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        kxyFromfcNEQ,
+        kyzFromfcNEQ,
+        kxzFromfcNEQ,
+        kxxMyyFromfcNEQ,
+        kxxMzzFromfcNEQ,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void Calc3rdMomentsIncompSP27(
+    real* CUMbbb,
+    real* CUMabc,
+    real* CUMbac,
+    real* CUMbca,
+    real* CUMcba,
+    real* CUMacb,
+    real* CUMcab,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalc3rdMomentsIncompSP27; note the kernel takes DD before numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalc3rdMomentsIncompSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        CUMbbb,
+        CUMabc,
+        CUMbac,
+        CUMbca,
+        CUMcba,
+        CUMacb,
+        CUMcab,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void Calc3rdMomentsCompSP27(
+    real* CUMbbb,
+    real* CUMabc,
+    real* CUMbac,
+    real* CUMbca,
+    real* CUMcba,
+    real* CUMacb,
+    real* CUMcab,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalc3rdMomentsCompSP27; note the kernel takes DD before numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalc3rdMomentsCompSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        CUMbbb,
+        CUMabc,
+        CUMbac,
+        CUMbca,
+        CUMcba,
+        CUMacb,
+        CUMcab,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcHigherMomentsIncompSP27(
+    real* CUMcbb,
+    real* CUMbcb,
+    real* CUMbbc,
+    real* CUMcca,
+    real* CUMcac,
+    real* CUMacc,
+    real* CUMbcc,
+    real* CUMcbc,
+    real* CUMccb,
+    real* CUMccc,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalcHigherMomentsIncompSP27; note the kernel takes DD before numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalcHigherMomentsIncompSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        CUMcbb,
+        CUMbcb,
+        CUMbbc,
+        CUMcca,
+        CUMcac,
+        CUMacc,
+        CUMbcc,
+        CUMcbc,
+        CUMccb,
+        CUMccc,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcHigherMomentsCompSP27(
+    real* CUMcbb,
+    real* CUMbcb,
+    real* CUMbbc,
+    real* CUMcca,
+    real* CUMcac,
+    real* CUMacc,
+    real* CUMbcc,
+    real* CUMcbc,
+    real* CUMccb,
+    real* CUMccc,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads,
+    real* DD,
+    bool isEvenTimestep)
+{
+    // Host launcher for LBCalcHigherMomentsCompSP27; note the kernel takes DD before numberOfLBnodes.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfLBnodes);
+    LBCalcHigherMomentsCompSP27<<<launchConfig.grid, launchConfig.threads>>>(
+        CUMcbb,
+        CUMbcb,
+        CUMbbc,
+        CUMcca,
+        CUMcac,
+        CUMacc,
+        CUMbcc,
+        CUMcbc,
+        CUMccb,
+        CUMccc,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void LBCalcMeasurePoints27(
+    real* vxMP,
+    real* vyMP,
+    real* vzMP,
+    real* rhoMP,
+    unsigned int* kMP,
+    unsigned int numberOfPointskMP,
+    unsigned int MPClockCycle,
+    unsigned int t,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* DD,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfPointskMP; numberOfLBnodes is only forwarded to LBCalcMeasurePoints.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfPointskMP);
+    LBCalcMeasurePoints<<<launchConfig.grid, launchConfig.threads>>>(
+        vxMP,
+        vyMP,
+        vzMP,
+        rhoMP,
+        kMP,
+        numberOfPointskMP,
+        MPClockCycle,
+        t,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMeasurePoints execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void BcPress27(
+    int nx,
+    int ny,
+    int tz,
+    unsigned int grid_nx,
+    unsigned int grid_ny,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Explicit launch shape for LB_BC_Press_East27: grid_ny blocks of grid_nx threads each.
+    const dim3 threadsPerBlock(grid_nx, 1, 1);
+    const dim3 blocks(grid_ny, 1);
+    LB_BC_Press_East27<<<blocks, threadsPerBlock>>>(
+        nx,
+        ny,
+        tz,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("LB_BC_Press_East27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void BcVel27(
+    int nx,
+    int ny,
+    int nz,
+    int itz,
+    unsigned int grid_nx,
+    unsigned int grid_ny,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    real u0x,
+    real om)
+{
+    // grid_nx / grid_ny set the launch shape (grid_ny blocks of grid_nx threads) and are also passed to the kernel.
+    const dim3 threadsPerBlock(grid_nx, 1, 1);
+    const dim3 blocks(grid_ny, 1);
+    LB_BC_Vel_West_27<<<blocks, threadsPerBlock>>>(
+        nx,
+        ny,
+        nz,
+        itz,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        DD,
+        numberOfLBnodes,
+        isEvenTimestep,
+        u0x,
+        grid_nx,
+        grid_ny,
+        om);
+    getLastCudaError("LB_BC_Vel_West_27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QADPressDev7(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD7,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QADPress7.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QADPress7<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD7,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADPress7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QADPressDev27(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD27,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QADPress27.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QADPress27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD27,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADPress27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QADPressNEQNeighborDev27(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD27,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes (a signed int here, unlike the sibling wrappers).
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QADPressNEQNeighbor27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD27,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADPressNEQNeighbor27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QADVelDev7(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD7,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QADVel7.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QADVel7<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD7,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADVel7 execution failed");
+}
 //////////////////////////////////////////////////////////////////////////
-void CalcHigherMomentsIncompSP27(real* CUMcbb,
-                                            real* CUMbcb,
-                                            real* CUMbbc,
-                                            real* CUMcca,
-                                            real* CUMcac,
-                                            real* CUMacc,
-                                            real* CUMbcc,
-                                            real* CUMcbc,
-                                            real* CUMccb,
-                                            real* CUMccc,
-                                            unsigned int* geoD,
-                                            unsigned int* neighborX,
-                                            unsigned int* neighborY,
-                                            unsigned int* neighborZ,
-                                            unsigned int size_Mat,
-                                            unsigned int numberOfThreads,
-                                            real* DD,
-                                            bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalcHigherMomentsIncompSP27<<< grid.grid, grid.threads >>> (CUMcbb,
-                                                          CUMbcb,
-                                                          CUMbbc,
-                                                          CUMcca,
-                                                          CUMcac,
-                                                          CUMacc,
-                                                          CUMbcc,
-                                                          CUMcbc,
-                                                          CUMccb,
-                                                          CUMccc,
-                                                          geoD,
-                                                          neighborX,
-                                                          neighborY,
-                                                          neighborZ,
-                                                          DD,
-                                                          size_Mat,
-                                                          isEvenTimestep);
-    getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed");
+void QADVelDev27(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD27,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QADVel27.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QADVel27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD27,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADVel27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QADDev7(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD7,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QAD7.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QAD7<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD7,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QAD7 execution failed");
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Other advection diffusion kernels are in kernel factory :(
+void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
+    uint numberOfThreads,
+    real omegaDiffusivity,
+    uint* typeOfGridNode,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    real* distributions,
+    real* distributionsAD,
+    unsigned long long numberOfLBnodes,
+    real* forces,
+    bool isEvenTimestep)
+{
+    // 1D launch with numberOfLBnodes / numberOfThreads + 1 blocks (integer division). The quotient is cast
+    // straight to unsigned int (the type dim3 stores) instead of being narrowed through a signed int first.
+    const dim3 grid(static_cast<unsigned int>(numberOfLBnodes / numberOfThreads) + 1, 1, 1);
+    const dim3 threads(numberOfThreads, 1, 1);
+    Factorized_Central_Moments_Advection_Diffusion_Device_Kernel<<<grid, threads>>>(
+        omegaDiffusivity,
+        typeOfGridNode,
+        neighborX,
+        neighborY,
+        neighborZ,
+        distributions,
+        distributionsAD,
+        numberOfLBnodes,
+        forces,
+        isEvenTimestep);
+    getLastCudaError("Factorized_Central_Moments_Advection_Diffusion_Device_Kernel execution failed");
+}
+
+//////////////////////////////////////////////////////////////////////////
+void ADSlipVelDevComp(
+    uint numberOfThreads,
+    real* normalX,
+    real* normalY,
+    real* normalZ,
+    real* distributions,
+    real* distributionsAD,
+    int* QindexArray,
+    real* Qarrays,
+    uint numberOfBCnodes,
+    real omegaDiffusivity,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to AD_SlipVelDeviceComp.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    AD_SlipVelDeviceComp<<<launchConfig.grid, launchConfig.threads>>>(
+        normalX,
+        normalY,
+        normalZ,
+        distributions,
+        distributionsAD,
+        QindexArray,
+        Qarrays,
+        numberOfBCnodes,
+        omegaDiffusivity,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("AD_SlipVelDeviceComp execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+
+void QADDirichletDev27(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD27,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QADDirichlet27.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QADDirichlet27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD27,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADDirichlet27 execution failed"); // report the launched kernel's name, as the sibling wrappers do
+}
+//////////////////////////////////////////////////////////////////////////
+void QADBBDev27(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD27,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QADBB27.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QADBB27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD27,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADBB27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QNoSlipADincompDev7(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD7,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QNoSlipADincomp7.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QNoSlipADincomp7<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD7,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QNoSlipADincomp7 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QNoSlipADincompDev27(
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD27,
+    real* temp,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    // Launch shape is built from numberOfBCnodes; numberOfLBnodes is only forwarded to QNoSlipADincomp27.
+    const vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes);
+    QNoSlipADincomp27<<<launchConfig.grid, launchConfig.threads>>>(
+        DD,
+        DD27,
+        temp,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QNoSlipADincomp27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QADVeloIncompDev7( // host-side launcher for the QADVeloIncomp7 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD7,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // launch dimensions built from the thread count and the boundary-node count
+
+    QADVeloIncomp7<<< launchConfig.grid, launchConfig.threads >>> (
+        DD,
+        DD7,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADVeloIncomp7 execution failed"); // project error-check helper; the message names the launched kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QADVeloIncompDev27( // host-side launcher for the QADVeloIncomp27 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD27,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // launch dimensions built from the thread count and the boundary-node count
+
+    QADVeloIncomp27<<< launchConfig.grid, launchConfig.threads >>> (
+        DD,
+        DD27,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADVeloIncomp27 execution failed"); // project error-check helper; the message names the launched kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QADPressIncompDev7( // host-side launcher for the QADPressIncomp7 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD7,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // launch dimensions built from the thread count and the boundary-node count
+
+    QADPressIncomp7<<< launchConfig.grid, launchConfig.threads >>>(
+        DD,
+        DD7,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADPressIncomp7 execution failed"); // project error-check helper; the message names the launched kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QADPressIncompDev27( // host-side launcher for the QADPressIncomp27 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD27,
+    real* temp,
+    real* velo,
+    real diffusivity,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // launch dimensions built from the thread count and the boundary-node count
+
+    QADPressIncomp27<<< launchConfig.grid, launchConfig.threads >>>(
+        DD,
+        DD27,
+        temp,
+        velo,
+        diffusivity,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QADPressIncomp27 execution failed"); // project error-check helper; the message names the launched kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // host-side launcher for the QDevice27 kernel
+{
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes); // grid derived from the thread count and the boundary-node count
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    QDevice27<<< grid, threads >>> (
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QDevice27 execution failed"); // project error-check helper; the message names the launched kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // host-side launcher for the QDeviceComp27 kernel
+{
+    const dim3 blocks = vf::cuda::getCudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid derived from the thread count and the boundary-node count
+    const dim3 threadsPerBlock(parameterDevice->numberofthreads, 1, 1); // one-dimensional thread block
+
+    QDeviceComp27<<< blocks, threadsPerBlock >>> (
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QDeviceComp27 execution failed"); // project error-check helper; the message names the launched kernel
 }
 //////////////////////////////////////////////////////////////////////////
-void CalcHigherMomentsCompSP27(  real* CUMcbb,
-                                            real* CUMbcb,
-                                            real* CUMbbc,
-                                            real* CUMcca,
-                                            real* CUMcac,
-                                            real* CUMacc,
-                                            real* CUMbcc,
-                                            real* CUMcbc,
-                                            real* CUMccb,
-                                            real* CUMccc,
-                                            unsigned int* geoD,
-                                            unsigned int* neighborX,
-                                            unsigned int* neighborY,
-                                            unsigned int* neighborZ,
-                                            unsigned int size_Mat,
-                                            unsigned int numberOfThreads,
-                                            real* DD,
-                                            bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-    LBCalcHigherMomentsCompSP27<<< grid.grid, grid.threads >>> (  CUMcbb,
-                                                          CUMbcb,
-                                                          CUMbbc,
-                                                          CUMcca,
-                                                          CUMcac,
-                                                          CUMacc,
-                                                          CUMbcc,
-                                                          CUMcbc,
-                                                          CUMccb,
-                                                          CUMccc,
-                                                          geoD,
-                                                          neighborX,
-                                                          neighborY,
-                                                          neighborZ,
-                                                          DD,
-                                                          size_Mat,
-                                                          isEvenTimestep);
-    getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed");
+void QDevCompThinWalls27( // host-side launcher: runs QDeviceCompThinWallsPartOne27, then QThinWallsPartTwo27
+    unsigned int numberOfThreads,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* geom,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // one launch configuration shared by both kernels
+
+    QDeviceCompThinWallsPartOne27 <<< launchConfig.grid, launchConfig.threads >>> ( // first pass: receives om1, but neither geom nor neighborWSB
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QDeviceCompThinWallsPartOne27 execution failed"); // error check after the first launch
+
+    QThinWallsPartTwo27 <<< launchConfig.grid, launchConfig.threads >>> ( // second pass: receives geom and neighborWSB, but not om1
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        geom,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QThinWallsPartTwo27 execution failed"); // error check after the second launch
 }
 //////////////////////////////////////////////////////////////////////////
-void LBCalcMeasurePoints27(real* vxMP,
-                                      real* vyMP,
-                                      real* vzMP,
-                                      real* rhoMP,
-                                      unsigned int* kMP,
-                                      unsigned int numberOfPointskMP,
-                                      unsigned int MPClockCycle,
-                                      unsigned int t,
-                                      unsigned int* geoD,
-                                      unsigned int* neighborX,
-                                      unsigned int* neighborY,
-                                      unsigned int* neighborZ,
-                                      unsigned int size_Mat,
-                                      real* DD,
-                                      unsigned int numberOfThreads,
-                                      bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskMP);
-
-    LBCalcMeasurePoints<<< grid.grid, grid.threads >>> (vxMP,
-                                                vyMP,
-                                                vzMP,
-                                                rhoMP,
-                                                kMP,
-                                                numberOfPointskMP,
-                                                MPClockCycle,
-                                                t,
-                                                geoD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                DD,
-                                                isEvenTimestep);
-    getLastCudaError("LBCalcMeasurePoints execution failed");
+void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // host-side launcher for the QDevice3rdMomentsComp27 kernel
+{
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes); // grid derived from the thread count and the boundary-node count
+    dim3 threads(parameterDevice->numberofthreads, 1, 1); // one-dimensional thread block
+
+    QDevice3rdMomentsComp27<<< grid, threads >>> (
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QDevice3rdMomentsComp27 execution failed"); // project error-check helper; the message names the launched kernel
 }
 //////////////////////////////////////////////////////////////////////////
-void BcPress27( int nx,
-                           int ny,
-                           int tz,
-                           unsigned int grid_nx,
-                           unsigned int grid_ny,
-                           unsigned int* bcMatD,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           real* DD,
-                           unsigned int size_Mat,
-                           bool isEvenTimestep)
+void QDevIncompHighNu27( // host-side launcher for the QDeviceIncompHighNu27 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-    dim3 threads       ( grid_nx, 1, 1 );
-    dim3 grid          ( grid_ny, 1 );
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // launch dimensions built from the thread count and the boundary-node count
 
-    LB_BC_Press_East27<<< grid, threads >>> ( nx,
-                                                ny,
-                                                tz,
-                                                bcMatD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                DD,
-                                                size_Mat,
-                                                isEvenTimestep);
-    getLastCudaError("LB_BC_Press_East27 execution failed");
+    QDeviceIncompHighNu27<<< launchConfig.grid, launchConfig.threads >>> (
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QDeviceIncompHighNu27 execution failed"); // project error-check helper; the message names the launched kernel
 }
 //////////////////////////////////////////////////////////////////////////
-void BcVel27(int nx,
-                        int ny,
-                        int nz,
-                        int itz,
-                        unsigned int grid_nx,
-                        unsigned int grid_ny,
-                        unsigned int* bcMatD,
-                        unsigned int* neighborX,
-                        unsigned int* neighborY,
-                        unsigned int* neighborZ,
-                        real* DD,
-                        unsigned int size_Mat,
-                        bool isEvenTimestep,
-                        real u0x,
-                        real om)
+void QDevCompHighNu27( // host-side launcher for the QDeviceCompHighNu27 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-    dim3 threads       ( grid_nx, 1, 1 );
-    dim3 grid          ( grid_ny, 1 );
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // launch dimensions built from the thread count and the boundary-node count
 
-    LB_BC_Vel_West_27<<< grid, threads >>> (  nx,
-                                                ny,
-                                                nz,
-                                                itz,
-                                                bcMatD,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                DD,
-                                                size_Mat,
-                                                isEvenTimestep,
-                                                u0x,
-                                                grid_nx,
-                                                grid_ny,
-                                                om);
-    getLastCudaError("LB_BC_Vel_West_27 execution failed");
+    QDeviceCompHighNu27<<< launchConfig.grid, launchConfig.threads >>> (
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QDeviceCompHighNu27 execution failed"); // project error-check helper; the message names the launched kernel
 }
 //////////////////////////////////////////////////////////////////////////
-void QADPressDev7( unsigned int numberOfThreads,
-                              real* DD,
-                              real* DD7,
-                              real* temp,
-                              real* velo,
-                              real diffusivity,
-                              int* k_Q,
-                              real* QQ,
-                              unsigned int numberOfBCnodes,
-                              real om1,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADPress7<<< grid.grid, grid.threads >>>( DD,
-                                       DD7,
-                                       temp,
-                                       velo,
-                                       diffusivity,
-                                       k_Q,
-                                       QQ,
-                                       numberOfBCnodes,
-                                       om1,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       size_Mat,
-                                       isEvenTimestep);
-    getLastCudaError("QADPress7 execution failed");
+void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // host-side launcher for the QVelDevPlainBB27 kernel
+{
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes); // grid derived from the thread count and the boundary-node count
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    QVelDevPlainBB27<<< grid, threads >>> ( // note: unlike QVelDevice27, no omega is passed to this kernel
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QVelDevPlainBB27 execution failed"); // message names the launched kernel, matching the sibling launchers
 }
 //////////////////////////////////////////////////////////////////////////
-void QADPressDev27(unsigned int numberOfThreads,
-                              real* DD,
-                              real* DD27,
-                              real* temp,
-                              real* velo,
-                              real diffusivity,
-                              int* k_Q,
-                              real* QQ,
-                              unsigned int numberOfBCnodes,
-                              real om1,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADPress27<<< grid.grid, grid.threads >>>(   DD,
-                                          DD27,
-                                          temp,
-                                          velo,
-                                          diffusivity,
-                                          k_Q,
-                                          QQ,
-                                          numberOfBCnodes,
-                                          om1,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          size_Mat,
-                                          isEvenTimestep);
-    getLastCudaError("QADPress27 execution failed");
+void QVelDeviceCouette27( // host-side launcher for the QVelDevCouette27 kernel
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // launch dimensions built from the thread count and the boundary-node count
+
+    QVelDevCouette27<<< launchConfig.grid, launchConfig.threads >>> (
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVelDevCouette27 execution failed"); // project error-check helper; the message names the launched kernel
 }
 //////////////////////////////////////////////////////////////////////////
-void QADPressNEQNeighborDev27(
-                                            unsigned int numberOfThreads,
-                                            real* DD,
-                                            real* DD27,
-                                            int* k_Q,
-                                            int* k_N,
-                                            int numberOfBCnodes,
-                                            unsigned int* neighborX,
-                                            unsigned int* neighborY,
-                                            unsigned int* neighborZ,
-                                            unsigned int size_Mat,
-                                            bool isEvenTimestep
-                                        )
+void QVelDevice1h27( // host-side launcher for the QVelDev1h27 kernel
+    unsigned int numberOfThreads,
+    int nx,
+    int ny,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    real Phi,
+    real angularVelocity,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-    QADPressNEQNeighbor27<<< grid.grid, grid.threads >>>(
-                                                DD,
-                                                DD27,
-                                                k_Q,
-                                                k_N,
-                                                numberOfBCnodes,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep
-                                              );
-       getLastCudaError("QADPressNEQNeighbor27 execution failed");
+    QVelDev1h27<<< grid.grid, grid.threads >>> ( // every argument is forwarded unchanged, in the kernel's parameter order
+        nx,
+        ny,
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        Phi,
+        angularVelocity,
+        neighborX,
+        neighborY,
+        neighborZ,
+        coordX,
+        coordY,
+        coordZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVelDev1h27 execution failed"); // project error-check helper; the message names the launched kernel
 }
 //////////////////////////////////////////////////////////////////////////
-void QADVelDev7(unsigned int numberOfThreads,
-                           real* DD,
-                           real* DD7,
-                           real* temp,
-                           real* velo,
-                           real diffusivity,
-                           int* k_Q,
-                           real* QQ,
-                           unsigned int numberOfBCnodes,
-                           real om1,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           unsigned int size_Mat,
-                           bool isEvenTimestep)
+void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // host-side launcher for the QVelDevice27 kernel
 {
-    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+    const dim3 blocks = vf::cuda::getCudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid derived from the thread count and the boundary-node count
+    const dim3 threadsPerBlock(parameterDevice->numberofthreads, 1, 1); // one-dimensional thread block
 
-      QADVel7<<< grid.grid, grid.threads >>> (
-                                       DD,
-                                       DD7,
-                                       temp,
-                                       velo,
-                                       diffusivity,
-                                       k_Q,
-                                       QQ,
-                                       numberOfBCnodes,
-                                       om1,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       size_Mat,
-                                       isEvenTimestep);
-    getLastCudaError("QADVel7 execution failed");
+    QVelDevice27<<< blocks, threadsPerBlock >>> (
+        parameterDevice->nx,
+        parameterDevice->ny,
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QVelDevice27 execution failed"); // project error-check helper; the message names the launched kernel
 }
 //////////////////////////////////////////////////////////////////////////
-void QADVelDev27(  unsigned int numberOfThreads,
-                              real* DD,
-                              real* DD27,
-                              real* temp,
-                              real* velo,
-                              real diffusivity,
-                              int* k_Q,
-                              real* QQ,
-                              unsigned int numberOfBCnodes,
-                              real om1,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADVel27<<< grid.grid, grid.threads >>> ( DD,
-                                      DD27,
-                                      temp,
-                                      velo,
-                                      diffusivity,
-                                      k_Q,
-                                      QQ,
-                                      numberOfBCnodes,
-                                      om1,
-                                      neighborX,
-                                      neighborY,
-                                      neighborZ,
-                                      size_Mat,
-                                      isEvenTimestep);
-      getLastCudaError("QADVel27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADDev7(unsigned int numberOfThreads,
-                        real* DD,
-                        real* DD7,
-                        real* temp,
-                        real diffusivity,
-                        int* k_Q,
-                        real* QQ,
-                        unsigned int numberOfBCnodes,
-                        real om1,
-                        unsigned int* neighborX,
-                        unsigned int* neighborY,
-                        unsigned int* neighborZ,
-                        unsigned int size_Mat,
-                        bool isEvenTimestep)
+void QVelDevCompPlusSlip27( // host-side launcher for the QVelDeviceCompPlusSlip27 kernel
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-      QAD7<<< grid.grid, grid.threads >>> (     DD,
-                                       DD7,
-                                       temp,
-                                       diffusivity,
-                                       k_Q,
-                                       QQ,
-                                       numberOfBCnodes,
-                                       om1,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       size_Mat,
-                                       isEvenTimestep);
-      getLastCudaError("QAD7 execution failed");
+    QVelDeviceCompPlusSlip27<<< grid.grid, grid.threads >>> ( // every argument is forwarded unchanged, in the kernel's parameter order
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVelDeviceCompPlusSlip27 execution failed"); // project error-check helper; the message names the launched kernel
+}
+//////////////////////////////////////////////////////////////////////////
+void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // host-side launcher for the QVelDeviceComp27 kernel
+{
+    dim3 grid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes); // grid derived from the thread count and the boundary-node count
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    QVelDeviceComp27<<< grid, threads >>> (
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QVelDeviceComp27 execution failed"); // project error-check helper; the message names the launched kernel
 }
+//////////////////////////////////////////////////////////////////////////
+void QVelDevCompThinWalls27( // host-side launcher: runs QVelDeviceCompThinWallsPartOne27, then QThinWallsPartTwo27
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* geom,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid launchConfig(numberOfThreads, numberOfBCnodes); // one launch configuration shared by both kernels
 
+    QVelDeviceCompThinWallsPartOne27<<< launchConfig.grid, launchConfig.threads >>> ( // first pass: receives the velocities and om1, but neither geom nor neighborWSB
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed"); // error check after the first launch
 
+    QThinWallsPartTwo27 <<< launchConfig.grid, launchConfig.threads >>> ( // second pass: receives geom and neighborWSB, but no velocities or om1
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        geom,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QThinWallsPartTwo27 execution failed"); // error check after the second launch
+}
 //////////////////////////////////////////////////////////////////////////
-// Other advection diffusion kernels are in kernel factory :(
-void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
-   uint numberOfThreads,
-   real omegaDiffusivity,
-   uint* typeOfGridNode,
-   uint* neighborX,
-   uint* neighborY,
-   uint* neighborZ,
-   real* distributions,
-   real* distributionsAD,
-   int size_Mat,
-   real* forces,
-   bool isEvenTimestep)
+void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   int Grid = (size_Mat / numberOfThreads) + 1;
-   dim3 grid(Grid, 1, 1);
-   dim3 threads(numberOfThreads, 1, 1);
+    const dim3 blockShape(parameterDevice->numberofthreads, 1, 1); // block size is numberofthreads
+    const dim3 gridShape = vf::cuda::getCudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
 
-   Factorized_Central_Moments_Advection_Diffusion_Device_Kernel <<< grid, threads >>> (
-      omegaDiffusivity,
-      typeOfGridNode,
-      neighborX,
-      neighborY,
-      neighborZ,
-      distributions,
-      distributionsAD,
-      size_Mat,
-      forces,
-      isEvenTimestep);
-   getLastCudaError("Factorized_Central_Moments_Advection_Diffusion_Device_Kernel execution failed");
+    QVelDeviceCompZeroPress27<<<gridShape, blockShape>>>( // host-side launch of the QVelDeviceCompZeroPress27 kernel
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QVelDeviceCompZeroPress27 execution failed"); // check for a CUDA error after the launch
 }
+//////////////////////////////////////////////////////////////////////////
+void QVelDevIncompHighNu27( // host-side launcher for the QVelDeviceIncompHighNu27 kernel
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid launchShape(numberOfThreads, numberOfBCnodes); // launch shape built from thread and BC-node counts
 
+    QVelDeviceIncompHighNu27<<<launchShape.grid, launchShape.threads>>>( // arguments are forwarded unchanged
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVelDeviceIncompHighNu27 execution failed"); // check for a CUDA error after the launch
+}
 //////////////////////////////////////////////////////////////////////////
-void ADSlipVelDevComp(
-    uint numberOfThreads,
-    real * normalX,
-    real * normalY,
-    real * normalZ,
-    real * distributions,
-    real * distributionsAD,
-    int* QindexArray,
-    real * Qarrays,
-    uint numberOfBCnodes,
-    real omegaDiffusivity,
-    uint * neighborX,
-    uint * neighborY,
-    uint * neighborZ,
-    uint size_Mat,
+void QVelDevCompHighNu27( // host-side launcher for the QVelDeviceCompHighNu27 kernel
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep)
 {
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch shape from thread and BC-node counts
 
-    AD_SlipVelDeviceComp << < grid.grid, grid.threads >> > (
-        normalX,
-        normalY,
-        normalZ,
-        distributions,
-        distributionsAD,
-        QindexArray,
-        Qarrays,
+    QVelDeviceCompHighNu27<<< grid.grid, grid.threads >>> ( // all arguments are forwarded unchanged
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
         numberOfBCnodes,
-        omegaDiffusivity,
+        om1,
         neighborX,
         neighborY,
         neighborZ,
-        size_Mat,
+        numberOfLBnodes,
         isEvenTimestep);
-    getLastCudaError("AD_SlipVelDeviceComp execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-
-void QADDirichletDev27( unsigned int numberOfThreads,
-                                   real* DD,
-                                   real* DD27,
-                                   real* temp,
-                                   real diffusivity,
-                                   int* k_Q,
-                                   real* QQ,
-                                   unsigned int numberOfBCnodes,
-                                   real om1,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   unsigned int size_Mat,
-                                   bool isEvenTimestep)
-{
-       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADDirichlet27<<< grid.grid, grid.threads >>> (
-                                               DD,
-                                               DD27,
-                                               temp,
-                                               diffusivity,
-                                               k_Q,
-                                               QQ,
-                                               numberOfBCnodes,
-                                               om1,
-                                               neighborX,
-                                               neighborY,
-                                               neighborZ,
-                                               size_Mat,
-                                               isEvenTimestep);
-      getLastCudaError("QADDirichletDev27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADBBDev27(unsigned int numberOfThreads,
-                           real* DD,
-                           real* DD27,
-                           real* temp,
-                           real diffusivity,
-                           int* k_Q,
-                           real* QQ,
-                           unsigned int numberOfBCnodes,
-                           real om1,
-                           unsigned int* neighborX,
-                           unsigned int* neighborY,
-                           unsigned int* neighborZ,
-                           unsigned int size_Mat,
-                           bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADBB27<<< grid.grid, grid.threads >>> (  DD,
-                                       DD27,
-                                       temp,
-                                       diffusivity,
-                                       k_Q,
-                                       QQ,
-                                       numberOfBCnodes,
-                                       om1,
-                                       neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       size_Mat,
-                                       isEvenTimestep);
-      getLastCudaError("QADBB27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QNoSlipADincompDev7(unsigned int numberOfThreads,
-                                    real* DD,
-                                    real* DD7,
-                                    real* temp,
-                                    real diffusivity,
-                                    int* k_Q,
-                                    real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QNoSlipADincomp7<<< grid.grid, grid.threads >>> (
-                                               DD,
-                                               DD7,
-                                               temp,
-                                               diffusivity,
-                                               k_Q,
-                                               QQ,
-                                               numberOfBCnodes,
-                                               om1,
-                                               neighborX,
-                                               neighborY,
-                                               neighborZ,
-                                               size_Mat,
-                                               isEvenTimestep);
-      getLastCudaError("QNoSlipADincomp7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QNoSlipADincompDev27(  unsigned int numberOfThreads,
-                                       real* DD,
-                                       real* DD27,
-                                       real* temp,
-                                       real diffusivity,
-                                       int* k_Q,
-                                       real* QQ,
-                                       unsigned int numberOfBCnodes,
-                                       real om1,
-                                       unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                       unsigned int size_Mat,
-                                       bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QNoSlipADincomp27<<< grid.grid, grid.threads >>> (
-                                               DD,
-                                               DD27,
-                                               temp,
-                                               diffusivity,
-                                               k_Q,
-                                               QQ,
-                                               numberOfBCnodes,
-                                               om1,
-                                               neighborX,
-                                               neighborY,
-                                               neighborZ,
-                                               size_Mat,
-                                               isEvenTimestep);
-      getLastCudaError("QNoSlipADincomp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADVeloIncompDev7( unsigned int numberOfThreads,
-                                   real* DD,
-                                   real* DD7,
-                                   real* temp,
-                                   real* velo,
-                                   real diffusivity,
-                                   int* k_Q,
-                                   real* QQ,
-                                   unsigned int numberOfBCnodes,
-                                   real om1,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   unsigned int size_Mat,
-                                   bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADVeloIncomp7<<< grid.grid, grid.threads >>> ( DD,
-                                                 DD7,
-                                               temp,
-                                               velo,
-                                               diffusivity,
-                                               k_Q,
-                                               QQ,
-                                               numberOfBCnodes,
-                                               om1,
-                                               neighborX,
-                                               neighborY,
-                                               neighborZ,
-                                               size_Mat,
-                                               isEvenTimestep);
-      getLastCudaError("QADVeloIncomp7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADVeloIncompDev27(   unsigned int numberOfThreads,
-                                      real* DD,
-                                      real* DD27,
-                                      real* temp,
-                                      real* velo,
-                                      real diffusivity,
-                                      int* k_Q,
-                                      real* QQ,
-                                      unsigned int numberOfBCnodes,
-                                      real om1,
-                                      unsigned int* neighborX,
-                                      unsigned int* neighborY,
-                                      unsigned int* neighborZ,
-                                      unsigned int size_Mat,
-                                      bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADVeloIncomp27<<< grid.grid, grid.threads >>> (
-                                              DD,
-                                              DD27,
-                                              temp,
-                                              velo,
-                                              diffusivity,
-                                              k_Q,
-                                              QQ,
-                                              numberOfBCnodes,
-                                              om1,
-                                              neighborX,
-                                              neighborY,
-                                              neighborZ,
-                                              size_Mat,
-                                              isEvenTimestep);
-      getLastCudaError("QADVeloIncomp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADPressIncompDev7( unsigned int numberOfThreads,
-                                      real* DD,
-                                      real* DD7,
-                                      real* temp,
-                                      real* velo,
-                                      real diffusivity,
-                                      int* k_Q,
-                                      real* QQ,
-                                      unsigned int numberOfBCnodes,
-                                      real om1,
-                                      unsigned int* neighborX,
-                                      unsigned int* neighborY,
-                                      unsigned int* neighborZ,
-                                      unsigned int size_Mat,
-                                      bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADPressIncomp7<<< grid.grid, grid.threads >>>(
-                                               DD,
-                                               DD7,
-                                               temp,
-                                               velo,
-                                               diffusivity,
-                                               k_Q,
-                                               QQ,
-                                               numberOfBCnodes,
-                                               om1,
-                                               neighborX,
-                                               neighborY,
-                                               neighborZ,
-                                               size_Mat,
-                                               isEvenTimestep);
-      getLastCudaError("QADPressIncomp7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QADPressIncompDev27(  unsigned int numberOfThreads,
-                                      real* DD,
-                                      real* DD27,
-                                      real* temp,
-                                      real* velo,
-                                      real diffusivity,
-                                      int* k_Q,
-                                      real* QQ,
-                                      unsigned int numberOfBCnodes,
-                                      real om1,
-                                      unsigned int* neighborX,
-                                      unsigned int* neighborY,
-                                      unsigned int* neighborZ,
-                                      unsigned int size_Mat,
-                                      bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QADPressIncomp27<<< grid.grid, grid.threads >>>(DD,
-                                                DD27,
-                                              temp,
-                                              velo,
-                                              diffusivity,
-                                              k_Q,
-                                              QQ,
-                                              numberOfBCnodes,
-                                              om1,
-                                              neighborX,
-                                              neighborY,
-                                              neighborZ,
-                                              size_Mat,
-                                              isEvenTimestep);
-      getLastCudaError("QADPressIncomp27 execution failed");
+    getLastCudaError("QVelDeviceCompHighNu27 execution failed"); // message names the kernel launched above (was "QVelDeviceComp27")
 }
 //////////////////////////////////////////////////////////////////////////
-void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVeloDevEQ27( // host-side launcher for the QVeloDeviceEQ27 kernel
+    unsigned int numberOfThreads,
+    real* VeloX,
+    real* VeloY,
+    real* VeloZ,
+    real* DD,
+    int* k_Q,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
-
-      QDevice27<<< grid, threads >>> (
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->q27[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
+    vf::cuda::CudaGrid launchShape(numberOfThreads, numberOfBCnodes); // launch shape built from thread and BC-node counts
 
-      getLastCudaError("QDevice27 execution failed");
+    QVeloDeviceEQ27<<<launchShape.grid, launchShape.threads>>>( // arguments are forwarded unchanged
+        VeloX,
+        VeloY,
+        VeloZ,
+        DD,
+        k_Q,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QVeloDeviceEQ27 execution failed"); // check for a CUDA error after the launch
 }
 //////////////////////////////////////////////////////////////////////////
-void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
-
-      QDeviceComp27<<< grid, threads >>> (
-           parameterDevice->distributions.f[0],
-           boundaryCondition->k,
-           boundaryCondition->q27[0],
-           boundaryCondition->numberOfBCnodes,
-           parameterDevice->omega,
-           parameterDevice->neighborX,
-           parameterDevice->neighborY,
-           parameterDevice->neighborZ,
-           parameterDevice->numberOfNodes,
-           parameterDevice->isEvenTimestep);
-      getLastCudaError("QDeviceComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QDevCompThinWalls27(unsigned int numberOfThreads,
-                                    real* DD,
-                                    int* k_Q,
-                                    real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    unsigned int* geom,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int* neighborWSB,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QDeviceCompThinWallsPartOne27 <<< grid.grid, grid.threads >>> (DD,
-                                                         k_Q,
-                                                         QQ,
-                                                         numberOfBCnodes,
-                                                         om1,
-                                                         neighborX,
-                                                         neighborY,
-                                                         neighborZ,
-                                                         size_Mat,
-                                                         isEvenTimestep);
-   getLastCudaError("QDeviceCompThinWallsPartOne27 execution failed");
-
-   QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> ( DD,
-                                                k_Q,
-                                                QQ,
-                                                numberOfBCnodes,
-                                                geom,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                neighborWSB,
-                                                size_Mat,
-                                                isEvenTimestep);
-   getLastCudaError("QThinWallsPartTwo27 execution failed");
+    const dim3 blockShape(parameterDevice->numberofthreads, 1, 1); // block size is numberofthreads
+    const dim3 gridShape = vf::cuda::getCudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
 
+    QSlipDevice27<<<gridShape, blockShape>>>( // host-side launch of the QSlipDevice27 kernel
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QSlipDevice27 execution failed"); // check for a CUDA error after the launch
 }
 //////////////////////////////////////////////////////////////////////////
-void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1);
-
-   QDevice3rdMomentsComp27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QDevice3rdMomentsComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QDevIncompHighNu27( unsigned int numberOfThreads,
-                                    real* DD,
-                                    int* k_Q,
-                                    real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QDeviceIncompHighNu27<<< grid.grid, grid.threads >>> (
-                                                   DD,
-                                                   k_Q,
-                                                   QQ,
-                                                   numberOfBCnodes,
-                                                   om1,
-                                                   neighborX,
-                                                   neighborY,
-                                                   neighborZ,
-                                                   size_Mat,
-                                                   isEvenTimestep);
-      getLastCudaError("QDeviceIncompHighNu27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QDevCompHighNu27(   unsigned int numberOfThreads,
-                                    real* DD,
-                                    int* k_Q,
-                                    real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QDeviceCompHighNu27<<< grid.grid, grid.threads >>> (
-                                                   DD,
-                                                   k_Q,
-                                                   QQ,
-                                                   numberOfBCnodes,
-                                                   om1,
-                                                   neighborX,
-                                                   neighborY,
-                                                   neighborZ,
-                                                   size_Mat,
-                                                   isEvenTimestep);
-      getLastCudaError("QDevice27 execution failed");
+    // Host-side launcher for QSlipDeviceComp27TurbViscosity: block size is numberofthreads, grid comes from getCudaGrid.
+    const dim3 blockShape(parameterDevice->numberofthreads, 1, 1);
+    const dim3 gridShape = vf::cuda::getCudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+    QSlipDeviceComp27TurbViscosity<<<gridShape, blockShape>>>(
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->turbViscosity, // additionally passes the turbViscosity field to the kernel
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed"); // check for a CUDA error after the launch
 }
 //////////////////////////////////////////////////////////////////////////
-void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipPressureDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // Launches the QSlipPressureDeviceComp27TurbViscosity kernel with data taken from parameterDevice and boundaryCondition.
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid dimensions derived from the block size and the number of boundary nodes
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QVelDevPlainBB27<<< grid, threads >>> (
-         boundaryCondition->Vx,
-         boundaryCondition->Vy,
-         boundaryCondition->Vz,
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QVelDevicePlainBB27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDeviceCouette27(unsigned int numberOfThreads,
-                                    real* vx,
-                                    real* vy,
-                                    real* vz,
-                                    real* DD,
-                                    int* k_Q,
-                                    real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QVelDevCouette27<<< grid.grid, grid.threads >>> ( vx,
-                                                vy,
-                                                vz,
-                                                DD,
-                                                k_Q,
-                                                QQ,
-                                                numberOfBCnodes,
-                                                om1,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-      getLastCudaError("QVelDevicePlainBB27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDevice1h27(   unsigned int numberOfThreads,
-                                  int nx,
-                                  int ny,
-                                  real* vx,
-                                  real* vy,
-                                  real* vz,
-                                  real* DD,
-                                  int* k_Q,
-                                  real* QQ,
-                                  unsigned int numberOfBCnodes,
-                                  real om1,
-                                  real Phi,
-                                  real angularVelocity,
-                                  unsigned int* neighborX,
-                                  unsigned int* neighborY,
-                                  unsigned int* neighborZ,
-                                  real* coordX,
-                                  real* coordY,
-                                  real* coordZ,
-                                  unsigned int size_Mat,
-                                  bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QVelDev1h27<<< grid.grid, grid.threads >>> (nx,
-                                          ny,
-                                          vx,
-                                          vy,
-                                          vz,
-                                          DD,
-                                          k_Q,
-                                          QQ,
-                                          numberOfBCnodes,
-                                          om1,
-                                          Phi,
-                                          angularVelocity,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          coordX,
-                                          coordY,
-                                          coordZ,
-                                          size_Mat,
-                                          isEvenTimestep);
-      getLastCudaError("QVelDevice27 execution failed");
+    QSlipPressureDeviceComp27TurbViscosity<<< grid, threads >>> (
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->turbViscosity,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QSlipPressureDeviceComp27TurbViscosity execution failed"); // message names the kernel actually launched above, so failures are distinguishable from QSlipDeviceComp27TurbViscosity
 }
 //////////////////////////////////////////////////////////////////////////
-void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // Launches the QSlipDeviceComp27 kernel with data taken from parameterDevice and boundaryCondition.
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid dimensions derived from the block size and the number of boundary nodes
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-      QVelDevice27<<< grid, threads >>> (
-            parameterDevice->nx,
-            parameterDevice->ny,
-            boundaryCondition->Vx,
-            boundaryCondition->Vy,
-            boundaryCondition->Vz,
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->q27[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
-      getLastCudaError("QVelDevice27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
-                                      real* vx,
-                                      real* vy,
-                                      real* vz,
-                                      real* DD,
-                                      int* k_Q,
-                                      real* QQ,
-                                      unsigned int numberOfBCnodes,
-                                      real om1,
-                                      unsigned int* neighborX,
-                                      unsigned int* neighborY,
-                                      unsigned int* neighborZ,
-                                      unsigned int size_Mat,
-                                      bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QVelDeviceCompPlusSlip27<<< grid.grid, grid.threads >>> (
-                                                      vx,
-                                                      vy,
-                                                      vz,
-                                                      DD,
-                                                      k_Q,
-                                                      QQ,
-                                                      numberOfBCnodes,
-                                                      om1,
-                                                      neighborX,
-                                                      neighborY,
-                                                      neighborZ,
-                                                      size_Mat,
-                                                      isEvenTimestep);
-      getLastCudaError("QVelDeviceCompPlusSlip27 execution failed");
+    QSlipDeviceComp27<<< grid, threads >>> (
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QSlipDeviceComp27 execution failed"); // post-launch error check; getLastCudaError is defined elsewhere
 }
 //////////////////////////////////////////////////////////////////////////
-void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void BBSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition) // Launches the BBSlipDeviceComp27 kernel with data taken from parameterDevice and boundaryCondition.
 {
-   dim3 grid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // grid dimensions derived from the block size and the number of boundary nodes
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QVelDeviceComp27<<< grid, threads >>> (
-            boundaryCondition->Vx,
-            boundaryCondition->Vy,
-            boundaryCondition->Vz,
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->q27[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
-   getLastCudaError("QVelDeviceComp27 execution failed");
+    BBSlipDeviceComp27<<< grid, threads >>> ( // unlike the QSlipDeviceComp27 launch, no omega argument is passed here
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("BBSlipDeviceComp27 execution failed"); // post-launch error check; getLastCudaError is defined elsewhere
 }
 //////////////////////////////////////////////////////////////////////////
-void QVelDevCompThinWalls27(unsigned int numberOfThreads,
-                                       real* vx,
-                                       real* vy,
-                                       real* vz,
-                                       real* DD,
-                                       int* k_Q,
-                                       real* QQ,
-                                       unsigned int numberOfBCnodes,
-                                       real om1,
-                                         unsigned int* geom,
-                                       unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                         unsigned int* neighborWSB,
-                                       unsigned int size_Mat,
-                                       bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QVelDeviceCompThinWallsPartOne27<<< grid.grid, grid.threads >>> (vx,
-                                                              vy,
-                                                              vz,
-                                                              DD,
-                                                              k_Q,
-                                                              QQ,
-                                                              numberOfBCnodes,
-                                                              om1,
-                                                              neighborX,
-                                                              neighborY,
-                                                              neighborZ,
-                                                              size_Mat,
-                                                              isEvenTimestep);
-   getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed");
-
-    QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> (
-       DD,
-       k_Q,
-       QQ,
-       numberOfBCnodes,
-       geom,
-       neighborX,
-       neighborY,
-       neighborZ,
-       neighborWSB,
-       size_Mat,
-       isEvenTimestep);
-   getLastCudaError("QThinWallsPartTwo27 execution failed");
-}
-
-void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipGeomDevComp27( // Launches the QSlipGeomDeviceComp27 kernel; all arguments except numberOfThreads are forwarded unchanged to the kernel.
+    unsigned int numberOfThreads, // used with numberOfBCnodes to build the launch configuration
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    real* NormalX,
+    real* NormalY,
+    real* NormalZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // replaces the former unsigned int size_Mat parameter
+    bool isEvenTimestep)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    vf::cuda::CudaGrid grid(numberOfThreads, numberOfBCnodes); // provides the grid.grid and grid.threads launch dimensions used below
 
-   QVelDeviceCompZeroPress27<<< grid, threads >>> (
-            boundaryCondition->Vx,
-            boundaryCondition->Vy,
-            boundaryCondition->Vz,
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->q27[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
-   getLastCudaError("QVelDeviceCompZeroPress27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDevIncompHighNu27(unsigned int numberOfThreads,
-                                      real* vx,
-                                      real* vy,
-                                      real* vz,
-                                      real* DD,
-                                      int* k_Q,
-                                      real* QQ,
-                                      unsigned int numberOfBCnodes,
-                                      real om1,
-                                      unsigned int* neighborX,
-                                      unsigned int* neighborY,
-                                      unsigned int* neighborZ,
-                                      unsigned int size_Mat,
-                                      bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QVelDeviceIncompHighNu27<<< grid.grid, grid.threads >>> (
-                                                      vx,
-                                                      vy,
-                                                      vz,
-                                                      DD,
-                                                      k_Q,
-                                                      QQ,
-                                                      numberOfBCnodes,
-                                                      om1,
-                                                      neighborX,
-                                                      neighborY,
-                                                      neighborZ,
-                                                      size_Mat,
-                                                      isEvenTimestep);
-      getLastCudaError("QVelDeviceIncompHighNu27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVelDevCompHighNu27(  unsigned int numberOfThreads,
-                                      real* vx,
-                                      real* vy,
-                                      real* vz,
-                                      real* DD,
-                                      int* k_Q,
-                                      real* QQ,
-                                      unsigned int numberOfBCnodes,
-                                      real om1,
-                                      unsigned int* neighborX,
-                                      unsigned int* neighborY,
-                                      unsigned int* neighborZ,
-                                      unsigned int size_Mat,
-                                      bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      QVelDeviceCompHighNu27<<< grid.grid, grid.threads >>> (
-                                                      vx,
-                                                      vy,
-                                                      vz,
-                                                      DD,
-                                                      k_Q,
-                                                      QQ,
-                                                      numberOfBCnodes,
-                                                      om1,
-                                                      neighborX,
-                                                      neighborY,
-                                                      neighborZ,
-                                                      size_Mat,
-                                                      isEvenTimestep);
-      getLastCudaError("QVelDeviceComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVeloDevEQ27(unsigned int numberOfThreads,
-                             real* VeloX,
-                             real* VeloY,
-                             real* VeloZ,
-                             real* DD,
-                             int* k_Q,
-                             int numberOfBCnodes,
-                             real om1,
-                             unsigned int* neighborX,
-                             unsigned int* neighborY,
-                             unsigned int* neighborZ,
-                             unsigned int size_Mat,
-                             bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QVeloDeviceEQ27<<< grid.grid, grid.threads >>> (VeloX,
-                                             VeloY,
-                                             VeloZ,
-                                             DD,
-                                             k_Q,
-                                             numberOfBCnodes,
-                                             om1,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             isEvenTimestep);
-      getLastCudaError("QVeloDeviceEQ27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QVeloStreetDevEQ27(
-    uint  numberOfThreads,
-    real* veloXfraction,
-    real* veloYfraction,
-    int*  naschVelo,
-    real* DD,
-    int*  naschIndex,
-    int   numberOfStreetNodes,
-    real  velocityRatio,
-    uint* neighborX,
-    uint* neighborY,
-    uint* neighborZ,
-    uint  size_Mat,
-    bool  isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfStreetNodes);
-
-    QVeloStreetDeviceEQ27 << < grid.grid, grid.threads >> > (
-        veloXfraction,
-        veloYfraction,
-        naschVelo,
+    QSlipGeomDeviceComp27<<< grid.grid, grid.threads >>> (
         DD,
-        naschIndex,
-        numberOfStreetNodes,
-        velocityRatio,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        NormalX,
+        NormalY,
+        NormalZ,
         neighborX,
         neighborY,
         neighborZ,
-        size_Mat,
+        numberOfLBnodes,
         isEvenTimestep);
-    getLastCudaError("QVeloStreetDeviceEQ27 execution failed");
+    getLastCudaError("QSlipGeomDeviceComp27 execution failed"); // post-launch error check; getLastCudaError is defined elsewhere
 }
 //////////////////////////////////////////////////////////////////////////
-void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipNormDevComp27( // Launches the QSlipNormDeviceComp27 kernel; all arguments except numberOfThreads are forwarded unchanged to the kernel.
+    unsigned int numberOfThreads, // used with numberOfBCnodes to build the launch configuration
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    real* NormalX,
+    real* NormalY,
+    real* NormalZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // replaces the former unsigned int size_Mat parameter
+    bool isEvenTimestep)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // provides the grid.grid and grid.threads launch dimensions used below
 
-   QSlipDevice27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QSlipDevice27 execution failed");
+    QSlipNormDeviceComp27<<< grid.grid, grid.threads >>> (
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        NormalX,
+        NormalY,
+        NormalZ,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QSlipNormDeviceComp27 execution failed"); // post-launch error check; getLastCudaError is defined elsewhere
 }
 //////////////////////////////////////////////////////////////////////////
-void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level) // Launches the QStressDeviceComp27 kernel using the device parameters of the given grid level (para->getParD(level)) and the given boundary condition.
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid(  para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); // grid dimensions derived from the block size and the number of boundary nodes
+    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QSlipDeviceComp27TurbViscosity<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->turbViscosity,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed");
+    QStressDeviceComp27<<< grid, threads >>> (
+        para->getParD(level)->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        para->getParD(level)->omega,
+        para->getParD(level)->turbViscosity,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ, // third velocity pointer: velocityZ, not a second copy of velocityY
+        boundaryCondition->normalX,
+        boundaryCondition->normalY,
+        boundaryCondition->normalZ,
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        boundaryCondition->Vx1,
+        boundaryCondition->Vy1,
+        boundaryCondition->Vz1,
+        para->getParD(level)->wallModel.samplingOffset,
+        para->getParD(level)->wallModel.z0,
+        para->getHasWallModelMonitor(),
+        para->getParD(level)->wallModel.u_star,
+        para->getParD(level)->wallModel.Fx,
+        para->getParD(level)->wallModel.Fy,
+        para->getParD(level)->wallModel.Fz,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("QStressDeviceComp27 execution failed"); // post-launch error check; getLastCudaError is defined elsewhere
 }
+
 //////////////////////////////////////////////////////////////////////////
-void QSlipPressureDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level) // Launches the BBStressDevice27 kernel using the device parameters of the given grid level (para->getParD(level)) and the given boundary condition.
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); // grid dimensions derived from the block size and the number of boundary nodes
+    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QSlipPressureDeviceComp27TurbViscosity<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->turbViscosity,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed");
+    BBStressDevice27<<< grid, threads >>> (
+        para->getParD(level)->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ, // third velocity pointer: velocityZ, not a second copy of velocityY
+        boundaryCondition->normalX,
+        boundaryCondition->normalY,
+        boundaryCondition->normalZ,
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        boundaryCondition->Vx1,
+        boundaryCondition->Vy1,
+        boundaryCondition->Vz1,
+        para->getParD(level)->wallModel.samplingOffset,
+        para->getParD(level)->wallModel.z0,
+        para->getHasWallModelMonitor(),
+        para->getParD(level)->wallModel.u_star,
+        para->getParD(level)->wallModel.Fx,
+        para->getParD(level)->wallModel.Fy,
+        para->getParD(level)->wallModel.Fz,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("BBStressDevice27 execution failed"); // post-launch error check; getLastCudaError is defined elsewhere
 }
+
 //////////////////////////////////////////////////////////////////////////
-void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void BBStressPressureDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level) // Launches the BBStressPressureDevice27 kernel using the device parameters of the given grid level (para->getParD(level)) and the given boundary condition.
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes); // grid dimensions derived from the block size and the number of boundary nodes
+    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 ); // one-dimensional thread block
 
-   QSlipDeviceComp27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QSlipDeviceComp27 execution failed");
+    BBStressPressureDevice27<<< grid, threads >>> (
+        para->getParD(level)->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ, // third velocity pointer: velocityZ, not a second copy of velocityY
+        boundaryCondition->normalX,
+        boundaryCondition->normalY,
+        boundaryCondition->normalZ,
+        boundaryCondition->Vx,
+        boundaryCondition->Vy,
+        boundaryCondition->Vz,
+        boundaryCondition->Vx1,
+        boundaryCondition->Vy1,
+        boundaryCondition->Vz1,
+        para->getParD(level)->wallModel.samplingOffset,
+        para->getParD(level)->wallModel.z0,
+        para->getHasWallModelMonitor(),
+        para->getParD(level)->wallModel.u_star,
+        para->getParD(level)->wallModel.Fx,
+        para->getParD(level)->wallModel.Fy,
+        para->getParD(level)->wallModel.Fz,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("BBStressPressureDevice27 execution failed"); // post-launch error check; getLastCudaError is defined elsewhere
 }
-//////////////////////////////////////////////////////////////////////////
-void BBSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
-{
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QSlipDeviceComp27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("BBSlipDeviceComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QSlipGeomDevComp27(unsigned int numberOfThreads,
-                                   real* DD,
-                                   int* k_Q,
-                                   real* QQ,
-                                   unsigned int numberOfBCnodes,
-                                   real om1,
-                                   real* NormalX,
-                                   real* NormalY,
-                                   real* NormalZ,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   unsigned int size_Mat,
-                                   bool isEvenTimestep)
+//////////////////////////////////////////////////////////////////////////
+void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-    vf::cuda::CudaGrid grid(numberOfThreads, numberOfBCnodes);
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QSlipGeomDeviceComp27<<< grid.grid, grid.threads >>> (DD,
-                                                   k_Q,
-                                                   QQ,
-                                                   numberOfBCnodes,
-                                                   om1,
-                                                   NormalX,
-                                                   NormalY,
-                                                   NormalZ,
-                                                   neighborX,
-                                                   neighborY,
-                                                   neighborZ,
-                                                   size_Mat,
-                                                   isEvenTimestep);
-   getLastCudaError("QSlipGeomDeviceComp27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QSlipNormDevComp27(unsigned int numberOfThreads,
-                                   real* DD,
-                                   int* k_Q,
-                                   real* QQ,
-                                   unsigned int numberOfBCnodes,
-                                   real om1,
-                                   real* NormalX,
-                                   real* NormalY,
-                                   real* NormalZ,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   unsigned int size_Mat,
-                                   bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QSlipNormDeviceComp27<<< grid.grid, grid.threads >>> (DD,
-                                                   k_Q,
-                                                   QQ,
-                                                   numberOfBCnodes,
-                                                   om1,
-                                                   NormalX,
-                                                   NormalY,
-                                                   NormalZ,
-                                                   neighborX,
-                                                   neighborY,
-                                                   neighborZ,
-                                                   size_Mat,
-                                                   isEvenTimestep);
-      getLastCudaError("QSlipGeomDeviceComp27 execution failed");
+    QPressDevice27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPressDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
+void QPressDevAntiBB27(
+    unsigned int numberOfThreads,
+    real* rhoBC,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-   dim3 grid = vf::cuda::getCudaGrid(  para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
-
-      QStressDeviceComp27<<< grid, threads >>> (
-         para->getParD(level)->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->kN,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         para->getParD(level)->omega,
-         para->getParD(level)->turbViscosity,
-         para->getParD(level)->velocityX,
-         para->getParD(level)->velocityY,
-         para->getParD(level)->velocityY,
-         boundaryCondition->normalX,
-         boundaryCondition->normalY,
-         boundaryCondition->normalZ,
-         boundaryCondition->Vx,
-         boundaryCondition->Vy,
-         boundaryCondition->Vz,
-         boundaryCondition->Vx1,
-         boundaryCondition->Vy1,
-         boundaryCondition->Vz1,
-         para->getParD(level)->wallModel.samplingOffset,
-         para->getParD(level)->wallModel.z0,
-         para->getHasWallModelMonitor(),
-         para->getParD(level)->wallModel.u_star,
-         para->getParD(level)->wallModel.Fx,
-         para->getParD(level)->wallModel.Fy,
-         para->getParD(level)->wallModel.Fz,
-         para->getParD(level)->neighborX,
-         para->getParD(level)->neighborY,
-         para->getParD(level)->neighborZ,
-         para->getParD(level)->numberOfNodes,
-         para->getParD(level)->isEvenTimestep);
-      getLastCudaError("QSlipDeviceComp27 execution failed");
-}
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-//////////////////////////////////////////////////////////////////////////
-void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
-{
-   dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
-
-   BBStressDevice27<<< grid, threads >>> (
-      para->getParD(level)->distributions.f[0],
-      boundaryCondition->k,
-      boundaryCondition->kN,
-      boundaryCondition->q27[0],
-      boundaryCondition->numberOfBCnodes,
-      para->getParD(level)->velocityX,
-      para->getParD(level)->velocityY,
-      para->getParD(level)->velocityY,
-      boundaryCondition->normalX,
-      boundaryCondition->normalY,
-      boundaryCondition->normalZ,
-      boundaryCondition->Vx,
-      boundaryCondition->Vy,
-      boundaryCondition->Vz,
-      boundaryCondition->Vx1,
-      boundaryCondition->Vy1,
-      boundaryCondition->Vz1,
-      para->getParD(level)->wallModel.samplingOffset,
-      para->getParD(level)->wallModel.z0,
-      para->getHasWallModelMonitor(),
-      para->getParD(level)->wallModel.u_star,
-      para->getParD(level)->wallModel.Fx,
-      para->getParD(level)->wallModel.Fy,
-      para->getParD(level)->wallModel.Fz,
-      para->getParD(level)->neighborX,
-      para->getParD(level)->neighborY,
-      para->getParD(level)->neighborZ,
-      para->getParD(level)->numberOfNodes,
-      para->getParD(level)->isEvenTimestep);
-      getLastCudaError("BBStressDevice27 execution failed");
+    QPressDeviceAntiBB27<<< grid.grid, grid.threads >>>(
+        rhoBC,
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceAntiBB27 execution failed");
 }
-
 //////////////////////////////////////////////////////////////////////////
-void BBStressPressureDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
+void QPressDevFixBackflow27(
+    unsigned int numberOfThreads,
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-   dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
-   dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
-
-   BBStressPressureDevice27<<< grid, threads >>> (
-      para->getParD(level)->distributions.f[0],
-      boundaryCondition->k,
-      boundaryCondition->kN,
-      boundaryCondition->q27[0],
-      boundaryCondition->numberOfBCnodes,
-      para->getParD(level)->velocityX,
-      para->getParD(level)->velocityY,
-      para->getParD(level)->velocityY,
-      boundaryCondition->normalX,
-      boundaryCondition->normalY,
-      boundaryCondition->normalZ,
-      boundaryCondition->Vx,
-      boundaryCondition->Vy,
-      boundaryCondition->Vz,
-      boundaryCondition->Vx1,
-      boundaryCondition->Vy1,
-      boundaryCondition->Vz1,
-      para->getParD(level)->wallModel.samplingOffset,
-      para->getParD(level)->wallModel.z0,
-      para->getHasWallModelMonitor(),
-      para->getParD(level)->wallModel.u_star,
-      para->getParD(level)->wallModel.Fx,
-      para->getParD(level)->wallModel.Fy,
-      para->getParD(level)->wallModel.Fz,
-      para->getParD(level)->neighborX,
-      para->getParD(level)->neighborY,
-      para->getParD(level)->neighborZ,
-      para->getParD(level)->numberOfNodes,
-      para->getParD(level)->isEvenTimestep);
-      getLastCudaError("BBStressDevice27 execution failed");
-}
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
+    QPressDeviceFixBackflow27<<< grid.grid, grid.threads >>> (
+        rhoBC,
+        DD,
+        k_Q,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceFixBackflow27 execution failed");
+}
 //////////////////////////////////////////////////////////////////////////
-void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDevDirDepBot27(
+    unsigned int numberOfThreads,
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-   QPressDevice27<<< grid, threads >>> (
-      boundaryCondition->RhoBC,
-      parameterDevice->distributions.f[0],
-      boundaryCondition->k,
-      boundaryCondition->q27[0],
-      boundaryCondition->numberOfBCnodes,
-      parameterDevice->omega,
-      parameterDevice->neighborX,
-      parameterDevice->neighborY,
-      parameterDevice->neighborZ,
-      parameterDevice->numberOfNodes,
-      parameterDevice->isEvenTimestep);
-   getLastCudaError("QPressDevice27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevAntiBB27(  unsigned int numberOfThreads,
-                                    real* rhoBC,
-                                    real* vx,
-                                    real* vy,
-                                    real* vz,
-                                    real* DD,
-                                    int* k_Q,
-                                    real* QQ,
-                                    int numberOfBCnodes,
-                                    real om1,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QPressDeviceAntiBB27<<< grid.grid, grid.threads >>>( rhoBC,
-                                                vx,
-                                                vy,
-                                                vz,
-                                                DD,
-                                                k_Q,
-                                                QQ,
-                                                numberOfBCnodes,
-                                                om1,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-   getLastCudaError("QPressDeviceAntiBB27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevFixBackflow27( unsigned int numberOfThreads,
-                                        real* rhoBC,
-                                        real* DD,
-                                        int* k_Q,
-                                        unsigned int numberOfBCnodes,
-                                        real om1,
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        unsigned int size_Mat,
-                                        bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QPressDeviceFixBackflow27<<< grid.grid, grid.threads >>> (  rhoBC,
-                                                         DD,
-                                                         k_Q,
-                                                         numberOfBCnodes,
-                                                         om1,
-                                                         neighborX,
-                                                         neighborY,
-                                                         neighborZ,
-                                                         size_Mat,
-                                                         isEvenTimestep);
-   getLastCudaError("QPressDeviceFixBackflow27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevDirDepBot27(  unsigned int numberOfThreads,
-                                       real* rhoBC,
-                                       real* DD,
-                                       int* k_Q,
-                                       unsigned int numberOfBCnodes,
-                                       real om1,
-                                       unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                       unsigned int size_Mat,
-                                       bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QPressDeviceDirDepBot27<<< grid.grid, grid.threads >>> ( rhoBC,
-                                                      DD,
-                                                      k_Q,
-                                                      numberOfBCnodes,
-                                                      om1,
-                                                      neighborX,
-                                                      neighborY,
-                                                      neighborZ,
-                                                      size_Mat,
-                                                      isEvenTimestep);
-   getLastCudaError("QPressDeviceDirDepBot27 execution failed");
+    QPressDeviceDirDepBot27<<< grid.grid, grid.threads >>> (
+        rhoBC,
+        DD,
+        k_Q,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceDirDepBot27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QPressNoRhoDevice27<<< grid, threads >>> (
-         boundaryCondition->RhoBC,
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->kN,
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep,
-         vf::lbm::dir::DIR_P00);
-   getLastCudaError("QPressNoRhoDevice27 execution failed");
+    QPressNoRhoDevice27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep,
+        vf::lbm::dir::DIR_P00);
+    getLastCudaError("QPressNoRhoDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QPressZeroRhoOutflowDevice27<<< grid, threads >>> (
-         boundaryCondition->RhoBC,
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->kN,
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep,
-         vf::lbm::dir::DIR_P00,
-         parameterDevice->outflowPressureCorrectionFactor);
-   getLastCudaError("QPressZeroRhoOutflowDev27 execution failed");
+    QPressZeroRhoOutflowDevice27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep,
+        vf::lbm::dir::DIR_P00,
+        parameterDevice->outflowPressureCorrectionFactor);
+    getLastCudaError("QPressZeroRhoOutflowDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+
+    QInflowScaleByPressDevice27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QInflowScaleByPressDevice27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDevOld27(
+    unsigned int numberOfThreads,
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-   QInflowScaleByPressDevice27<<< grid, threads >>> (
-           boundaryCondition->RhoBC,
-           parameterDevice->distributions.f[0],
-           boundaryCondition->k,
-           boundaryCondition->kN,
-           boundaryCondition->numberOfBCnodes,
-           parameterDevice->omega,
-           parameterDevice->neighborX,
-           parameterDevice->neighborY,
-           parameterDevice->neighborZ,
-           parameterDevice->numberOfNodes,
-           parameterDevice->isEvenTimestep);
-   getLastCudaError("QInflowScaleByPressDevice27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevOld27(  unsigned int numberOfThreads,
-                                     real* rhoBC,
-                                     real* DD,
-                                     int* k_Q,
-                                     int* k_N,
-                                     unsigned int numberOfBCnodes,
-                                     real om1,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int size_Mat,
-                                     bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QPressDeviceOld27<<< grid.grid, grid.threads >>> ( rhoBC,
-                                                DD,
-                                                k_Q,
-                                                k_N,
-                                                numberOfBCnodes,
-                                                om1,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-   getLastCudaError("QPressDeviceOld27 execution failed");
+    QPressDeviceOld27<<< grid.grid, grid.threads >>> (
+        rhoBC,
+        DD,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceOld27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QPressDeviceIncompNEQ27<<< grid, threads >>> (
-         boundaryCondition->RhoBC,
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->kN,
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->omega,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("QPressDeviceIncompNEQ27 execution failed");
+    QPressDeviceIncompNEQ27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPressDeviceIncompNEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
-   QPressDeviceNEQ27<<< grid, threads >>> (
+    QPressDeviceNEQ27<<< grid, threads >>> (
         boundaryCondition->RhoBC,
         parameterDevice->distributions.f[0],
         boundaryCondition->k,
@@ -3012,200 +3072,210 @@ void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditi
         parameterDevice->neighborZ,
         parameterDevice->numberOfNodes,
         parameterDevice->isEvenTimestep);
-   getLastCudaError("QPressDevNEQ27 execution failed");
+    getLastCudaError("QPressDevNEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+
+    QPressDeviceEQZ27<<< grid, threads >>> (
+        boundaryCondition->RhoBC,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->kN,
+        parameterDevice->kDistTestRE.f[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->omega,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPressDeviceEQZ27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDevZero27( // host-side launcher for the QPressDeviceZero27 kernel
+    unsigned int numberOfThreads,
+    real* DD,
+    int* k_Q,
+    unsigned int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from the thread count and the number of BC nodes
+
+    QPressDeviceZero27<<< grid.grid, grid.threads >>> (
+        DD,
+        k_Q,
+        numberOfBCnodes,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceZero27 execution failed"); // message names the kernel launched above (was mislabelled as QPressDeviceOld27)
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDevFake27(
+    unsigned int numberOfThreads,
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+
 
-      QPressDeviceEQZ27<<< grid, threads >>> (
-            boundaryCondition->RhoBC,
-            parameterDevice->distributions.f[0],
-            boundaryCondition->k,
-            boundaryCondition->kN,
-            parameterDevice->kDistTestRE.f[0],
-            boundaryCondition->numberOfBCnodes,
-            parameterDevice->omega,
-            parameterDevice->neighborX,
-            parameterDevice->neighborY,
-            parameterDevice->neighborZ,
-            parameterDevice->numberOfNodes,
-            parameterDevice->isEvenTimestep);
-      getLastCudaError("QPressDeviceEQZ27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevZero27(unsigned int numberOfThreads,
-                                real* DD,
-                                int* k_Q,
-                                unsigned int numberOfBCnodes,
-                                unsigned int* neighborX,
-                                unsigned int* neighborY,
-                                unsigned int* neighborZ,
-                                unsigned int size_Mat,
-                                bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   QPressDeviceZero27<<< grid.grid, grid.threads >>> (DD,
-                                                k_Q,
-                                                numberOfBCnodes,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-   getLastCudaError("QPressDeviceOld27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDevFake27(     unsigned int numberOfThreads,
-                                     real* rhoBC,
-                                     real* DD,
-                                     int* k_Q,
-                                     int* k_N,
-                                     unsigned int numberOfBCnodes,
-                                     real om1,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int size_Mat,
-                                     bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-
-      QPressDeviceFake27<<< grid.grid, grid.threads >>> (rhoBC,
-                                                DD,
-                                                k_Q,
-                                                k_N,
-                                                numberOfBCnodes,
-                                                om1,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-      getLastCudaError("QPressDeviceFake27 execution failed");
+    QPressDeviceFake27<<< grid.grid, grid.threads >>> (
+        rhoBC,
+        DD,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("QPressDeviceFake27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
-   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
-   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes); // grid size computed by getCudaGrid from the thread count and the number of boundary nodes
+    dim3 threads(parameterDevice->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    BBDevice27<<< grid, threads >>> ( // launches the BBDevice27 kernel; every argument is read from the two parameter structs
+        parameterDevice->distributions.f[0],
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->numberOfBCnodes,
+        parameterDevice->neighborX,
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("BBDevice27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void QPressDev27_IntBB( // host wrapper: configures and launches the QPressDevice27_IntBB kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    real* rho,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, // sizes the launch and is also forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
 
-   BBDevice27<<< grid, threads >>> (
-         parameterDevice->distributions.f[0],
-         boundaryCondition->k,
-         boundaryCondition->q27[0],
-         boundaryCondition->numberOfBCnodes,
-         parameterDevice->neighborX,
-         parameterDevice->neighborY,
-         parameterDevice->neighborZ,
-         parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
-   getLastCudaError("BBDevice27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void QPressDev27_IntBB(  unsigned int numberOfThreads,
-                                    real* rho,
-                                    real* DD,
-                                    int* k_Q,
-                                    real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-    QPressDevice27_IntBB<<< grid.grid, grid.threads >>> (rho,
-                                                    DD,
-                                                    k_Q,
-                                                    QQ,
-                                                    numberOfBCnodes,
-                                                    om1,
-                                                    neighborX,
-                                                    neighborY,
-                                                    neighborZ,
-                                                    size_Mat,
-                                                    isEvenTimestep);
+    QPressDevice27_IntBB<<< grid.grid, grid.threads >>> ( // remaining parameters are forwarded in declaration order
+        rho,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
     getLastCudaError("QPressDevice27_IntBB execution failed");
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
-void PressSchlaffer27(unsigned int numberOfThreads,
-                                 real* rhoBC,
-                                 real* DD,
-                                 real* vx0,
-                                 real* vy0,
-                                 real* vz0,
-                                 real* deltaVz0,
-                                 int* k_Q,
-                                 int* k_N,
-                                 int numberOfBCnodes,
-                                 real om1,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 unsigned int size_Mat,
-                                 bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   PressSchlaff27<<< grid.grid, grid.threads >>>(  rhoBC,
-                                             DD,
-                                             vx0,
-                                             vy0,
-                                             vz0,
-                                             deltaVz0,
-                                             k_Q,
-                                             k_N,
-                                             numberOfBCnodes,
-                                             om1,
-                                             neighborX,
-                                             neighborY,
-                                             neighborZ,
-                                             size_Mat,
-                                             isEvenTimestep);
-   getLastCudaError("PressSchlaff27 execution failed");
+void PressSchlaffer27( // host wrapper: configures and launches the PressSchlaff27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    real* rhoBC,
+    real* DD,
+    real* vx0,
+    real* vy0,
+    real* vz0,
+    real* deltaVz0,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes, // sizes the launch and is also forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    PressSchlaff27<<< grid.grid, grid.threads >>>( // remaining parameters are forwarded in declaration order
+        rhoBC,
+        DD,
+        vx0,
+        vy0,
+        vz0,
+        deltaVz0,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("PressSchlaff27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
-void VelSchlaffer27(  unsigned int numberOfThreads,
-                                 int t,
-                                 real* DD,
-                                 real* vz0,
-                                 real* deltaVz0,
-                                 int* k_Q,
-                                 int* k_N,
-                                 int numberOfBCnodes,
-                                 real om1,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 unsigned int size_Mat,
-                                 bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-   VelSchlaff27<<< grid.grid, grid.threads >>>( t,
-                                          DD,
-                                          vz0,
-                                          deltaVz0,
-                                          k_Q,
-                                          k_N,
-                                          numberOfBCnodes,
-                                          om1,
-                                          neighborX,
-                                          neighborY,
-                                          neighborZ,
-                                          size_Mat,
-                                          isEvenTimestep);
-      getLastCudaError("VelSchlaff27 execution failed");
+void VelSchlaffer27( // host wrapper: configures and launches the VelSchlaff27 kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    int t,
+    real* DD,
+    real* vz0,
+    real* deltaVz0,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes, // sizes the launch and is also forwarded to the kernel
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from numberOfThreads and numberOfBCnodes
+
+    VelSchlaff27<<< grid.grid, grid.threads >>>( // remaining parameters are forwarded in declaration order
+        t,
+        DD,
+        vz0,
+        deltaVz0,
+        k_Q,
+        k_N,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("VelSchlaff27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
 }
 //////////////////////////////////////////////////////////////////////////
 void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice,
@@ -3213,38 +3283,37 @@ void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice,
                                 real timeRatio,
                                 real velocityRatio)
 {
-
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
 
-    QPrecursorDeviceCompZeroPress<<< grid.grid, grid.threads >>>(boundaryCondition->k,
-                                                                boundaryCondition->numberOfBCnodes,
-                                                                boundaryCondition->numberOfPrecursorNodes,
-                                                                boundaryCondition->sizeQ,
-                                                                parameterDevice->omega,
-                                                                parameterDevice->distributions.f[0],
-                                                                boundaryCondition->q27[0],
-                                                                parameterDevice->neighborX,
-                                                                parameterDevice->neighborY,
-                                                                parameterDevice->neighborZ,
-                                                                boundaryCondition->planeNeighbor0PP,
-                                                                boundaryCondition->planeNeighbor0PM,
-                                                                boundaryCondition->planeNeighbor0MP,
-                                                                boundaryCondition->planeNeighbor0MM,
-                                                                boundaryCondition->weights0PP,
-                                                                boundaryCondition->weights0PM,
-                                                                boundaryCondition->weights0MP,
-                                                                boundaryCondition->weights0MM,
-                                                                boundaryCondition->last,
-                                                                boundaryCondition->current,
-                                                                boundaryCondition->velocityX,
-                                                                boundaryCondition->velocityY,
-                                                                boundaryCondition->velocityZ,
-                                                                timeRatio,
-                                                                velocityRatio,
-                                                                parameterDevice->numberOfNodes,
-                                                                parameterDevice->isEvenTimestep);
+    QPrecursorDeviceCompZeroPress<<< grid.grid, grid.threads >>>( // launches the kernel with fields of the two parameter structs plus the two ratios
+        boundaryCondition->k,
+        boundaryCondition->numberOfBCnodes,
+        boundaryCondition->numberOfPrecursorNodes,
+        boundaryCondition->sizeQ,
+        parameterDevice->omega,
+        parameterDevice->distributions.f[0],
+        boundaryCondition->q27[0],
+        parameterDevice->neighborX, // one neighbor table per axis: X, Y, Z
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        boundaryCondition->planeNeighbor0PP,
+        boundaryCondition->planeNeighbor0PM,
+        boundaryCondition->planeNeighbor0MP,
+        boundaryCondition->planeNeighbor0MM,
+        boundaryCondition->weights0PP,
+        boundaryCondition->weights0PM,
+        boundaryCondition->weights0MP,
+        boundaryCondition->weights0MM,
+        boundaryCondition->last,
+        boundaryCondition->current,
+        boundaryCondition->velocityX,
+        boundaryCondition->velocityY,
+        boundaryCondition->velocityZ,
+        timeRatio,
+        velocityRatio,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
     getLastCudaError("QPrecursorDeviceCompZeroPress execution failed");
-
 }
 //////////////////////////////////////////////////////////////////////////
 void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice,
@@ -3252,34 +3321,34 @@ void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice,
                         real timeRatio,
                         real velocityRatio)
 {
-
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
 
-    PrecursorDeviceEQ27<<< grid.grid, grid.threads >>>(boundaryCondition->k,
-                                                    boundaryCondition->numberOfBCnodes,
-                                                    boundaryCondition->numberOfPrecursorNodes,
-                                                    parameterDevice->omega,
-                                                    parameterDevice->distributions.f[0],
-                                                    parameterDevice->neighborX,
-                                                    parameterDevice->neighborX,
-                                                    parameterDevice->neighborX,
-                                                    boundaryCondition->planeNeighbor0PP,
-                                                    boundaryCondition->planeNeighbor0PM,
-                                                    boundaryCondition->planeNeighbor0MP,
-                                                    boundaryCondition->planeNeighbor0MM,
-                                                    boundaryCondition->weights0PP,
-                                                    boundaryCondition->weights0PM,
-                                                    boundaryCondition->weights0MP,
-                                                    boundaryCondition->weights0MM,
-                                                    boundaryCondition->last,
-                                                    boundaryCondition->current,
-                                                    boundaryCondition->velocityX,
-                                                    boundaryCondition->velocityY,
-                                                    boundaryCondition->velocityZ,
-                                                    timeRatio,
-                                                    velocityRatio,
-                                                    parameterDevice->numberOfNodes,
-                                                    parameterDevice->isEvenTimestep);
+    PrecursorDeviceEQ27<<< grid.grid, grid.threads >>>( // launches the kernel with fields of the two parameter structs plus the two ratios
+        boundaryCondition->k,
+        boundaryCondition->numberOfBCnodes,
+        boundaryCondition->numberOfPrecursorNodes,
+        parameterDevice->omega,
+        parameterDevice->distributions.f[0],
+        parameterDevice->neighborX, // one neighbor table per axis, matching the sibling precursor launchers
+        parameterDevice->neighborY, // was neighborX: the Y table was never forwarded
+        parameterDevice->neighborZ, // was neighborX: the Z table was never forwarded
+        boundaryCondition->planeNeighbor0PP,
+        boundaryCondition->planeNeighbor0PM,
+        boundaryCondition->planeNeighbor0MP,
+        boundaryCondition->planeNeighbor0MM,
+        boundaryCondition->weights0PP,
+        boundaryCondition->weights0PM,
+        boundaryCondition->weights0MP,
+        boundaryCondition->weights0MM,
+        boundaryCondition->last,
+        boundaryCondition->current,
+        boundaryCondition->velocityX,
+        boundaryCondition->velocityY,
+        boundaryCondition->velocityZ,
+        timeRatio,
+        velocityRatio,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
     getLastCudaError("PrecursorDeviceEQ27 execution failed");
 
 }
@@ -3289,2035 +3358,2127 @@ void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice,
                                 real timeRatio,
                                 real velocityRatio)
 {
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // launch configuration built from the thread count and the number of boundary nodes
+
+    PrecursorDeviceDistributions<<< grid.grid, grid.threads >>>( // NOTE(review): velocityRatio is accepted by this wrapper but is not among the launch arguments
+        boundaryCondition->k,
+        boundaryCondition->numberOfBCnodes,
+        boundaryCondition->numberOfPrecursorNodes,
+        parameterDevice->distributions.f[0],
+        parameterDevice->neighborX, // one neighbor table per axis: X, Y, Z
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        boundaryCondition->planeNeighbor0PP,
+        boundaryCondition->planeNeighbor0PM,
+        boundaryCondition->planeNeighbor0MP,
+        boundaryCondition->planeNeighbor0MM,
+        boundaryCondition->weights0PP,
+        boundaryCondition->weights0PM,
+        boundaryCondition->weights0MP,
+        boundaryCondition->weights0MM,
+        boundaryCondition->last,
+        boundaryCondition->current,
+        timeRatio,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("PrecursorDeviceDistributions execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+
+}
+
+//////////////////////////////////////////////////////////////////////////
+void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, // host wrapper: configures and launches the QPrecursorDeviceDistributions kernel
+                                QforPrecursorBoundaryConditions* boundaryCondition,
+                                real timeRatio,
+                                real velocityRatio) // NOTE(review): accepted but not among the launch arguments below
+{
+
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // launch configuration built from the thread count and the number of boundary nodes
+
+    QPrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(
+        boundaryCondition->k,
+        boundaryCondition->q27[0],
+        boundaryCondition->sizeQ,
+        boundaryCondition->numberOfBCnodes,
+        boundaryCondition->numberOfPrecursorNodes,
+        parameterDevice->distributions.f[0],
+        parameterDevice->neighborX, // one neighbor table per axis: X, Y, Z
+        parameterDevice->neighborY,
+        parameterDevice->neighborZ,
+        boundaryCondition->planeNeighbor0PP,
+        boundaryCondition->planeNeighbor0PM,
+        boundaryCondition->planeNeighbor0MP,
+        boundaryCondition->planeNeighbor0MM,
+        boundaryCondition->weights0PP,
+        boundaryCondition->weights0PM,
+        boundaryCondition->weights0MP,
+        boundaryCondition->weights0MM,
+        boundaryCondition->last,
+        boundaryCondition->current,
+        timeRatio,
+        parameterDevice->numberOfNodes,
+        parameterDevice->isEvenTimestep);
+    getLastCudaError("QPrecursorDeviceDistributions execution failed"); // message names the kernel launched above so a failure is attributed to the right kernel
+
+}
+//////////////////////////////////////////////////////////////////////////
+extern "C" void PropVelo( // host wrapper with C linkage: configures and launches the PropellerBC kernel
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rho,
+    real* ux,
+    real* uy,
+    real* uz,
+    int* k_Q,
+    unsigned int size_Prop, // sizes the launch and is also forwarded to the kernel
+    unsigned long long numberOfLBnodes,
+    unsigned int* bcMatD,
+    real* DD,
+    bool EvenOrOdd)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Prop); // launch configuration built from numberOfThreads and size_Prop
+
+    PropellerBC<<< grid.grid, grid.threads >>>( // remaining parameters are forwarded in declaration order
+        neighborX,
+        neighborY,
+        neighborZ,
+        rho,
+        ux,
+        uy,
+        uz,
+        k_Q,
+        size_Prop,
+        numberOfLBnodes,
+        bcMatD,
+        DD,
+        EvenOrOdd);
+    getLastCudaError("PropellerBC execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF27( // host wrapper: configures and launches the scaleCF27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads) // only used to build the launch configuration, not forwarded to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCF27<<< grid.grid, grid.threads >>> ( // all parameters except numberOfThreads are forwarded in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF);
+    getLastCudaError("scaleCF27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFEff27( // host wrapper: configures and launches the scaleCFEff27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine) // taken by value and forwarded as the last kernel argument
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCFEff27<<< grid.grid, grid.threads >>> ( // all parameters except numberOfThreads are forwarded in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCFEff27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFLast27( // host wrapper: configures and launches the scaleCFLast27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine) // taken by value and forwarded as the last kernel argument
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCFLast27<<< grid.grid, grid.threads >>> ( // all parameters except numberOfThreads are forwarded in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCFLast27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFpress27( // host wrapper: configures and launches the scaleCFpress27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine) // taken by value and forwarded as the last kernel argument
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCFpress27<<< grid.grid, grid.threads >>>( // all parameters except numberOfThreads are forwarded in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCFpress27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_Fix_27( // host wrapper: configures and launches the scaleCF_Fix_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine) // taken by value and forwarded as the last kernel argument
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCF_Fix_27<<< grid.grid, grid.threads >>>( // all parameters except numberOfThreads are forwarded in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_Fix_27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_Fix_comp_27( // host wrapper: configures and launches the scaleCF_Fix_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine) // taken by value and forwarded as the last kernel argument
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCF_Fix_comp_27<<< grid.grid, grid.threads >>>( // all parameters except numberOfThreads are forwarded in declaration order
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_Fix_comp_27 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_0817_comp_27( // host wrapper: configures and launches the scaleCF_0817_comp_27 kernel on a caller-supplied stream
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine, // taken by value and forwarded as the last kernel argument
+    CUstream_st *stream) // stream the kernel is enqueued on; not a kernel argument
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCF_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>>( // third launch parameter 0: no dynamic shared memory
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_0817_comp_27 execution failed"); // message carries the exact kernel name, like the other launchers
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_comp_D3Q27F3_2018( // host wrapper: configures and launches the scaleCF_comp_D3Q27F3_2018 kernel
+    real* DC,
+    real* DF,
+    real* G6,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine) // taken by value and forwarded as the last kernel argument
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCF_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>>( // all parameters except numberOfThreads are forwarded in declaration order
+        DC,
+        DF,
+        G6,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_comp_D3Q27F3( // host wrapper: configures and launches the scaleCF_comp_D3Q27F3 kernel on a caller-supplied stream
+    real* DC,
+    real* DF,
+    real* G6,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // only used to build the launch configuration, not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine, // taken by value and forwarded as the last kernel argument
+    CUstream_st *stream) // stream the kernel is enqueued on; not a kernel argument
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration built from numberOfThreads and kCF
+
+    scaleCF_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>>( // third launch parameter 0: no dynamic shared memory
+        DC,
+        DF,
+        G6,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_comp_D3Q27F3 execution failed"); // NOTE(review): getLastCudaError is defined elsewhere; presumably reports a launch error with this message - confirm
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_staggered_time_comp_27( // host-side launcher for the scaleCF_staggered_time_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration from numberOfThreads and kCF (CudaGrid is declared outside this chunk)
+
+    scaleCF_staggered_time_comp_27<<< grid.grid, grid.threads >>>( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_staggered_time_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * coarseToFine, ICellNeigh& neighborCoarseToFine, CUstream_st *stream)
+{ // host-side launcher for the scaleCF_RhoSq_comp_27 kernel; arguments are unpacked from the two parameter structs and the ICells list
+    dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  coarseToFine->numberOfCells); // grid computed from the thread count and coarseToFine->numberOfCells (getCudaGrid is declared outside this chunk)
+    dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    scaleCF_RhoSq_comp_27<<<grid, threads, 0, stream>>>( // 0 bytes of dynamic shared memory; enqueued on the caller's stream
+        parameterDeviceC->distributions.f[0],
+        parameterDeviceF->distributions.f[0],
+        parameterDeviceC->neighborX,
+        parameterDeviceC->neighborY,
+        parameterDeviceC->neighborZ,
+        parameterDeviceF->neighborX,
+        parameterDeviceF->neighborY,
+        parameterDeviceF->neighborZ,
+        parameterDeviceC->numberOfNodes,
+        parameterDeviceF->numberOfNodes,
+        parameterDeviceC->isEvenTimestep, // only parameterDeviceC's flag is passed
+        coarseToFine->coarseCellIndices,
+        coarseToFine->fineCellIndices,
+        coarseToFine->numberOfCells,
+        parameterDeviceC->omega,
+        parameterDeviceF->omega,
+        parameterDeviceC->viscosity,
+        parameterDeviceC->nx,
+        parameterDeviceC->ny,
+        parameterDeviceF->nx,
+        parameterDeviceF->ny,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_RhoSq_comp_27 execution failed"); // message names the kernel actually launched above
+}
+
+template<bool hasTurbulentViscosity> void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * coarseToFine, ICellNeigh& neighborCoarseToFine, CUstream_st *stream)
+{ // host-side launcher for scaleCF_compressible<hasTurbulentViscosity>; the template flag is forwarded unchanged to the kernel
+    dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  coarseToFine->numberOfCells); // grid computed from the thread count and coarseToFine->numberOfCells (getCudaGrid is declared outside this chunk)
+    dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    scaleCF_compressible<hasTurbulentViscosity><<<grid, threads, 0, stream>>>( // 0 bytes of dynamic shared memory; enqueued on the caller's stream
+        parameterDeviceC->distributions.f[0],
+        parameterDeviceF->distributions.f[0],
+        parameterDeviceC->neighborX,
+        parameterDeviceC->neighborY,
+        parameterDeviceC->neighborZ,
+        parameterDeviceF->neighborX,
+        parameterDeviceF->neighborY,
+        parameterDeviceF->neighborZ,
+        parameterDeviceC->numberOfNodes,
+        parameterDeviceF->numberOfNodes,
+        parameterDeviceC->isEvenTimestep, // only parameterDeviceC's flag is passed
+        coarseToFine->coarseCellIndices,
+        coarseToFine->fineCellIndices,
+        coarseToFine->numberOfCells,
+        parameterDeviceC->omega,
+        parameterDeviceF->omega,
+        parameterDeviceC->turbViscosity, // passed for both template variants; NOTE(review): presumably unused when hasTurbulentViscosity is false - confirm in the kernel
+        parameterDeviceF->turbViscosity,
+        neighborCoarseToFine);
+
+    getLastCudaError("scaleCF_compressible execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+template void ScaleCF_compressible<true>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * coarseToFine, ICellNeigh& neighborCoarseToFine, CUstream_st *stream); // explicit instantiation for hasTurbulentViscosity == true
+template void ScaleCF_compressible<false>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * coarseToFine, ICellNeigh& neighborCoarseToFine, CUstream_st *stream); // explicit instantiation for hasTurbulentViscosity == false
+
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_RhoSq_3rdMom_comp_27( // host-side launcher for the scaleCF_RhoSq_3rdMom_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine,
+    CUstream_st *stream) // stream the kernel launch is enqueued on
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration from numberOfThreads and kCF (CudaGrid is declared outside this chunk)
+
+    scaleCF_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>( // 0 bytes of dynamic shared memory; enqueued on the caller's stream
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_AA2016_comp_27( // host-side launcher for the scaleCF_AA2016_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine,
+    CUstream_st *stream) // stream the kernel launch is enqueued on
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration from numberOfThreads and kCF (CudaGrid is declared outside this chunk)
+
+    scaleCF_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>( // 0 bytes of dynamic shared memory; enqueued on the caller's stream
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_AA2016_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCF_NSPress_27( // host-side launcher for the scaleCF_NSPress_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration from numberOfThreads and kCF (CudaGrid is declared outside this chunk)
+
+    scaleCF_NSPress_27<<< grid.grid, grid.threads >>>( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCF_NSPress_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFThSMG7( // host-side launcher for the scaleCFThSMG7 kernel
+    real* DC,
+    real* DF,
+    real* DD7C,
+    real* DD7F,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real nu,
+    real diffusivity_fine,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration from numberOfThreads and kCF (CudaGrid is declared outside this chunk)
+
+    scaleCFThSMG7<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        DD7C,
+        DD7F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        nu,
+        diffusivity_fine,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCFThSMG7 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFThS7( // host-side launcher for the scaleCFThS7 kernel
+    real* DC,
+    real* DF,
+    real* DD7C,
+    real* DD7F,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real nu,
+    real diffusivity_fine,
+    unsigned int numberOfThreads) // used only to build the launch configuration; not forwarded to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration from numberOfThreads and kCF (CudaGrid is declared outside this chunk)
+
+    scaleCFThS7<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        DD7C,
+        DD7F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        nu,
+        diffusivity_fine); // unlike the sibling ThS launchers visible here, no ICellNeigh argument is taken or passed
+    getLastCudaError("scaleCFThS7 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleCFThS27( // host-side launcher for the scaleCFThS27 kernel
+    real* DC,
+    real* DF,
+    real* DD27C,
+    real* DD27F,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posCSWB,
+    unsigned int* posFSWB,
+    unsigned int kCF, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real nu,
+    real diffusivity_fine,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborCoarseToFine)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF); // launch configuration from numberOfThreads and kCF (CudaGrid is declared outside this chunk)
+
+    scaleCFThS27<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        DD27C,
+        DD27F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posCSWB,
+        posFSWB,
+        kCF,
+        nu,
+        diffusivity_fine,
+        neighborCoarseToFine);
+    getLastCudaError("scaleCFThS27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC27( // host-side launcher for the scaleFC27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads) // used only to build the launch configuration; not forwarded to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
+
+    scaleFC27<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF); // unlike the other ScaleFC launchers visible here, no ICellNeigh argument is taken or passed
+    getLastCudaError("scaleFC27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCEff27( // host-side launcher for the scaleFCEff27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
+
+    scaleFCEff27<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFCEff27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCLast27( // host-side launcher for the scaleFCLast27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
+
+    scaleFCLast27<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFCLast27 execution failed"); // message names the kernel, matching the sibling launchers, so a failure can be attributed
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCpress27( // host-side launcher for the scaleFCpress27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
+
+    scaleFCpress27<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFCpress27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_Fix_27( // host-side launcher for the scaleFC_Fix_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
+
+    scaleFC_Fix_27<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_Fix_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_Fix_comp_27( // host-side launcher for the scaleFC_Fix_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
+
+    scaleFC_Fix_comp_27<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_Fix_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_0817_comp_27( // host-side launcher for the scaleFC_0817_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse,
+    CUstream_st *stream) // stream the kernel launch is enqueued on
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
+
+    scaleFC_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>> ( // 0 bytes of dynamic shared memory; enqueued on the caller's stream
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_0817_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_comp_D3Q27F3_2018( // host-side launcher for the scaleFC_comp_D3Q27F3_2018 kernel
+    real* DC,
+    real* DF,
+    real* G6,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
 
-    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-
-    PrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k,
-                                                            boundaryCondition->numberOfBCnodes,
-                                                            boundaryCondition->numberOfPrecursorNodes,
-                                                            parameterDevice->distributions.f[0],
-                                                            parameterDevice->neighborX,
-                                                            parameterDevice->neighborY,
-                                                            parameterDevice->neighborZ,
-                                                            boundaryCondition->planeNeighbor0PP,
-                                                            boundaryCondition->planeNeighbor0PM,
-                                                            boundaryCondition->planeNeighbor0MP,
-                                                            boundaryCondition->planeNeighbor0MM,
-                                                            boundaryCondition->weights0PP,
-                                                            boundaryCondition->weights0PM,
-                                                            boundaryCondition->weights0MP,
-                                                            boundaryCondition->weights0MM,
-                                                            boundaryCondition->last,
-                                                            boundaryCondition->current,
-                                                            timeRatio,
-                                                            parameterDevice->numberOfNodes,
-                                                            parameterDevice->isEvenTimestep);
-    getLastCudaError("QPrecursorDeviceCompZeroPress execution failed");
-
-}
-
-//////////////////////////////////////////////////////////////////////////
-void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice,
-                                QforPrecursorBoundaryConditions* boundaryCondition,
-                                real timeRatio,
-                                real velocityRatio)
+    scaleFC_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        G6,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_comp_D3Q27F3( // host-side launcher for the scaleFC_comp_D3Q27F3 kernel
+    real* DC,
+    real* DF,
+    real* G6,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse,
+    CUstream_st *stream) // stream the kernel launch is enqueued on
 {
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
 
-    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-
-    QPrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k,
-                                                                boundaryCondition->q27[0],
-                                                                boundaryCondition->sizeQ,
-                                                                boundaryCondition->numberOfBCnodes,
-                                                                boundaryCondition->numberOfPrecursorNodes,
-                                                                parameterDevice->distributions.f[0],
-                                                                parameterDevice->neighborX,
-                                                                parameterDevice->neighborY,
-                                                                parameterDevice->neighborZ,
-                                                                boundaryCondition->planeNeighbor0PP,
-                                                                boundaryCondition->planeNeighbor0PM,
-                                                                boundaryCondition->planeNeighbor0MP,
-                                                                boundaryCondition->planeNeighbor0MM,
-                                                                boundaryCondition->weights0PP,
-                                                                boundaryCondition->weights0PM,
-                                                                boundaryCondition->weights0MP,
-                                                                boundaryCondition->weights0MM,
-                                                                boundaryCondition->last,
-                                                                boundaryCondition->current,
-                                                                timeRatio,
-                                                                parameterDevice->numberOfNodes,
-                                                                parameterDevice->isEvenTimestep);
-    getLastCudaError("QPrecursorDeviceCompZeroPress execution failed");
+    scaleFC_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>> ( // 0 bytes of dynamic shared memory; enqueued on the caller's stream
+        DC,
+        DF,
+        G6,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_comp_D3Q27F3 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_staggered_time_comp_27( // host-side launcher for the scaleFC_staggered_time_comp_27 kernel
+    real* DC,
+    real* DF,
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // sizes the launch (CudaGrid) and is also forwarded to the kernel
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // used only to build the launch configuration; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // launch configuration from numberOfThreads and kFC (CudaGrid is declared outside this chunk)
+
+    scaleFC_staggered_time_comp_27<<< grid.grid, grid.threads >>> ( // no shared-memory/stream arguments: CUDA defaults (0 bytes dynamic shared memory, default stream)
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_staggered_time_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * fineToCoarse, ICellNeigh &neighborFineToCoarse, CUstream_st *stream)
+{ // host-side launcher for the scaleFC_RhoSq_comp_27 kernel; arguments are unpacked from the two parameter structs and the ICells list
+    dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  fineToCoarse->numberOfCells); // grid computed from the thread count and fineToCoarse->numberOfCells (getCudaGrid is declared outside this chunk)
+    dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    scaleFC_RhoSq_comp_27<<<grid, threads, 0, stream>>>( // 0 bytes of dynamic shared memory; enqueued on the caller's stream
+        parameterDeviceC->distributions.f[0],
+        parameterDeviceF->distributions.f[0],
+        parameterDeviceC->neighborX,
+        parameterDeviceC->neighborY,
+        parameterDeviceC->neighborZ,
+        parameterDeviceF->neighborX,
+        parameterDeviceF->neighborY,
+        parameterDeviceF->neighborZ,
+        parameterDeviceC->numberOfNodes,
+        parameterDeviceF->numberOfNodes,
+        parameterDeviceC->isEvenTimestep, // only parameterDeviceC's flag is passed
+        fineToCoarse->coarseCellIndices,
+        fineToCoarse->fineCellIndices,
+        fineToCoarse->numberOfCells,
+        parameterDeviceC->omega,
+        parameterDeviceF->omega,
+        parameterDeviceC->viscosity,
+        parameterDeviceC->nx,
+        parameterDeviceC->ny,
+        parameterDeviceF->nx,
+        parameterDeviceF->ny,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_RhoSq_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+template<bool hasTurbulentViscosity> void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * fineToCoarse, ICellNeigh &neighborFineToCoarse, CUstream_st *stream)
+{ // host-side launcher for scaleFC_compressible<hasTurbulentViscosity>; the template flag is forwarded unchanged to the kernel
+    dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  fineToCoarse->numberOfCells); // grid computed from the thread count and fineToCoarse->numberOfCells (getCudaGrid is declared outside this chunk)
+    dim3 threads(parameterDeviceC->numberofthreads, 1, 1 ); // one-dimensional thread block
+
+    scaleFC_compressible<hasTurbulentViscosity><<<grid, threads, 0, stream>>>( // 0 bytes of dynamic shared memory; enqueued on the caller's stream
+        parameterDeviceC->distributions.f[0],
+        parameterDeviceF->distributions.f[0],
+        parameterDeviceC->neighborX,
+        parameterDeviceC->neighborY,
+        parameterDeviceC->neighborZ,
+        parameterDeviceF->neighborX,
+        parameterDeviceF->neighborY,
+        parameterDeviceF->neighborZ,
+        parameterDeviceC->numberOfNodes,
+        parameterDeviceF->numberOfNodes,
+        parameterDeviceC->isEvenTimestep, // only parameterDeviceC's flag is passed
+        fineToCoarse->coarseCellIndices,
+        fineToCoarse->fineCellIndices,
+        fineToCoarse->numberOfCells,
+        parameterDeviceC->omega,
+        parameterDeviceF->omega,
+        parameterDeviceC->turbViscosity, // passed for both template variants; NOTE(review): presumably unused when hasTurbulentViscosity is false - confirm in the kernel
+        parameterDeviceF->turbViscosity,
+        neighborFineToCoarse);
+
+    getLastCudaError("scaleFC_compressible execution failed"); // NOTE(review): presumably reports a pending CUDA error together with this message - confirm in the helper's definition
+}
+template void ScaleFC_compressible<true>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * fineToCoarse, ICellNeigh &neighborFineToCoarse, CUstream_st *stream); // explicit instantiation for hasTurbulentViscosity == true
+template void ScaleFC_compressible<false>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICells * fineToCoarse, ICellNeigh &neighborFineToCoarse, CUstream_st *stream); // explicit instantiation for hasTurbulentViscosity == false
+
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_RhoSq_3rdMom_comp_27( // host-side launcher for the scaleFC_RhoSq_3rdMom_comp_27 kernel
+    real* DC, // NOTE(review): presumably coarse-level distributions - confirm against the kernel
+    real* DF, // NOTE(review): presumably fine-level distributions - confirm against the kernel
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // also used as the second argument of the launch-grid computation below
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // consumed by the launch configuration only; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse,
+    CUstream_st *stream) // CUDA stream the kernel is enqueued on
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // bundles the grid and block dimensions used in the launch below
+
+    scaleFC_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>( // 0 bytes of dynamic shared memory; enqueued on the caller-supplied stream
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - confirm against the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_AA2016_comp_27( // host-side launcher for the scaleFC_AA2016_comp_27 kernel
+    real* DC, // NOTE(review): presumably coarse-level distributions - confirm against the kernel
+    real* DF, // NOTE(review): presumably fine-level distributions - confirm against the kernel
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // also used as the second argument of the launch-grid computation below
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // consumed by the launch configuration only; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse,
+    CUstream_st *stream) // CUDA stream the kernel is enqueued on
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // bundles the grid and block dimensions used in the launch below
+
+    scaleFC_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>( // 0 bytes of dynamic shared memory; enqueued on the caller-supplied stream
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_AA2016_comp_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - confirm against the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFC_NSPress_27( // host-side launcher for the scaleFC_NSPress_27 kernel; takes no stream parameter
+    real* DC, // NOTE(review): presumably coarse-level distributions - confirm against the kernel
+    real* DF, // NOTE(review): presumably fine-level distributions - confirm against the kernel
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // also used as the second argument of the launch-grid computation below
+    real omCoarse,
+    real omFine,
+    real nu,
+    unsigned int nxC,
+    unsigned int nyC,
+    unsigned int nxF,
+    unsigned int nyF,
+    unsigned int numberOfThreads, // consumed by the launch configuration only; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // bundles the grid and block dimensions used in the launch below
+
+    scaleFC_NSPress_27<<< grid.grid, grid.threads >>> ( // no stream argument: the launch goes to the default stream
+        DC,
+        DF,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        omCoarse,
+        omFine,
+        nu,
+        nxC,
+        nyC,
+        nxF,
+        nyF,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFC_NSPress_27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - confirm against the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCThSMG7( // host-side launcher for the scaleFCThSMG7 kernel; takes no stream parameter
+    real* DC,
+    real* DF,
+    real* DD7C, // second distribution set for the coarse level; NOTE(review): presumably a 7-direction set, judging by the name - confirm
+    real* DD7F, // second distribution set for the fine level; same caveat as DD7C
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // also used as the second argument of the launch-grid computation below
+    real nu,
+    real diffusivity_coarse,
+    unsigned int numberOfThreads, // consumed by the launch configuration only; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // bundles the grid and block dimensions used in the launch below
+
+    scaleFCThSMG7<<< grid.grid, grid.threads >>>( // no stream argument: the launch goes to the default stream
+        DC,
+        DF,
+        DD7C,
+        DD7F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        nu,
+        diffusivity_coarse,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFCThSMG7 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - confirm against the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCThS7( // host-side launcher for the scaleFCThS7 kernel; takes neither a stream nor an ICellNeigh parameter
+    real* DC,
+    real* DF,
+    real* DD7C, // second distribution set for the coarse level; NOTE(review): presumably a 7-direction set, judging by the name - confirm
+    real* DD7F, // second distribution set for the fine level; same caveat as DD7C
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // also used as the second argument of the launch-grid computation below
+    real nu,
+    real diffusivity_coarse,
+    unsigned int numberOfThreads) // consumed by the launch configuration only; not forwarded to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // bundles the grid and block dimensions used in the launch below
+
+    scaleFCThS7<<< grid.grid, grid.threads >>>( // no stream argument: the launch goes to the default stream
+        DC,
+        DF,
+        DD7C,
+        DD7F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        nu,
+        diffusivity_coarse);
+    getLastCudaError("scaleFCThS7 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - confirm against the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void ScaleFCThS27( // host-side launcher for the scaleFCThS27 kernel; takes no stream parameter
+    real* DC,
+    real* DF,
+    real* DD27C, // second distribution set for the coarse level; NOTE(review): presumably a 27-direction set, judging by the name - confirm
+    real* DD27F, // second distribution set for the fine level; same caveat as DD27C
+    unsigned int* neighborCX,
+    unsigned int* neighborCY,
+    unsigned int* neighborCZ,
+    unsigned int* neighborFX,
+    unsigned int* neighborFY,
+    unsigned int* neighborFZ,
+    unsigned long long numberOfLBnodesC,
+    unsigned long long numberOfLBnodesF,
+    bool isEvenTimestep,
+    unsigned int* posC,
+    unsigned int* posFSWB,
+    unsigned int kFC, // also used as the second argument of the launch-grid computation below
+    real nu,
+    real diffusivity_coarse,
+    unsigned int numberOfThreads, // consumed by the launch configuration only; not forwarded to the kernel
+    ICellNeigh neighborFineToCoarse)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC); // bundles the grid and block dimensions used in the launch below
+
+    scaleFCThS27<<< grid.grid, grid.threads >>>( // no stream argument: the launch goes to the default stream
+        DC,
+        DF,
+        DD27C,
+        DD27F,
+        neighborCX,
+        neighborCY,
+        neighborCZ,
+        neighborFX,
+        neighborFY,
+        neighborFZ,
+        numberOfLBnodesC,
+        numberOfLBnodesF,
+        isEvenTimestep,
+        posC,
+        posFSWB,
+        kFC,
+        nu,
+        diffusivity_coarse,
+        neighborFineToCoarse);
+    getLastCudaError("scaleFCThS27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - confirm against the helper's definition
+}
+//////////////////////////////////////////////////////////////////////////
+void DragLiftPostD27( // host-side launcher for the DragLiftPost27 kernel (note: the kernel name has no "D" before "27")
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    int numberOfBCnodes, // also used as the second argument of the launch-grid computation below
+    double *DragX, // NOTE(review): presumably output buffers written by the kernel - confirm against the kernel
+    double *DragY,
+    double *DragZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads) // consumed by the launch configuration only; not forwarded to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // bundles the grid and block dimensions used in the launch below
 
+    DragLiftPost27<<< grid.grid, grid.threads >>>( // no stream argument: the launch goes to the default stream
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        DragX,
+        DragY,
+        DragZ,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("DragLiftPost27 execution failed"); // NOTE(review): presumably reports a pending CUDA error with this message - confirm against the helper's definition
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PropVelo(   unsigned int numberOfThreads,
-                            unsigned int* neighborX,
-                            unsigned int* neighborY,
-                            unsigned int* neighborZ,
-                            real* rho,
-                            real* ux,
-                            real* uy,
-                            real* uz,
-                            int* k_Q,
-                            unsigned int size_Prop,
-                            unsigned int size_Mat,
-                            unsigned int* bcMatD,
-                            real* DD,
-                            bool EvenOrOdd)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Prop);
-
-      PropellerBC<<< grid.grid, grid.threads >>>(neighborX,
-                                       neighborY,
-                                       neighborZ,
-                                       rho,
-                                       ux,
-                                       uy,
-                                       uz,
-                                       k_Q,
-                                       size_Prop,
-                                       size_Mat,
-                                       bcMatD,
-                                       DD,
-                                       EvenOrOdd);
-      getLastCudaError("PropellerBC execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF27( real* DC,
-                        real* DF,
-                        unsigned int* neighborCX,
-                        unsigned int* neighborCY,
-                        unsigned int* neighborCZ,
-                        unsigned int* neighborFX,
-                        unsigned int* neighborFY,
-                        unsigned int* neighborFZ,
-                        unsigned int size_MatC,
-                        unsigned int size_MatF,
-                        bool isEvenTimestep,
-                        unsigned int* posCSWB,
-                        unsigned int* posFSWB,
-                        unsigned int kCF,
-                        real omCoarse,
-                        real omFine,
-                        real nu,
-                        unsigned int nxC,
-                        unsigned int nyC,
-                        unsigned int nxF,
-                        unsigned int nyF,
-                        unsigned int numberOfThreads)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF27<<< grid.grid, grid.threads >>> ( DC,
-                                             DF,
-                                             neighborCX,
-                                             neighborCY,
-                                             neighborCZ,
-                                             neighborFX,
-                                             neighborFY,
-                                             neighborFZ,
-                                             size_MatC,
-                                             size_MatF,
-                                             isEvenTimestep,
-                                             posCSWB,
-                                             posFSWB,
-                                             kCF,
-                                             omCoarse,
-                                             omFine,
-                                             nu,
-                                             nxC,
-                                             nyC,
-                                             nxF,
-                                             nyF);
-      getLastCudaError("scaleCF27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFEff27(real* DC,
-                             real* DF,
-                             unsigned int* neighborCX,
-                             unsigned int* neighborCY,
-                             unsigned int* neighborCZ,
-                             unsigned int* neighborFX,
-                             unsigned int* neighborFY,
-                             unsigned int* neighborFZ,
-                             unsigned int size_MatC,
-                             unsigned int size_MatF,
-                             bool isEvenTimestep,
-                             unsigned int* posCSWB,
-                             unsigned int* posFSWB,
-                             unsigned int kCF,
-                             real omCoarse,
-                             real omFine,
-                             real nu,
-                             unsigned int nxC,
-                             unsigned int nyC,
-                             unsigned int nxF,
-                             unsigned int nyF,
-                             unsigned int numberOfThreads,
-                             OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCFEff27<<< grid.grid, grid.threads >>> ( DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offCF);
-      getLastCudaError("scaleCFEff27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFLast27(real* DC,
-                              real* DF,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posCSWB,
-                              unsigned int* posFSWB,
-                              unsigned int kCF,
-                              real omCoarse,
-                              real omFine,
-                              real nu,
-                              unsigned int nxC,
-                              unsigned int nyC,
-                              unsigned int nxF,
-                              unsigned int nyF,
-                              unsigned int numberOfThreads,
-                              OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCFLast27<<< grid.grid, grid.threads >>> (DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offCF);
-      getLastCudaError("scaleCFLast27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFpress27(  real* DC,
-                                 real* DF,
-                                 unsigned int* neighborCX,
-                                 unsigned int* neighborCY,
-                                 unsigned int* neighborCZ,
-                                 unsigned int* neighborFX,
-                                 unsigned int* neighborFY,
-                                 unsigned int* neighborFZ,
-                                 unsigned int size_MatC,
-                                 unsigned int size_MatF,
-                                 bool isEvenTimestep,
-                                 unsigned int* posCSWB,
-                                 unsigned int* posFSWB,
-                                 unsigned int kCF,
-                                 real omCoarse,
-                                 real omFine,
-                                 real nu,
-                                 unsigned int nxC,
-                                 unsigned int nyC,
-                                 unsigned int nxF,
-                                 unsigned int nyF,
-                                 unsigned int numberOfThreads,
-                                 OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCFpress27<<< grid.grid, grid.threads >>>(DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offCF);
-      getLastCudaError("scaleCFpress27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_Fix_27(  real* DC,
-                                 real* DF,
-                                 unsigned int* neighborCX,
-                                 unsigned int* neighborCY,
-                                 unsigned int* neighborCZ,
-                                 unsigned int* neighborFX,
-                                 unsigned int* neighborFY,
-                                 unsigned int* neighborFZ,
-                                 unsigned int size_MatC,
-                                 unsigned int size_MatF,
-                                 bool isEvenTimestep,
-                                 unsigned int* posCSWB,
-                                 unsigned int* posFSWB,
-                                 unsigned int kCF,
-                                 real omCoarse,
-                                 real omFine,
-                                 real nu,
-                                 unsigned int nxC,
-                                 unsigned int nyC,
-                                 unsigned int nxF,
-                                 unsigned int nyF,
-                                 unsigned int numberOfThreads,
-                                 OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_Fix_27<<< grid.grid, grid.threads >>>(DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offCF);
-      getLastCudaError("scaleCF_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_Fix_comp_27( real* DC,
-                                     real* DF,
-                                     unsigned int* neighborCX,
-                                     unsigned int* neighborCY,
-                                     unsigned int* neighborCZ,
-                                     unsigned int* neighborFX,
-                                     unsigned int* neighborFY,
-                                     unsigned int* neighborFZ,
-                                     unsigned int size_MatC,
-                                     unsigned int size_MatF,
-                                     bool isEvenTimestep,
-                                     unsigned int* posCSWB,
-                                     unsigned int* posFSWB,
-                                     unsigned int kCF,
-                                     real omCoarse,
-                                     real omFine,
-                                     real nu,
-                                     unsigned int nxC,
-                                     unsigned int nyC,
-                                     unsigned int nxF,
-                                     unsigned int nyF,
-                                     unsigned int numberOfThreads,
-                                     OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_Fix_comp_27<<< grid.grid, grid.threads >>>(   DC,
-                                                        DF,
-                                                        neighborCX,
-                                                        neighborCY,
-                                                        neighborCZ,
-                                                        neighborFX,
-                                                        neighborFY,
-                                                        neighborFZ,
-                                                        size_MatC,
-                                                        size_MatF,
-                                                        isEvenTimestep,
-                                                        posCSWB,
-                                                        posFSWB,
-                                                        kCF,
-                                                        omCoarse,
-                                                        omFine,
-                                                        nu,
-                                                        nxC,
-                                                        nyC,
-                                                        nxF,
-                                                        nyF,
-                                                        offCF);
-      getLastCudaError("scaleCF_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_0817_comp_27(real* DC,
-                                     real* DF,
-                                     unsigned int* neighborCX,
-                                     unsigned int* neighborCY,
-                                     unsigned int* neighborCZ,
-                                     unsigned int* neighborFX,
-                                     unsigned int* neighborFY,
-                                     unsigned int* neighborFZ,
-                                     unsigned int size_MatC,
-                                     unsigned int size_MatF,
-                                     bool isEvenTimestep,
-                                     unsigned int* posCSWB,
-                                     unsigned int* posFSWB,
-                                     unsigned int kCF,
-                                     real omCoarse,
-                                     real omFine,
-                                     real nu,
-                                     unsigned int nxC,
-                                     unsigned int nyC,
-                                     unsigned int nxF,
-                                     unsigned int nyF,
-                                     unsigned int numberOfThreads,
-                                     OffCF offCF,
-                            CUstream_st *stream)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>>(  DC,
-                                                        DF,
-                                                        neighborCX,
-                                                        neighborCY,
-                                                        neighborCZ,
-                                                        neighborFX,
-                                                        neighborFY,
-                                                        neighborFZ,
-                                                        size_MatC,
-                                                        size_MatF,
-                                                        isEvenTimestep,
-                                                        posCSWB,
-                                                        posFSWB,
-                                                        kCF,
-                                                        omCoarse,
-                                                        omFine,
-                                                        nu,
-                                                        nxC,
-                                                        nyC,
-                                                        nxF,
-                                                        nyF,
-                                                        offCF);
-      getLastCudaError("scaleCF_0817_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_comp_D3Q27F3_2018(real* DC,
-                                          real* DF,
-                                          real* G6,
-                                          unsigned int* neighborCX,
-                                          unsigned int* neighborCY,
-                                          unsigned int* neighborCZ,
-                                          unsigned int* neighborFX,
-                                          unsigned int* neighborFY,
-                                          unsigned int* neighborFZ,
-                                          unsigned int size_MatC,
-                                          unsigned int size_MatF,
-                                          bool isEvenTimestep,
-                                          unsigned int* posCSWB,
-                                          unsigned int* posFSWB,
-                                          unsigned int kCF,
-                                          real omCoarse,
-                                          real omFine,
-                                          real nu,
-                                          unsigned int nxC,
-                                          unsigned int nyC,
-                                          unsigned int nxF,
-                                          unsigned int nyF,
-                                          unsigned int numberOfThreads,
-                                          OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>>(DC,
-                                                            DF,
-                                                            G6,
-                                                            neighborCX,
-                                                            neighborCY,
-                                                            neighborCZ,
-                                                            neighborFX,
-                                                            neighborFY,
-                                                            neighborFZ,
-                                                            size_MatC,
-                                                            size_MatF,
-                                                            isEvenTimestep,
-                                                            posCSWB,
-                                                            posFSWB,
-                                                            kCF,
-                                                            omCoarse,
-                                                            omFine,
-                                                            nu,
-                                                            nxC,
-                                                            nyC,
-                                                            nxF,
-                                                            nyF,
-                                                            offCF);
-      getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_comp_D3Q27F3(real* DC,
-                                     real* DF,
-                                     real* G6,
-                                     unsigned int* neighborCX,
-                                     unsigned int* neighborCY,
-                                     unsigned int* neighborCZ,
-                                     unsigned int* neighborFX,
-                                     unsigned int* neighborFY,
-                                     unsigned int* neighborFZ,
-                                     unsigned int size_MatC,
-                                     unsigned int size_MatF,
-                                     bool isEvenTimestep,
-                                     unsigned int* posCSWB,
-                                     unsigned int* posFSWB,
-                                     unsigned int kCF,
-                                     real omCoarse,
-                                     real omFine,
-                                     real nu,
-                                     unsigned int nxC,
-                                     unsigned int nyC,
-                                     unsigned int nxF,
-                                     unsigned int nyF,
-                                     unsigned int numberOfThreads,
-                                     OffCF offCF,
-                            CUstream_st *stream)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>>( DC,
-                                                        DF,
-                                                        G6,
-                                                        neighborCX,
-                                                        neighborCY,
-                                                        neighborCZ,
-                                                        neighborFX,
-                                                        neighborFY,
-                                                        neighborFZ,
-                                                        size_MatC,
-                                                        size_MatF,
-                                                        isEvenTimestep,
-                                                        posCSWB,
-                                                        posFSWB,
-                                                        kCF,
-                                                        omCoarse,
-                                                        omFine,
-                                                        nu,
-                                                        nxC,
-                                                        nyC,
-                                                        nxF,
-                                                        nyF,
-                                                        offCF);
-      getLastCudaError("scaleCF_comp_D3Q27F3 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_staggered_time_comp_27(  real* DC,
-                                                 real* DF,
-                                                 unsigned int* neighborCX,
-                                                 unsigned int* neighborCY,
-                                                 unsigned int* neighborCZ,
-                                                 unsigned int* neighborFX,
-                                                 unsigned int* neighborFY,
-                                                 unsigned int* neighborFZ,
-                                                 unsigned int size_MatC,
-                                                 unsigned int size_MatF,
-                                                 bool isEvenTimestep,
-                                                 unsigned int* posCSWB,
-                                                 unsigned int* posFSWB,
-                                                 unsigned int kCF,
-                                                 real omCoarse,
-                                                 real omFine,
-                                                 real nu,
-                                                 unsigned int nxC,
-                                                 unsigned int nyC,
-                                                 unsigned int nxF,
-                                                 unsigned int nyF,
-                                                 unsigned int numberOfThreads,
-                                                 OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_staggered_time_comp_27<<< grid.grid, grid.threads >>>(    DC,
-                                                                    DF,
-                                                                    neighborCX,
-                                                                    neighborCY,
-                                                                    neighborCZ,
-                                                                    neighborFX,
-                                                                    neighborFY,
-                                                                    neighborFZ,
-                                                                    size_MatC,
-                                                                    size_MatF,
-                                                                    isEvenTimestep,
-                                                                    posCSWB,
-                                                                    posFSWB,
-                                                                    kCF,
-                                                                    omCoarse,
-                                                                    omFine,
-                                                                    nu,
-                                                                    nxC,
-                                                                    nyC,
-                                                                    nxF,
-                                                                    nyF,
-                                                                    offCF);
-      getLastCudaError("scaleCF_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream)
-{
-   dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellCF->kCF);
-   dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
-
-   scaleCF_RhoSq_comp_27<<<grid, threads, 0, stream>>>(
-      parameterDeviceC->distributions.f[0],
-      parameterDeviceF->distributions.f[0],
-      parameterDeviceC->neighborX,
-      parameterDeviceC->neighborY,
-      parameterDeviceC->neighborZ,
-      parameterDeviceF->neighborX,
-      parameterDeviceF->neighborY,
-      parameterDeviceF->neighborZ,
-      parameterDeviceC->numberOfNodes,
-      parameterDeviceF->numberOfNodes,
-      parameterDeviceC->isEvenTimestep,
-      icellCF->ICellCFC,
-      icellCF->ICellCFF,
-      icellCF->kCF,
-      parameterDeviceC->omega,
-      parameterDeviceF->omega,
-      parameterDeviceC->vis,
-      parameterDeviceC->nx,
-      parameterDeviceC->ny,
-      parameterDeviceF->nx,
-      parameterDeviceF->ny,
-      offsetCF);
-   getLastCudaError("scaleCF_RhoSq_27 execution failed");
-}
-
-void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream)
-{
-   dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellCF->kCF);
-   dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
-
-   scaleCF_compressible<<<grid, threads, 0, stream>>>(
-      parameterDeviceC->distributions.f[0],
-      parameterDeviceF->distributions.f[0],
-      parameterDeviceC->neighborX,
-      parameterDeviceC->neighborY,
-      parameterDeviceC->neighborZ,
-      parameterDeviceF->neighborX,
-      parameterDeviceF->neighborY,
-      parameterDeviceF->neighborZ,
-      parameterDeviceC->numberOfNodes,
-      parameterDeviceF->numberOfNodes,
-      parameterDeviceC->isEvenTimestep,
-      icellCF->ICellCFC,
-      icellCF->ICellCFF,
-      icellCF->kCF,
-      parameterDeviceC->omega,
-      parameterDeviceF->omega,
-      offsetCF);
-
-   getLastCudaError("scaleCF_compressible execution failed");
-}
-
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
-                                             real* DF,
-                                             unsigned int* neighborCX,
-                                             unsigned int* neighborCY,
-                                             unsigned int* neighborCZ,
-                                             unsigned int* neighborFX,
-                                             unsigned int* neighborFY,
-                                             unsigned int* neighborFZ,
-                                             unsigned int size_MatC,
-                                             unsigned int size_MatF,
-                                             bool isEvenTimestep,
-                                             unsigned int* posCSWB,
-                                             unsigned int* posFSWB,
-                                             unsigned int kCF,
-                                             real omCoarse,
-                                             real omFine,
-                                             real nu,
-                                             unsigned int nxC,
-                                             unsigned int nyC,
-                                             unsigned int nxF,
-                                             unsigned int nyF,
-                                             unsigned int numberOfThreads,
-                                             OffCF offCF,
-                                  CUstream_st *stream)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>(  DC,
-                                                                DF,
-                                                                neighborCX,
-                                                                neighborCY,
-                                                                neighborCZ,
-                                                                neighborFX,
-                                                                neighborFY,
-                                                                neighborFZ,
-                                                                size_MatC,
-                                                                size_MatF,
-                                                                isEvenTimestep,
-                                                                posCSWB,
-                                                                posFSWB,
-                                                                kCF,
-                                                                omCoarse,
-                                                                omFine,
-                                                                nu,
-                                                                nxC,
-                                                                nyC,
-                                                                nxF,
-                                                                nyF,
-                                                                offCF);
-      getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_AA2016_comp_27(real* DC,
-                                       real* DF,
-                                       unsigned int* neighborCX,
-                                       unsigned int* neighborCY,
-                                       unsigned int* neighborCZ,
-                                       unsigned int* neighborFX,
-                                       unsigned int* neighborFY,
-                                       unsigned int* neighborFZ,
-                                       unsigned int size_MatC,
-                                       unsigned int size_MatF,
-                                       bool isEvenTimestep,
-                                       unsigned int* posCSWB,
-                                       unsigned int* posFSWB,
-                                       unsigned int kCF,
-                                       real omCoarse,
-                                       real omFine,
-                                       real nu,
-                                       unsigned int nxC,
-                                       unsigned int nyC,
-                                       unsigned int nxF,
-                                       unsigned int nyF,
-                                       unsigned int numberOfThreads,
-                                       OffCF offCF,
-                              CUstream_st *stream)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC,
-                                                        DF,
-                                                        neighborCX,
-                                                        neighborCY,
-                                                        neighborCZ,
-                                                        neighborFX,
-                                                        neighborFY,
-                                                        neighborFZ,
-                                                        size_MatC,
-                                                        size_MatF,
-                                                        isEvenTimestep,
-                                                        posCSWB,
-                                                        posFSWB,
-                                                        kCF,
-                                                        omCoarse,
-                                                        omFine,
-                                                        nu,
-                                                        nxC,
-                                                        nyC,
-                                                        nxF,
-                                                        nyF,
-                                                        offCF);
-      getLastCudaError("scaleCF_AA2016_comp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCF_NSPress_27(  real* DC,
-                                     real* DF,
-                                     unsigned int* neighborCX,
-                                     unsigned int* neighborCY,
-                                     unsigned int* neighborCZ,
-                                     unsigned int* neighborFX,
-                                     unsigned int* neighborFY,
-                                     unsigned int* neighborFZ,
-                                     unsigned int size_MatC,
-                                     unsigned int size_MatF,
-                                     bool isEvenTimestep,
-                                     unsigned int* posCSWB,
-                                     unsigned int* posFSWB,
-                                     unsigned int kCF,
-                                     real omCoarse,
-                                     real omFine,
-                                     real nu,
-                                     unsigned int nxC,
-                                     unsigned int nyC,
-                                     unsigned int nxF,
-                                     unsigned int nyF,
-                                     unsigned int numberOfThreads,
-                                     OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCF_NSPress_27<<< grid.grid, grid.threads >>>(DC,
-                                                    DF,
-                                                    neighborCX,
-                                                    neighborCY,
-                                                    neighborCZ,
-                                                    neighborFX,
-                                                    neighborFY,
-                                                    neighborFZ,
-                                                    size_MatC,
-                                                    size_MatF,
-                                                    isEvenTimestep,
-                                                    posCSWB,
-                                                    posFSWB,
-                                                    kCF,
-                                                    omCoarse,
-                                                    omFine,
-                                                    nu,
-                                                    nxC,
-                                                    nyC,
-                                                    nxF,
-                                                    nyF,
-                                                    offCF);
-      getLastCudaError("scaleCF_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFThSMG7(   real* DC,
-                                 real* DF,
-                                 real* DD7C,
-                                 real* DD7F,
-                                 unsigned int* neighborCX,
-                                 unsigned int* neighborCY,
-                                 unsigned int* neighborCZ,
-                                 unsigned int* neighborFX,
-                                 unsigned int* neighborFY,
-                                 unsigned int* neighborFZ,
-                                 unsigned int size_MatC,
-                                 unsigned int size_MatF,
-                                 bool isEvenTimestep,
-                                 unsigned int* posCSWB,
-                                 unsigned int* posFSWB,
-                                 unsigned int kCF,
-                                 real nu,
-                                 real diffusivity_fine,
-                                 unsigned int numberOfThreads,
-                                 OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCFThSMG7<<< grid.grid, grid.threads >>> (DC,
-                                                DF,
-                                                DD7C,
-                                                DD7F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                nu,
-                                                diffusivity_fine,
-                                                offCF);
-      getLastCudaError("scaleCFThSMG7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFThS7(  real* DC,
-                              real* DF,
-                              real* DD7C,
-                              real* DD7F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posCSWB,
-                              unsigned int* posFSWB,
-                              unsigned int kCF,
-                              real nu,
-                              real diffusivity_fine,
-                              unsigned int numberOfThreads)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCFThS7<<< grid.grid, grid.threads >>> (  DC,
-                                                DF,
-                                                DD7C,
-                                                DD7F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                nu,
-                                                diffusivity_fine);
-      getLastCudaError("scaleCFThS7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleCFThS27( real* DC,
-                              real* DF,
-                              real* DD27C,
-                              real* DD27F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posCSWB,
-                              unsigned int* posFSWB,
-                              unsigned int kCF,
-                              real nu,
-                              real diffusivity_fine,
-                              unsigned int numberOfThreads,
-                              OffCF offCF)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-
-      scaleCFThS27<<< grid.grid, grid.threads >>> ( DC,
-                                                DF,
-                                                DD27C,
-                                                DD27F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posCSWB,
-                                                posFSWB,
-                                                kCF,
-                                                nu,
-                                                diffusivity_fine,
-                                                offCF);
-      getLastCudaError("scaleCFThS27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC27( real* DC,
-                           real* DF,
-                           unsigned int* neighborCX,
-                           unsigned int* neighborCY,
-                           unsigned int* neighborCZ,
-                           unsigned int* neighborFX,
-                           unsigned int* neighborFY,
-                           unsigned int* neighborFZ,
-                           unsigned int size_MatC,
-                           unsigned int size_MatF,
-                           bool isEvenTimestep,
-                           unsigned int* posC,
-                           unsigned int* posFSWB,
-                           unsigned int kFC,
-                           real omCoarse,
-                           real omFine,
-                           real nu,
-                           unsigned int nxC,
-                           unsigned int nyC,
-                           unsigned int nxF,
-                           unsigned int nyF,
-                           unsigned int numberOfThreads)
-{
-
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFC27<<< grid.grid, grid.threads >>> ( DC,
-                                             DF,
-                                             neighborCX,
-                                             neighborCY,
-                                             neighborCZ,
-                                             neighborFX,
-                                             neighborFY,
-                                             neighborFZ,
-                                             size_MatC,
-                                             size_MatF,
-                                             isEvenTimestep,
-                                             posC,
-                                             posFSWB,
-                                             kFC,
-                                             omCoarse,
-                                             omFine,
-                                             nu,
-                                             nxC,
-                                             nyC,
-                                             nxF,
-                                             nyF);
-      getLastCudaError("scaleFC27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCEff27(real* DC,
-                             real* DF,
-                             unsigned int* neighborCX,
-                             unsigned int* neighborCY,
-                             unsigned int* neighborCZ,
-                             unsigned int* neighborFX,
-                             unsigned int* neighborFY,
-                             unsigned int* neighborFZ,
-                             unsigned int size_MatC,
-                             unsigned int size_MatF,
-                             bool isEvenTimestep,
-                             unsigned int* posC,
-                             unsigned int* posFSWB,
-                             unsigned int kFC,
-                             real omCoarse,
-                             real omFine,
-                             real nu,
-                             unsigned int nxC,
-                             unsigned int nyC,
-                             unsigned int nxF,
-                             unsigned int nyF,
-                             unsigned int numberOfThreads,
-                             OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFCEff27<<< grid.grid, grid.threads >>> ( DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posC,
-                                                posFSWB,
-                                                kFC,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offFC);
-      getLastCudaError("scaleFCEff27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCLast27(real* DC,
-                              real* DF,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real omCoarse,
-                              real omFine,
-                              real nu,
-                              unsigned int nxC,
-                              unsigned int nyC,
-                              unsigned int nxF,
-                              unsigned int nyF,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFCLast27<<< grid.grid, grid.threads >>> (DC,
-                                                DF,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posC,
-                                                posFSWB,
-                                                kFC,
-                                                omCoarse,
-                                                omFine,
-                                                nu,
-                                                nxC,
-                                                nyC,
-                                                nxF,
-                                                nyF,
-                                                offFC);
-      getLastCudaError("Kernel execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCpress27(real* DC,
-                              real* DF,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real omCoarse,
-                              real omFine,
-                              real nu,
-                              unsigned int nxC,
-                              unsigned int nyC,
-                              unsigned int nxF,
-                              unsigned int nyF,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFCpress27<<< grid.grid, grid.threads >>> (  DC,
-                                                   DF,
-                                                   neighborCX,
-                                                   neighborCY,
-                                                   neighborCZ,
-                                                   neighborFX,
-                                                   neighborFY,
-                                                   neighborFZ,
-                                                   size_MatC,
-                                                   size_MatF,
-                                                   isEvenTimestep,
-                                                   posC,
-                                                   posFSWB,
-                                                   kFC,
-                                                   omCoarse,
-                                                   omFine,
-                                                   nu,
-                                                   nxC,
-                                                   nyC,
-                                                   nxF,
-                                                   nyF,
-                                                   offFC);
-      getLastCudaError("scaleFCpress27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_Fix_27(real* DC,
-                              real* DF,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real omCoarse,
-                              real omFine,
-                              real nu,
-                              unsigned int nxC,
-                              unsigned int nyC,
-                              unsigned int nxF,
-                              unsigned int nyF,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFC_Fix_27<<< grid.grid, grid.threads >>> (  DC,
-                                                   DF,
-                                                   neighborCX,
-                                                   neighborCY,
-                                                   neighborCZ,
-                                                   neighborFX,
-                                                   neighborFY,
-                                                   neighborFZ,
-                                                   size_MatC,
-                                                   size_MatF,
-                                                   isEvenTimestep,
-                                                   posC,
-                                                   posFSWB,
-                                                   kFC,
-                                                   omCoarse,
-                                                   omFine,
-                                                   nu,
-                                                   nxC,
-                                                   nyC,
-                                                   nxF,
-                                                   nyF,
-                                                   offFC);
-      getLastCudaError("scaleFC_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_Fix_comp_27(  real* DC,
-                                      real* DF,
-                                      unsigned int* neighborCX,
-                                      unsigned int* neighborCY,
-                                      unsigned int* neighborCZ,
-                                      unsigned int* neighborFX,
-                                      unsigned int* neighborFY,
-                                      unsigned int* neighborFZ,
-                                      unsigned int size_MatC,
-                                      unsigned int size_MatF,
-                                      bool isEvenTimestep,
-                                      unsigned int* posC,
-                                      unsigned int* posFSWB,
-                                      unsigned int kFC,
-                                      real omCoarse,
-                                      real omFine,
-                                      real nu,
-                                      unsigned int nxC,
-                                      unsigned int nyC,
-                                      unsigned int nxF,
-                                      unsigned int nyF,
-                                      unsigned int numberOfThreads,
-                                      OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFC_Fix_comp_27<<< grid.grid, grid.threads >>> ( DC,
-                                                       DF,
-                                                       neighborCX,
-                                                       neighborCY,
-                                                       neighborCZ,
-                                                       neighborFX,
-                                                       neighborFY,
-                                                       neighborFZ,
-                                                       size_MatC,
-                                                       size_MatF,
-                                                       isEvenTimestep,
-                                                       posC,
-                                                       posFSWB,
-                                                       kFC,
-                                                       omCoarse,
-                                                       omFine,
-                                                       nu,
-                                                       nxC,
-                                                       nyC,
-                                                       nxF,
-                                                       nyF,
-                                                       offFC);
-      getLastCudaError("scaleFC_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_0817_comp_27( real* DC,
-                                      real* DF,
-                                      unsigned int* neighborCX,
-                                      unsigned int* neighborCY,
-                                      unsigned int* neighborCZ,
-                                      unsigned int* neighborFX,
-                                      unsigned int* neighborFY,
-                                      unsigned int* neighborFZ,
-                                      unsigned int size_MatC,
-                                      unsigned int size_MatF,
-                                      bool isEvenTimestep,
-                                      unsigned int* posC,
-                                      unsigned int* posFSWB,
-                                      unsigned int kFC,
-                                      real omCoarse,
-                                      real omFine,
-                                      real nu,
-                                      unsigned int nxC,
-                                      unsigned int nyC,
-                                      unsigned int nxF,
-                                      unsigned int nyF,
-                                      unsigned int numberOfThreads,
-                                      OffFC offFC,
-                             CUstream_st *stream)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFC_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>> (DC,
-                                                       DF,
-                                                       neighborCX,
-                                                       neighborCY,
-                                                       neighborCZ,
-                                                       neighborFX,
-                                                       neighborFY,
-                                                       neighborFZ,
-                                                       size_MatC,
-                                                       size_MatF,
-                                                       isEvenTimestep,
-                                                       posC,
-                                                       posFSWB,
-                                                       kFC,
-                                                       omCoarse,
-                                                       omFine,
-                                                       nu,
-                                                       nxC,
-                                                       nyC,
-                                                       nxF,
-                                                       nyF,
-                                                       offFC);
-      getLastCudaError("scaleFC_0817_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_comp_D3Q27F3_2018( real* DC,
-                                           real* DF,
-                                           real* G6,
-                                           unsigned int* neighborCX,
-                                           unsigned int* neighborCY,
-                                           unsigned int* neighborCZ,
-                                           unsigned int* neighborFX,
-                                           unsigned int* neighborFY,
-                                           unsigned int* neighborFZ,
-                                           unsigned int size_MatC,
-                                           unsigned int size_MatF,
-                                           bool isEvenTimestep,
-                                           unsigned int* posC,
-                                           unsigned int* posFSWB,
-                                           unsigned int kFC,
-                                           real omCoarse,
-                                           real omFine,
-                                           real nu,
-                                           unsigned int nxC,
-                                           unsigned int nyC,
-                                           unsigned int nxF,
-                                           unsigned int nyF,
-                                           unsigned int numberOfThreads,
-                                           OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-     scaleFC_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>> (DC,
-                                                            DF,
-                                                            G6,
-                                                            neighborCX,
-                                                            neighborCY,
-                                                            neighborCZ,
-                                                            neighborFX,
-                                                            neighborFY,
-                                                            neighborFZ,
-                                                            size_MatC,
-                                                            size_MatF,
-                                                            isEvenTimestep,
-                                                            posC,
-                                                            posFSWB,
-                                                            kFC,
-                                                            omCoarse,
-                                                            omFine,
-                                                            nu,
-                                                            nxC,
-                                                            nyC,
-                                                            nxF,
-                                                            nyF,
-                                                            offFC);
-      getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_comp_D3Q27F3( real* DC,
-                                      real* DF,
-                                      real* G6,
-                                      unsigned int* neighborCX,
-                                      unsigned int* neighborCY,
-                                      unsigned int* neighborCZ,
-                                      unsigned int* neighborFX,
-                                      unsigned int* neighborFY,
-                                      unsigned int* neighborFZ,
-                                      unsigned int size_MatC,
-                                      unsigned int size_MatF,
-                                      bool isEvenTimestep,
-                                      unsigned int* posC,
-                                      unsigned int* posFSWB,
-                                      unsigned int kFC,
-                                      real omCoarse,
-                                      real omFine,
-                                      real nu,
-                                      unsigned int nxC,
-                                      unsigned int nyC,
-                                      unsigned int nxF,
-                                      unsigned int nyF,
-                                      unsigned int numberOfThreads,
-                                      OffFC offFC,
-                             CUstream_st *stream)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-     scaleFC_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>> (DC,
-                                                       DF,
-                                                       G6,
-                                                       neighborCX,
-                                                       neighborCY,
-                                                       neighborCZ,
-                                                       neighborFX,
-                                                       neighborFY,
-                                                       neighborFZ,
-                                                       size_MatC,
-                                                       size_MatF,
-                                                       isEvenTimestep,
-                                                       posC,
-                                                       posFSWB,
-                                                       kFC,
-                                                       omCoarse,
-                                                       omFine,
-                                                       nu,
-                                                       nxC,
-                                                       nyC,
-                                                       nxF,
-                                                       nyF,
-                                                       offFC);
-      getLastCudaError("scaleFC_0817_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_staggered_time_comp_27(   real* DC,
-                                                  real* DF,
-                                                  unsigned int* neighborCX,
-                                                  unsigned int* neighborCY,
-                                                  unsigned int* neighborCZ,
-                                                  unsigned int* neighborFX,
-                                                  unsigned int* neighborFY,
-                                                  unsigned int* neighborFZ,
-                                                  unsigned int size_MatC,
-                                                  unsigned int size_MatF,
-                                                  bool isEvenTimestep,
-                                                  unsigned int* posC,
-                                                  unsigned int* posFSWB,
-                                                  unsigned int kFC,
-                                                  real omCoarse,
-                                                  real omFine,
-                                                  real nu,
-                                                  unsigned int nxC,
-                                                  unsigned int nyC,
-                                                  unsigned int nxF,
-                                                  unsigned int nyF,
-                                                  unsigned int numberOfThreads,
-                                                  OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFC_staggered_time_comp_27<<< grid.grid, grid.threads >>> (  DC,
-                                                                   DF,
-                                                                   neighborCX,
-                                                                   neighborCY,
-                                                                   neighborCZ,
-                                                                   neighborFX,
-                                                                   neighborFY,
-                                                                   neighborFZ,
-                                                                   size_MatC,
-                                                                   size_MatF,
-                                                                   isEvenTimestep,
-                                                                   posC,
-                                                                   posFSWB,
-                                                                   kFC,
-                                                                   omCoarse,
-                                                                   omFine,
-                                                                   nu,
-                                                                   nxC,
-                                                                   nyC,
-                                                                   nxF,
-                                                                   nyF,
-                                                                   offFC);
-      getLastCudaError("scaleFC_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream)
-{
-   dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellFC->kFC);
-   dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
-
-   scaleFC_RhoSq_comp_27<<<grid, threads, 0, stream>>>(
-      parameterDeviceC->distributions.f[0],
-      parameterDeviceF->distributions.f[0],
-      parameterDeviceC->neighborX,
-      parameterDeviceC->neighborY,
-      parameterDeviceC->neighborZ,
-      parameterDeviceF->neighborX,
-      parameterDeviceF->neighborY,
-      parameterDeviceF->neighborZ,
-      parameterDeviceC->numberOfNodes,
-      parameterDeviceF->numberOfNodes,
-      parameterDeviceC->isEvenTimestep,
-      icellFC->ICellFCC,
-      icellFC->ICellFCF,
-      icellFC->kFC,
-      parameterDeviceC->omega,
-      parameterDeviceF->omega,
-      parameterDeviceC->vis,
-      parameterDeviceC->nx,
-      parameterDeviceC->ny,
-      parameterDeviceF->nx,
-      parameterDeviceF->ny,
-      offsetFC);
-   getLastCudaError("scaleFC_RhoSq_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream)
-{
-   dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellFC->kFC);
-   dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
-
-   scaleFC_compressible<<<grid, threads, 0, stream>>>(
-      parameterDeviceC->distributions.f[0],
-      parameterDeviceF->distributions.f[0],
-      parameterDeviceC->neighborX,
-      parameterDeviceC->neighborY,
-      parameterDeviceC->neighborZ,
-      parameterDeviceF->neighborX,
-      parameterDeviceF->neighborY,
-      parameterDeviceF->neighborZ,
-      parameterDeviceC->numberOfNodes,
-      parameterDeviceF->numberOfNodes,
-      parameterDeviceC->isEvenTimestep,
-      icellFC->ICellFCC,
-      icellFC->ICellFCF,
-      icellFC->kFC,
-      parameterDeviceC->omega,
-      parameterDeviceF->omega,
-      offsetFC);
-   getLastCudaError("scaleFC_compressible execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
-                                              real* DF,
-                                              unsigned int* neighborCX,
-                                              unsigned int* neighborCY,
-                                              unsigned int* neighborCZ,
-                                              unsigned int* neighborFX,
-                                              unsigned int* neighborFY,
-                                              unsigned int* neighborFZ,
-                                              unsigned int size_MatC,
-                                              unsigned int size_MatF,
-                                              bool isEvenTimestep,
-                                              unsigned int* posC,
-                                              unsigned int* posFSWB,
-                                              unsigned int kFC,
-                                              real omCoarse,
-                                              real omFine,
-                                              real nu,
-                                              unsigned int nxC,
-                                              unsigned int nyC,
-                                              unsigned int nxF,
-                                              unsigned int nyF,
-                                              unsigned int numberOfThreads,
-                                              OffFC offFC,
-                                   CUstream_st *stream)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFC_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC,
-                                                              DF,
-                                                              neighborCX,
-                                                              neighborCY,
-                                                              neighborCZ,
-                                                              neighborFX,
-                                                              neighborFY,
-                                                              neighborFZ,
-                                                              size_MatC,
-                                                              size_MatF,
-                                                              isEvenTimestep,
-                                                              posC,
-                                                              posFSWB,
-                                                              kFC,
-                                                              omCoarse,
-                                                              omFine,
-                                                              nu,
-                                                              nxC,
-                                                              nyC,
-                                                              nxF,
-                                                              nyF,
-                                                              offFC);
-      getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_AA2016_comp_27( real* DC,
-                                        real* DF,
-                                        unsigned int* neighborCX,
-                                        unsigned int* neighborCY,
-                                        unsigned int* neighborCZ,
-                                        unsigned int* neighborFX,
-                                        unsigned int* neighborFY,
-                                        unsigned int* neighborFZ,
-                                        unsigned int size_MatC,
-                                        unsigned int size_MatF,
-                                        bool isEvenTimestep,
-                                        unsigned int* posC,
-                                        unsigned int* posFSWB,
-                                        unsigned int kFC,
-                                        real omCoarse,
-                                        real omFine,
-                                        real nu,
-                                        unsigned int nxC,
-                                        unsigned int nyC,
-                                        unsigned int nxF,
-                                        unsigned int nyF,
-                                        unsigned int numberOfThreads,
-                                        OffFC offFC,
-                              CUstream_st *stream)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFC_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC,
-                                                        DF,
-                                                        neighborCX,
-                                                        neighborCY,
-                                                        neighborCZ,
-                                                        neighborFX,
-                                                        neighborFY,
-                                                        neighborFZ,
-                                                        size_MatC,
-                                                        size_MatF,
-                                                        isEvenTimestep,
-                                                        posC,
-                                                        posFSWB,
-                                                        kFC,
-                                                        omCoarse,
-                                                        omFine,
-                                                        nu,
-                                                        nxC,
-                                                        nyC,
-                                                        nxF,
-                                                        nyF,
-                                                        offFC);
-      getLastCudaError("scaleFC_AA2016_comp_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFC_NSPress_27(real* DC,
-                                  real* DF,
-                                  unsigned int* neighborCX,
-                                  unsigned int* neighborCY,
-                                  unsigned int* neighborCZ,
-                                  unsigned int* neighborFX,
-                                  unsigned int* neighborFY,
-                                  unsigned int* neighborFZ,
-                                  unsigned int size_MatC,
-                                  unsigned int size_MatF,
-                                  bool isEvenTimestep,
-                                  unsigned int* posC,
-                                  unsigned int* posFSWB,
-                                  unsigned int kFC,
-                                  real omCoarse,
-                                  real omFine,
-                                  real nu,
-                                  unsigned int nxC,
-                                  unsigned int nyC,
-                                  unsigned int nxF,
-                                  unsigned int nyF,
-                                  unsigned int numberOfThreads,
-                                  OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFC_NSPress_27<<< grid.grid, grid.threads >>> (  DC,
-                                                       DF,
-                                                       neighborCX,
-                                                       neighborCY,
-                                                       neighborCZ,
-                                                       neighborFX,
-                                                       neighborFY,
-                                                       neighborFZ,
-                                                       size_MatC,
-                                                       size_MatF,
-                                                       isEvenTimestep,
-                                                       posC,
-                                                       posFSWB,
-                                                       kFC,
-                                                       omCoarse,
-                                                       omFine,
-                                                       nu,
-                                                       nxC,
-                                                       nyC,
-                                                       nxF,
-                                                       nyF,
-                                                       offFC);
-      getLastCudaError("scaleFC_Fix_27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCThSMG7(real* DC,
-                              real* DF,
-                              real* DD7C,
-                              real* DD7F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real nu,
-                              real diffusivity_coarse,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFCThSMG7<<< grid.grid, grid.threads >>>( DC,
-                                                DF,
-                                                DD7C,
-                                                DD7F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posC,
-                                                posFSWB,
-                                                kFC,
-                                                nu,
-                                                diffusivity_coarse,
-                                                offFC);
-      getLastCudaError("scaleFCThSMG7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCThS7(  real* DC,
-                              real* DF,
-                              real* DD7C,
-                              real* DD7F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real nu,
-                              real diffusivity_coarse,
-                              unsigned int numberOfThreads)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFCThS7<<< grid.grid, grid.threads >>>(DC,
-                                             DF,
-                                             DD7C,
-                                             DD7F,
-                                             neighborCX,
-                                             neighborCY,
-                                             neighborCZ,
-                                             neighborFX,
-                                             neighborFY,
-                                             neighborFZ,
-                                             size_MatC,
-                                             size_MatF,
-                                             isEvenTimestep,
-                                             posC,
-                                             posFSWB,
-                                             kFC,
-                                             nu,
-                                             diffusivity_coarse);
-      getLastCudaError("scaleFCThS7 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void ScaleFCThS27( real* DC,
-                              real* DF,
-                              real* DD27C,
-                              real* DD27F,
-                              unsigned int* neighborCX,
-                              unsigned int* neighborCY,
-                              unsigned int* neighborCZ,
-                              unsigned int* neighborFX,
-                              unsigned int* neighborFY,
-                              unsigned int* neighborFZ,
-                              unsigned int size_MatC,
-                              unsigned int size_MatF,
-                              bool isEvenTimestep,
-                              unsigned int* posC,
-                              unsigned int* posFSWB,
-                              unsigned int kFC,
-                              real nu,
-                              real diffusivity_coarse,
-                              unsigned int numberOfThreads,
-                              OffFC offFC)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
-
-      scaleFCThS27<<< grid.grid, grid.threads >>>(  DC,
-                                                DF,
-                                                DD27C,
-                                                DD27F,
-                                                neighborCX,
-                                                neighborCY,
-                                                neighborCZ,
-                                                neighborFX,
-                                                neighborFY,
-                                                neighborFZ,
-                                                size_MatC,
-                                                size_MatF,
-                                                isEvenTimestep,
-                                                posC,
-                                                posFSWB,
-                                                kFC,
-                                                nu,
-                                                diffusivity_coarse,
-                                                offFC);
-      getLastCudaError("scaleFCThS27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void DragLiftPostD27(real* DD,
-                                int* k_Q,
-                                real* QQ,
-                                int numberOfBCnodes,
-                                double *DragX,
-                                double *DragY,
-                                double *DragZ,
-                                unsigned int* neighborX,
-                                unsigned int* neighborY,
-                                unsigned int* neighborZ,
-                                unsigned int size_Mat,
-                                bool isEvenTimestep,
-                                unsigned int numberOfThreads)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-    DragLiftPost27<<< grid.grid, grid.threads >>>(DD,
-                                        k_Q,
-                                        QQ,
-                                        numberOfBCnodes,
-                                        DragX,
-                                        DragY,
-                                        DragZ,
-                                        neighborX,
-                                        neighborY,
-                                        neighborZ,
-                                        size_Mat,
-                                        isEvenTimestep);
-    getLastCudaError("DragLift27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void DragLiftPreD27( real* DD,
-                                int* k_Q,
-                                real* QQ,
-                                int numberOfBCnodes,
-                                double *DragX,
-                                double *DragY,
-                                double *DragZ,
-                                unsigned int* neighborX,
-                                unsigned int* neighborY,
-                                unsigned int* neighborZ,
-                                unsigned int size_Mat,
-                                bool isEvenTimestep,
-                                unsigned int numberOfThreads)
+void DragLiftPreD27(
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    int numberOfBCnodes,
+    double *DragX,
+    double *DragY,
+    double *DragZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-    DragLiftPre27<<< grid.grid, grid.threads >>>( DD,
-                                        k_Q,
-                                        QQ,
-                                        numberOfBCnodes,
-                                        DragX,
-                                        DragY,
-                                        DragZ,
-                                        neighborX,
-                                        neighborY,
-                                        neighborZ,
-                                        size_Mat,
-                                        isEvenTimestep);
-    getLastCudaError("DragLift27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void CalcCPtop27(real* DD,
-                            int* cpIndex,
-                            int nonCp,
-                            double *cpPress,
-                            unsigned int* neighborX,
-                            unsigned int* neighborY,
-                            unsigned int* neighborZ,
-                            unsigned int size_Mat,
-                            bool isEvenTimestep,
-                            unsigned int numberOfThreads)
+    DragLiftPre27<<< grid.grid, grid.threads >>>(
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        DragX,
+        DragY,
+        DragZ,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("DragLiftPre27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void CalcCPtop27(
+    real* DD,
+    int* cpIndex,
+    int nonCp,
+    double *cpPress,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp);
 
-    CalcCP27<<< grid.grid, grid.threads >>>(DD,
-                                  cpIndex,
-                                  nonCp,
-                                  cpPress,
-                                  neighborX,
-                                  neighborY,
-                                  neighborZ,
-                                  size_Mat,
-                                  isEvenTimestep);
+    CalcCP27<<< grid.grid, grid.threads >>>(
+        DD,
+        cpIndex,
+        nonCp,
+        cpPress,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
     getLastCudaError("CalcCP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void CalcCPbottom27( real* DD,
-                                int* cpIndex,
-                                int nonCp,
-                                double *cpPress,
-                                unsigned int* neighborX,
-                                unsigned int* neighborY,
-                                unsigned int* neighborZ,
-                                unsigned int size_Mat,
-                                bool isEvenTimestep,
-                                unsigned int numberOfThreads)
+void CalcCPbottom27(
+    real* DD,
+    int* cpIndex,
+    int nonCp,
+    double *cpPress,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp);
 
-    CalcCP27<<< grid.grid, grid.threads >>>(DD,
-                                  cpIndex,
-                                  nonCp,
-                                  cpPress,
-                                  neighborX,
-                                  neighborY,
-                                  neighborZ,
-                                  size_Mat,
-                                  isEvenTimestep);
+    CalcCP27<<< grid.grid, grid.threads >>>(
+        DD,
+        cpIndex,
+        nonCp,
+        cpPress,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
     getLastCudaError("CalcCP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void GetSendFsPreDev27(real* DD,
-                                  real* bufferFs,
-                                  int* sendIndex,
-                                  int buffmax,
-                                  unsigned int* neighborX,
-                                  unsigned int* neighborY,
-                                  unsigned int* neighborZ,
-                                  unsigned int size_Mat,
-                                  bool isEvenTimestep,
-                                  unsigned int numberOfThreads,
-                                  cudaStream_t stream)
+void GetSendFsPreDev27(
+    real* DD,
+    real* bufferFs,
+    int* sendIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads,
+    cudaStream_t stream)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
 
-    getSendFsPre27<<< grid.grid, grid.threads, 0, stream >>>(DD,
-                                        bufferFs,
-                                        sendIndex,
-                                        buffmax,
-                                        neighborX,
-                                        neighborY,
-                                        neighborZ,
-                                        size_Mat,
-                                        isEvenTimestep);
+    getSendFsPre27<<< grid.grid, grid.threads, 0, stream >>>(
+        DD,
+        bufferFs,
+        sendIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
     getLastCudaError("getSendFsPre27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void GetSendFsPostDev27(real* DD,
-                                   real* bufferFs,
-                                   int* sendIndex,
-                                   int buffmax,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   unsigned int size_Mat,
-                                   bool isEvenTimestep,
-                                   unsigned int numberOfThreads,
-                                   cudaStream_t stream)
+void GetSendFsPostDev27(
+    real* DD,
+    real* bufferFs,
+    int* sendIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads,
+    cudaStream_t stream)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
 
-    getSendFsPost27<<< grid.grid, grid.threads, 0, stream >>>(DD,
-                                         bufferFs,
-                                         sendIndex,
-                                         buffmax,
-                                         neighborX,
-                                         neighborY,
-                                         neighborZ,
-                                         size_Mat,
-                                         isEvenTimestep);
+    getSendFsPost27<<< grid.grid, grid.threads, 0, stream >>>(
+        DD,
+        bufferFs,
+        sendIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
     getLastCudaError("getSendFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void SetRecvFsPreDev27(real* DD,
-                                  real* bufferFs,
-                                  int* recvIndex,
-                                  int buffmax,
-                                  unsigned int* neighborX,
-                                  unsigned int* neighborY,
-                                  unsigned int* neighborZ,
-                                  unsigned int size_Mat,
-                                  bool isEvenTimestep,
-                                  unsigned int numberOfThreads,
-                                  cudaStream_t stream)
+void SetRecvFsPreDev27(
+    real* DD,
+    real* bufferFs,
+    int* recvIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads,
+    cudaStream_t stream)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
 
-    setRecvFsPre27<<< grid.grid, grid.threads, 0, stream >>>(DD,
-                                        bufferFs,
-                                        recvIndex,
-                                        buffmax,
-                                        neighborX,
-                                        neighborY,
-                                        neighborZ,
-                                        size_Mat,
-                                        isEvenTimestep);
+    setRecvFsPre27<<< grid.grid, grid.threads, 0, stream >>>(
+        DD,
+        bufferFs,
+        recvIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
     getLastCudaError("setRecvFsPre27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void SetRecvFsPostDev27(real* DD,
-                                   real* bufferFs,
-                                   int* recvIndex,
-                                   int buffmax,
-                                   unsigned int* neighborX,
-                                   unsigned int* neighborY,
-                                   unsigned int* neighborZ,
-                                   unsigned int size_Mat,
-                                   bool isEvenTimestep,
-                                   unsigned int numberOfThreads,
-                                   cudaStream_t stream)
+void SetRecvFsPostDev27(
+    real* DD,
+    real* bufferFs,
+    int* recvIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads,
+    cudaStream_t stream)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
 
-    setRecvFsPost27<<< grid.grid, grid.threads, 0, stream >>>(DD,
-                                         bufferFs,
-                                         recvIndex,
-                                         buffmax,
-                                         neighborX,
-                                         neighborY,
-                                         neighborZ,
-                                         size_Mat,
-                                         isEvenTimestep);
+    setRecvFsPost27<<< grid.grid, grid.threads, 0, stream >>>(
+        DD,
+        bufferFs,
+        recvIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
     getLastCudaError("setRecvFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -5329,7 +5490,7 @@ void getSendGsDevF3(
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    unsigned int size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep,
     unsigned int numberOfThreads)
 {
@@ -5343,7 +5504,7 @@ void getSendGsDevF3(
         neighborX,
         neighborY,
         neighborZ,
-        size_Mat,
+        numberOfLBnodes,
         isEvenTimestep);
     getLastCudaError("getSendGsF3 execution failed");
 }
@@ -5356,7 +5517,7 @@ void setRecvGsDevF3(
     unsigned int* neighborX,
     unsigned int* neighborY,
     unsigned int* neighborZ,
-    unsigned int size_Mat,
+    unsigned long long numberOfLBnodes,
     bool isEvenTimestep,
     unsigned int numberOfThreads)
 {
@@ -5370,285 +5531,295 @@ void setRecvGsDevF3(
         neighborX,
         neighborY,
         neighborZ,
-        size_Mat,
+        numberOfLBnodes,
         isEvenTimestep);
     getLastCudaError("setRecvGsF3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void WallFuncDev27(unsigned int numberOfThreads,
-                              real* vx,
-                              real* vy,
-                              real* vz,
-                              real* DD,
-                              int* k_Q,
-                              real* QQ,
-                              unsigned int numberOfBCnodes,
-                              real om1,
-                              unsigned int* neighborX,
-                              unsigned int* neighborY,
-                              unsigned int* neighborZ,
-                              unsigned int size_Mat,
-                              bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-      WallFunction27<<< grid.grid, grid.threads >>> (
-                                              vx,
-                                              vy,
-                                              vz,
-                                              DD,
-                                              k_Q,
-                                              QQ,
-                                              numberOfBCnodes,
-                                              om1,
-                                              neighborX,
-                                              neighborY,
-                                              neighborZ,
-                                              size_Mat,
-                                              isEvenTimestep);
-      getLastCudaError("WallFunction27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
-                                          real* vxD,
-                                          real* vyD,
-                                          real* vzD,
-                                          real* vxWall,
-                                          real* vyWall,
-                                          real* vzWall,
-                                          int numberOfWallNodes,
-                                          int* kWallNodes,
-                                          real* rhoD,
-                                          real* pressD,
-                                          unsigned int* geoD,
-                                          unsigned int* neighborX,
-                                          unsigned int* neighborY,
-                                          unsigned int* neighborZ,
-                                          unsigned int size_Mat,
-                                          real* DD,
-                                          bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfWallNodes);
-
-      LBSetOutputWallVelocitySP27<<< grid.grid, grid.threads >>> (	vxD,
-                                                            vyD,
-                                                            vzD,
-                                                            vxWall,
-                                                            vyWall,
-                                                            vzWall,
-                                                            numberOfWallNodes,
-                                                            kWallNodes,
-                                                            rhoD,
-                                                            pressD,
-                                                            geoD,
-                                                            neighborX,
-                                                            neighborY,
-                                                            neighborZ,
-                                                            size_Mat,
-                                                            DD,
-                                                            isEvenTimestep);
-      getLastCudaError("LBSetOutputWallVelocitySP27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void GetVelotoForce27(unsigned int numberOfThreads,
-                                 real* DD,
-                                 int* bcIndex,
-                                 int nonAtBC,
-                                 real* Vx,
-                                 real* Vy,
-                                 real* Vz,
-                                 unsigned int* neighborX,
-                                 unsigned int* neighborY,
-                                 unsigned int* neighborZ,
-                                 unsigned int size_Mat,
-                                 bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonAtBC);
-
-      GetVeloforForcing27<<< grid.grid, grid.threads >>> (DD,
-                                                bcIndex,
-                                                nonAtBC,
-                                                Vx,
-                                                Vy,
-                                                Vz,
-                                                neighborX,
-                                                neighborY,
-                                                neighborZ,
-                                                size_Mat,
-                                                isEvenTimestep);
-      getLastCudaError("GetVeloforForcing27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void InitParticlesDevice(real* coordX,
-                                    real* coordY,
-                                    real* coordZ,
-                                    real* coordParticleXlocal,
-                                    real* coordParticleYlocal,
-                                    real* coordParticleZlocal,
-                                    real* coordParticleXglobal,
-                                    real* coordParticleYglobal,
-                                    real* coordParticleZglobal,
-                                    real* veloParticleX,
-                                    real* veloParticleY,
-                                    real* veloParticleZ,
-                                    real* randArray,
-                                    unsigned int* particleID,
-                                    unsigned int* cellBaseID,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int* neighborWSB,
-                                    int level,
-                                    unsigned int numberOfParticles,
-                                    unsigned int size_Mat,
-                                    unsigned int numberOfThreads)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles);
-
-   InitParticles<<< grid.grid, grid.threads >>> (coordX,
-                                        coordY,
-                                        coordZ,
-                                        coordParticleXlocal,
-                                        coordParticleYlocal,
-                                        coordParticleZlocal,
-                                        coordParticleXglobal,
-                                        coordParticleYglobal,
-                                        coordParticleZglobal,
-                                        veloParticleX,
-                                        veloParticleY,
-                                        veloParticleZ,
-                                        randArray,
-                                        particleID,
-                                        cellBaseID,
-                                        bcMatD,
-                                        neighborX,
-                                        neighborY,
-                                        neighborZ,
-                                        neighborWSB,
-                                        level,
-                                        numberOfParticles,
-                                        size_Mat);
-      getLastCudaError("InitParticles execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void MoveParticlesDevice(real* coordX,
-                                    real* coordY,
-                                    real* coordZ,
-                                    real* coordParticleXlocal,
-                                    real* coordParticleYlocal,
-                                    real* coordParticleZlocal,
-                                    real* coordParticleXglobal,
-                                    real* coordParticleYglobal,
-                                    real* coordParticleZglobal,
-                                    real* veloParticleX,
-                                    real* veloParticleY,
-                                    real* veloParticleZ,
-                                    real* DD,
-                                    real  omega,
-                                    unsigned int* particleID,
-                                    unsigned int* cellBaseID,
-                                    unsigned int* bcMatD,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int* neighborWSB,
-                                    int level,
-                                    unsigned int timestep,
-                                    unsigned int numberOfTimesteps,
-                                    unsigned int numberOfParticles,
-                                    unsigned int size_Mat,
-                                    unsigned int numberOfThreads,
-                                    bool isEvenTimestep)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles);
-
-   MoveParticles<<< grid.grid, grid.threads >>> (coordX,
-                                        coordY,
-                                        coordZ,
-                                        coordParticleXlocal,
-                                        coordParticleYlocal,
-                                        coordParticleZlocal,
-                                        coordParticleXglobal,
-                                        coordParticleYglobal,
-                                        coordParticleZglobal,
-                                        veloParticleX,
-                                        veloParticleY,
-                                        veloParticleZ,
-                                        DD,
-                                        omega,
-                                        particleID,
-                                        cellBaseID,
-                                        bcMatD,
-                                        neighborX,
-                                        neighborY,
-                                        neighborZ,
-                                        neighborWSB,
-                                        level,
-                                        timestep,
-                                        numberOfTimesteps,
-                                        numberOfParticles,
-                                        size_Mat,
-                                        isEvenTimestep);
-      getLastCudaError("MoveParticles execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void initRandomDevice(curandState* state,
-                                 unsigned int size_Mat,
-                                 unsigned int numberOfThreads)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-   initRandom<<< grid.grid, grid.threads >>> (state);
-   getLastCudaError("initRandom execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-void generateRandomValuesDevice( curandState* state,
-                                            unsigned int size_Mat,
-                                            real* randArray,
-                                            unsigned int numberOfThreads)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-   generateRandomValues<<< grid.grid, grid.threads >>> (state,randArray);
-   getLastCudaError("generateRandomValues execution failed");
+void WallFuncDev27( // Host-side wrapper: configures and launches the WallFunction27 kernel.
+    unsigned int numberOfThreads,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes); // launch configuration built from the thread count and the number of BC nodes
+
+    WallFunction27<<< grid.grid, grid.threads >>> ( // no stream argument is given, so the launch uses the default stream
+        vx,
+        vy,
+        vz,
+        DD,
+        k_Q,
+        QQ,
+        numberOfBCnodes,
+        om1,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("WallFunction27 execution failed"); // NOTE(review): presumably checks the launch for errors and reports this message -- confirm against the helper
+}
+//////////////////////////////////////////////////////////////////////////
+void SetOutputWallVelocitySP27( // Host-side wrapper: configures and launches the LBSetOutputWallVelocitySP27 kernel.
+    unsigned int numberOfThreads,
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* vxWall,
+    real* vyWall,
+    real* vzWall,
+    int numberOfWallNodes,
+    int* kWallNodes,
+    real* rhoD,
+    real* pressD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    real* DD,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfWallNodes); // launch configuration built from the thread count and the (signed) number of wall nodes
+
+    LBSetOutputWallVelocitySP27<<< grid.grid, grid.threads >>> ( // no stream argument is given, so the launch uses the default stream
+        vxD,
+        vyD,
+        vzD,
+        vxWall,
+        vyWall,
+        vzWall,
+        numberOfWallNodes,
+        kWallNodes,
+        rhoD,
+        pressD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        DD,
+        isEvenTimestep);
+    getLastCudaError("LBSetOutputWallVelocitySP27 execution failed"); // NOTE(review): presumably checks the launch for errors and reports this message -- confirm against the helper
+}
+//////////////////////////////////////////////////////////////////////////
+void GetVelotoForce27( // Host-side wrapper: configures and launches the GetVeloforForcing27 kernel.
+    unsigned int numberOfThreads,
+    real* DD,
+    int* bcIndex,
+    int nonAtBC,
+    real* Vx,
+    real* Vy,
+    real* Vz,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonAtBC); // launch configuration built from the thread count and nonAtBC (a signed count)
+
+    GetVeloforForcing27<<< grid.grid, grid.threads >>> ( // no stream argument is given, so the launch uses the default stream
+        DD,
+        bcIndex,
+        nonAtBC,
+        Vx,
+        Vy,
+        Vz,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("GetVeloforForcing27 execution failed"); // NOTE(review): presumably checks the launch for errors and reports this message -- confirm against the helper
+}
+//////////////////////////////////////////////////////////////////////////
+void InitParticlesDevice( // Host-side wrapper: configures and launches the InitParticles kernel.
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    real* coordParticleXlocal,
+    real* coordParticleYlocal,
+    real* coordParticleZlocal,
+    real* coordParticleXglobal,
+    real* coordParticleYglobal,
+    real* coordParticleZglobal,
+    real* veloParticleX,
+    real* veloParticleY,
+    real* veloParticleZ,
+    real* randArray,
+    unsigned int* particleID,
+    unsigned int* cellBaseID,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    int level,
+    unsigned int numberOfParticles,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads) // unlike most wrappers in this file, the thread count is the last parameter here
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles); // launch configuration built from the thread count and the number of particles
+
+    InitParticles<<< grid.grid, grid.threads >>> ( // no stream argument is given, so the launch uses the default stream
+        coordX,
+        coordY,
+        coordZ,
+        coordParticleXlocal,
+        coordParticleYlocal,
+        coordParticleZlocal,
+        coordParticleXglobal,
+        coordParticleYglobal,
+        coordParticleZglobal,
+        veloParticleX,
+        veloParticleY,
+        veloParticleZ,
+        randArray,
+        particleID,
+        cellBaseID,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        level,
+        numberOfParticles,
+        numberOfLBnodes);
+    getLastCudaError("InitParticles execution failed"); // NOTE(review): presumably checks the launch for errors and reports this message -- confirm against the helper
+}
+//////////////////////////////////////////////////////////////////////////
+void MoveParticlesDevice( // Host-side wrapper: configures and launches the MoveParticles kernel.
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    real* coordParticleXlocal,
+    real* coordParticleYlocal,
+    real* coordParticleZlocal,
+    real* coordParticleXglobal,
+    real* coordParticleYglobal,
+    real* coordParticleZglobal,
+    real* veloParticleX,
+    real* veloParticleY,
+    real* veloParticleZ,
+    real* DD,
+    real  omega,
+    unsigned int* particleID,
+    unsigned int* cellBaseID,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    int level,
+    unsigned int timestep,
+    unsigned int numberOfTimesteps,
+    unsigned int numberOfParticles,
+    unsigned long long numberOfLBnodes,
+    unsigned int numberOfThreads, // used only for the launch configuration; not forwarded to the kernel
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles); // launch configuration built from the thread count and the number of particles
+
+    MoveParticles<<< grid.grid, grid.threads >>> ( // no stream argument is given, so the launch uses the default stream
+        coordX,
+        coordY,
+        coordZ,
+        coordParticleXlocal,
+        coordParticleYlocal,
+        coordParticleZlocal,
+        coordParticleXglobal,
+        coordParticleYglobal,
+        coordParticleZglobal,
+        veloParticleX,
+        veloParticleY,
+        veloParticleZ,
+        DD,
+        omega,
+        particleID,
+        cellBaseID,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        level,
+        timestep,
+        numberOfTimesteps,
+        numberOfParticles,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("MoveParticles execution failed"); // NOTE(review): presumably checks the launch for errors and reports this message -- confirm against the helper
+}
+//////////////////////////////////////////////////////////////////////////
+void initRandomDevice( // Host-side wrapper: configures and launches the initRandom kernel on the given curandState array.
+    curandState* state,
+    unsigned long long numberOfLBnodes, // used only to build the launch configuration; it is not passed to the kernel
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // NOTE(review): 64-bit node count goes into CudaGrid -- confirm its constructor does not narrow it
+    initRandom<<< grid.grid, grid.threads >>> (state); // default-stream launch; NOTE(review): the kernel gets no node count, so confirm it cannot index past the end of state
+    getLastCudaError("initRandom execution failed"); // NOTE(review): presumably checks the launch for errors and reports this message -- confirm against the helper
+}
+//////////////////////////////////////////////////////////////////////////
+void generateRandomValuesDevice( // Host-side wrapper: configures and launches the generateRandomValues kernel with state and randArray.
+    curandState* state,
+    unsigned long long numberOfLBnodes, // used only to build the launch configuration; it is not passed to the kernel
+    real* randArray,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // NOTE(review): 64-bit node count goes into CudaGrid -- confirm its constructor does not narrow it
+    generateRandomValues<<< grid.grid, grid.threads >>> (state,randArray); // default-stream launch; NOTE(review): the kernel gets no node count, so confirm it cannot index past the end of its arrays
+    getLastCudaError("generateRandomValues execution failed"); // NOTE(review): presumably checks the launch for errors and reports this message -- confirm against the helper
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcTurbulenceIntensityDevice(
-   real* vxx,
-   real* vyy,
-   real* vzz,
-   real* vxy,
-   real* vxz,
-   real* vyz,
-   real* vx_mean,
-   real* vy_mean,
-   real* vz_mean,
-   real* DD,
-   uint* typeOfGridNode,
-   unsigned int* neighborX,
-   unsigned int* neighborY,
-   unsigned int* neighborZ,
-   unsigned int size_Mat,
-   bool isEvenTimestep,
-   uint numberOfThreads)
-{
-   vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-   CalcTurbulenceIntensity<<<grid.grid, grid.threads>>>(
-     vxx,
-     vyy,
-     vzz,
-     vxy,
-     vxz,
-     vyz,
-     vx_mean,
-     vy_mean,
-     vz_mean,
-     DD,
-     typeOfGridNode,
-     neighborX,
-     neighborY,
-     neighborZ,
-     size_Mat,
-     isEvenTimestep);
-
-   getLastCudaError("CalcTurbulenceIntensity execution failed");
+    real* vxx, // Host-side wrapper: this function configures and launches the CalcTurbulenceIntensity kernel.
+    real* vyy,
+    real* vzz,
+    real* vxy,
+    real* vxz,
+    real* vyz,
+    real* vx_mean,
+    real* vy_mean,
+    real* vz_mean,
+    real* DD,
+    uint* typeOfGridNode,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, // forwarded to the kernel and also used for the launch configuration
+    bool isEvenTimestep,
+    uint numberOfThreads) // used only for the launch configuration; not forwarded to the kernel
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfLBnodes); // NOTE(review): 64-bit node count goes into CudaGrid -- confirm its constructor does not narrow it
+    CalcTurbulenceIntensity<<<grid.grid, grid.threads>>>( // no stream argument is given, so the launch uses the default stream
+        vxx,
+        vyy,
+        vzz,
+        vxy,
+        vxz,
+        vyz,
+        vx_mean,
+        vy_mean,
+        vz_mean,
+        DD,
+        typeOfGridNode,
+        neighborX,
+        neighborY,
+        neighborZ,
+        numberOfLBnodes,
+        isEvenTimestep);
+    getLastCudaError("CalcTurbulenceIntensity execution failed"); // NOTE(review): presumably checks the launch for errors and reports this message -- confirm against the helper
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
index 314687c4b29a32962b386d7c083f72b754388e5b..0013ae977d41cb52ce163a53f2f1342d4d7b4c73 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
@@ -1,92 +1,117 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//////////////////////////////////////////////////////////////////////////
-/* Device code */
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NoSlipBCs27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
-#include "KernelUtilities.h"
+#include <basics/constants/NumericConstants.h>
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QDevice3rdMomentsComp27(
-													 real* distributions, 
-													 int* subgridDistanceIndices, 
-													 real* subgridDistances,
-													 unsigned int numberOfBCnodes, 
-													 real omega, 
-													 unsigned int* neighborX,
-													 unsigned int* neighborY,
-													 unsigned int* neighborZ,
-													 unsigned int numberOfLBnodes, 
-													 bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &distributions[DIR_P00   *numberOfLBnodes];
-      D.f[DIR_M00   ] = &distributions[DIR_M00   *numberOfLBnodes];
-      D.f[DIR_0P0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
-      D.f[DIR_0M0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
-      D.f[DIR_00P   ] = &distributions[DIR_00P   *numberOfLBnodes];
-      D.f[DIR_00M   ] = &distributions[DIR_00M   *numberOfLBnodes];
-      D.f[DIR_PP0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
-      D.f[DIR_MM0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
-      D.f[DIR_PM0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
-      D.f[DIR_MP0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
-      D.f[DIR_P0P  ] = &distributions[DIR_P0P  *numberOfLBnodes];
-      D.f[DIR_M0M  ] = &distributions[DIR_M0M  *numberOfLBnodes];
-      D.f[DIR_P0M  ] = &distributions[DIR_P0M  *numberOfLBnodes];
-      D.f[DIR_M0P  ] = &distributions[DIR_M0P  *numberOfLBnodes];
-      D.f[DIR_0PP  ] = &distributions[DIR_0PP  *numberOfLBnodes];
-      D.f[DIR_0MM  ] = &distributions[DIR_0MM  *numberOfLBnodes];
-      D.f[DIR_0PM  ] = &distributions[DIR_0PM  *numberOfLBnodes];
-      D.f[DIR_0MP  ] = &distributions[DIR_0MP  *numberOfLBnodes];
-      D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
-      D.f[DIR_PPP ] = &distributions[DIR_PPP *numberOfLBnodes];
-      D.f[DIR_MMP ] = &distributions[DIR_MMP *numberOfLBnodes];
-      D.f[DIR_PMP ] = &distributions[DIR_PMP *numberOfLBnodes];
-      D.f[DIR_MPP ] = &distributions[DIR_MPP *numberOfLBnodes];
-      D.f[DIR_PPM ] = &distributions[DIR_PPM *numberOfLBnodes];
-      D.f[DIR_MMM ] = &distributions[DIR_MMM *numberOfLBnodes];
-      D.f[DIR_PMM ] = &distributions[DIR_PMM *numberOfLBnodes];
-      D.f[DIR_MPM ] = &distributions[DIR_MPM *numberOfLBnodes];
+      D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &distributions[DIR_P00   *numberOfLBnodes];
-      D.f[DIR_P00   ] = &distributions[DIR_M00   *numberOfLBnodes];
-      D.f[DIR_0M0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
-      D.f[DIR_0P0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
-      D.f[DIR_00M   ] = &distributions[DIR_00P   *numberOfLBnodes];
-      D.f[DIR_00P   ] = &distributions[DIR_00M   *numberOfLBnodes];
-      D.f[DIR_MM0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
-      D.f[DIR_PP0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
-      D.f[DIR_MP0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
-      D.f[DIR_PM0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
-      D.f[DIR_M0M  ] = &distributions[DIR_P0P  *numberOfLBnodes];
-      D.f[DIR_P0P  ] = &distributions[DIR_M0M  *numberOfLBnodes];
-      D.f[DIR_M0P  ] = &distributions[DIR_P0M  *numberOfLBnodes];
-      D.f[DIR_P0M  ] = &distributions[DIR_M0P  *numberOfLBnodes];
-      D.f[DIR_0MM  ] = &distributions[DIR_0PP  *numberOfLBnodes];
-      D.f[DIR_0PP  ] = &distributions[DIR_0MM  *numberOfLBnodes];
-      D.f[DIR_0MP  ] = &distributions[DIR_0PM  *numberOfLBnodes];
-      D.f[DIR_0PM  ] = &distributions[DIR_0MP  *numberOfLBnodes];
-      D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
-      D.f[DIR_PPP ] = &distributions[DIR_MMM *numberOfLBnodes];
-      D.f[DIR_MMP ] = &distributions[DIR_PPM *numberOfLBnodes];
-      D.f[DIR_PMP ] = &distributions[DIR_MPM *numberOfLBnodes];
-      D.f[DIR_MPP ] = &distributions[DIR_PMM *numberOfLBnodes];
-      D.f[DIR_PPM ] = &distributions[DIR_MMP *numberOfLBnodes];
-      D.f[DIR_MMM ] = &distributions[DIR_PPP *numberOfLBnodes];
-      D.f[DIR_PMM ] = &distributions[DIR_MPP *numberOfLBnodes];
-      D.f[DIR_MPM ] = &distributions[DIR_PMP *numberOfLBnodes];
+      D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -107,24 +132,24 @@ __global__ void QDevice3rdMomentsComp27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &subgridDistances[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &subgridDistances[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &subgridDistances[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &subgridDistances[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &subgridDistances[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &subgridDistances[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &subgridDistances[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &subgridDistances[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &subgridDistances[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &subgridDistances[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &subgridDistances[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &subgridDistances[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &subgridDistances[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &subgridDistances[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &subgridDistances[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &subgridDistances[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &subgridDistances[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &subgridDistances[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &subgridDistances[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &subgridDistances[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &subgridDistances[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &subgridDistances[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &subgridDistances[DIR_00P * numberOfBCnodes];
+      q_dirB   = &subgridDistances[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &subgridDistances[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &subgridDistances[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &subgridDistances[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &subgridDistances[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &subgridDistances[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &subgridDistances[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &subgridDistances[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &subgridDistances[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &subgridDistances[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &subgridDistances[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &subgridDistances[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &subgridDistances[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &subgridDistances[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &subgridDistances[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &subgridDistances[DIR_PMP * numberOfBCnodes];
@@ -167,32 +192,32 @@ __global__ void QDevice3rdMomentsComp27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q, m3;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -217,63 +242,63 @@ __global__ void QDevice3rdMomentsComp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &distributions[DIR_P00   *numberOfLBnodes];
-         D.f[DIR_M00   ] = &distributions[DIR_M00   *numberOfLBnodes];
-         D.f[DIR_0P0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
-         D.f[DIR_0M0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
-         D.f[DIR_00P   ] = &distributions[DIR_00P   *numberOfLBnodes];
-         D.f[DIR_00M   ] = &distributions[DIR_00M   *numberOfLBnodes];
-         D.f[DIR_PP0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
-         D.f[DIR_MM0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
-         D.f[DIR_PM0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
-         D.f[DIR_MP0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
-         D.f[DIR_P0P  ] = &distributions[DIR_P0P  *numberOfLBnodes];
-         D.f[DIR_M0M  ] = &distributions[DIR_M0M  *numberOfLBnodes];
-         D.f[DIR_P0M  ] = &distributions[DIR_P0M  *numberOfLBnodes];
-         D.f[DIR_M0P  ] = &distributions[DIR_M0P  *numberOfLBnodes];
-         D.f[DIR_0PP  ] = &distributions[DIR_0PP  *numberOfLBnodes];
-         D.f[DIR_0MM  ] = &distributions[DIR_0MM  *numberOfLBnodes];
-         D.f[DIR_0PM  ] = &distributions[DIR_0PM  *numberOfLBnodes];
-         D.f[DIR_0MP  ] = &distributions[DIR_0MP  *numberOfLBnodes];
-         D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
-         D.f[DIR_PPP ] = &distributions[DIR_PPP *numberOfLBnodes];
-         D.f[DIR_MMP ] = &distributions[DIR_MMP *numberOfLBnodes];
-         D.f[DIR_PMP ] = &distributions[DIR_PMP *numberOfLBnodes];
-         D.f[DIR_MPP ] = &distributions[DIR_MPP *numberOfLBnodes];
-         D.f[DIR_PPM ] = &distributions[DIR_PPM *numberOfLBnodes];
-         D.f[DIR_MMM ] = &distributions[DIR_MMM *numberOfLBnodes];
-         D.f[DIR_PMM ] = &distributions[DIR_PMM *numberOfLBnodes];
-         D.f[DIR_MPM ] = &distributions[DIR_MPM *numberOfLBnodes];
+         D.f[DIR_P00] = &distributions[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &distributions[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &distributions[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &distributions[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &distributions[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &distributions[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &distributions[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &distributions[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &distributions[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &distributions[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &distributions[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &distributions[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &distributions[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &distributions[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &distributions[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &distributions[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &distributions[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &distributions[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &distributions[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &distributions[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &distributions[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &distributions[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &distributions[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &distributions[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &distributions[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &distributions[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &distributions[DIR_P00   *numberOfLBnodes];
-         D.f[DIR_P00   ] = &distributions[DIR_M00   *numberOfLBnodes];
-         D.f[DIR_0M0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
-         D.f[DIR_0P0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
-         D.f[DIR_00M   ] = &distributions[DIR_00P   *numberOfLBnodes];
-         D.f[DIR_00P   ] = &distributions[DIR_00M   *numberOfLBnodes];
-         D.f[DIR_MM0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
-         D.f[DIR_PP0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
-         D.f[DIR_MP0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
-         D.f[DIR_PM0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
-         D.f[DIR_M0M  ] = &distributions[DIR_P0P  *numberOfLBnodes];
-         D.f[DIR_P0P  ] = &distributions[DIR_M0M  *numberOfLBnodes];
-         D.f[DIR_M0P  ] = &distributions[DIR_P0M  *numberOfLBnodes];
-         D.f[DIR_P0M  ] = &distributions[DIR_M0P  *numberOfLBnodes];
-         D.f[DIR_0MM  ] = &distributions[DIR_0PP  *numberOfLBnodes];
-         D.f[DIR_0PP  ] = &distributions[DIR_0MM  *numberOfLBnodes];
-         D.f[DIR_0MP  ] = &distributions[DIR_0PM  *numberOfLBnodes];
-         D.f[DIR_0PM  ] = &distributions[DIR_0MP  *numberOfLBnodes];
-         D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
-         D.f[DIR_PPP ] = &distributions[DIR_MMM *numberOfLBnodes];
-         D.f[DIR_MMP ] = &distributions[DIR_PPM *numberOfLBnodes];
-         D.f[DIR_PMP ] = &distributions[DIR_MPM *numberOfLBnodes];
-         D.f[DIR_MPP ] = &distributions[DIR_PMM *numberOfLBnodes];
-         D.f[DIR_PPM ] = &distributions[DIR_MMP *numberOfLBnodes];
-         D.f[DIR_MMM ] = &distributions[DIR_PPP *numberOfLBnodes];
-         D.f[DIR_PMM ] = &distributions[DIR_MPP *numberOfLBnodes];
-         D.f[DIR_MPM ] = &distributions[DIR_PMP *numberOfLBnodes];
+         D.f[DIR_M00] = &distributions[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &distributions[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &distributions[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &distributions[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &distributions[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &distributions[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &distributions[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &distributions[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &distributions[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &distributions[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &distributions[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &distributions[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &distributions[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &distributions[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &distributions[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &distributions[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &distributions[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &distributions[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &distributions[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &distributions[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &distributions[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &distributions[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &distributions[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &distributions[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &distributions[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &distributions[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &distributions[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -559,77 +584,78 @@ __global__ void QDevice3rdMomentsComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QDeviceIncompHighNu27(real* DD, 
-												 int* k_Q, 
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int numberOfLBnodes, 
-												 bool isEvenTimestep)
+__global__ void QDeviceIncompHighNu27(
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *numberOfLBnodes];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *numberOfLBnodes];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *numberOfLBnodes];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *numberOfLBnodes];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *numberOfLBnodes];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *numberOfLBnodes];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *numberOfLBnodes];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *numberOfLBnodes];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *numberOfLBnodes];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *numberOfLBnodes];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *numberOfLBnodes];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *numberOfLBnodes];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *numberOfLBnodes];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *numberOfLBnodes];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *numberOfLBnodes];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *numberOfLBnodes];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *numberOfLBnodes];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *numberOfLBnodes];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *numberOfLBnodes];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *numberOfLBnodes];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *numberOfLBnodes];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *numberOfLBnodes];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *numberOfLBnodes];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *numberOfLBnodes];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *numberOfLBnodes];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *numberOfLBnodes];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *numberOfLBnodes];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *numberOfLBnodes];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *numberOfLBnodes];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *numberOfLBnodes];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *numberOfLBnodes];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *numberOfLBnodes];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *numberOfLBnodes];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *numberOfLBnodes];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *numberOfLBnodes];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *numberOfLBnodes];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *numberOfLBnodes];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *numberOfLBnodes];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *numberOfLBnodes];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *numberOfLBnodes];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *numberOfLBnodes];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *numberOfLBnodes];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *numberOfLBnodes];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *numberOfLBnodes];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *numberOfLBnodes];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *numberOfLBnodes];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *numberOfLBnodes];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *numberOfLBnodes];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -650,24 +676,24 @@ __global__ void QDeviceIncompHighNu27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -710,32 +736,32 @@ __global__ void QDeviceIncompHighNu27(real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[DIR_P00   ])[ke   ];
-      f_W   = (D.f[DIR_M00   ])[kw   ];
-      f_N   = (D.f[DIR_0P0   ])[kn   ];
-      f_S   = (D.f[DIR_0M0   ])[ks   ];
-      f_T   = (D.f[DIR_00P   ])[kt   ];
-      f_B   = (D.f[DIR_00M   ])[kb   ];
-      f_NE  = (D.f[DIR_PP0  ])[kne  ];
-      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE  = (D.f[DIR_PM0  ])[kse  ];
-      f_NW  = (D.f[DIR_MP0  ])[knw  ];
-      f_TE  = (D.f[DIR_P0P  ])[kte  ];
-      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-      f_TS  = (D.f[DIR_0MP  ])[kts  ];
-      f_TNE = (D.f[DIR_PPP ])[ktne ];
-      f_TSW = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE = (D.f[DIR_PMP ])[ktse ];
-      f_TNW = (D.f[DIR_MPP ])[ktnw ];
-      f_BNE = (D.f[DIR_PPM ])[kbne ];
-      f_BSW = (D.f[DIR_MMM ])[kbsw ];
-      f_BSE = (D.f[DIR_PMM ])[kbse ];
-      f_BNW = (D.f[DIR_MPM ])[kbnw ];
+      f_E   = (D.f[DIR_P00])[ke   ];
+      f_W   = (D.f[DIR_M00])[kw   ];
+      f_N   = (D.f[DIR_0P0])[kn   ];
+      f_S   = (D.f[DIR_0M0])[ks   ];
+      f_T   = (D.f[DIR_00P])[kt   ];
+      f_B   = (D.f[DIR_00M])[kb   ];
+      f_NE  = (D.f[DIR_PP0])[kne  ];
+      f_SW  = (D.f[DIR_MM0])[ksw  ];
+      f_SE  = (D.f[DIR_PM0])[kse  ];
+      f_NW  = (D.f[DIR_MP0])[knw  ];
+      f_TE  = (D.f[DIR_P0P])[kte  ];
+      f_BW  = (D.f[DIR_M0M])[kbw  ];
+      f_BE  = (D.f[DIR_P0M])[kbe  ];
+      f_TW  = (D.f[DIR_M0P])[ktw  ];
+      f_TN  = (D.f[DIR_0PP])[ktn  ];
+      f_BS  = (D.f[DIR_0MM])[kbs  ];
+      f_BN  = (D.f[DIR_0PM])[kbn  ];
+      f_TS  = (D.f[DIR_0MP])[kts  ];
+      f_TNE = (D.f[DIR_PPP])[ktne ];
+      f_TSW = (D.f[DIR_MMP])[ktsw ];
+      f_TSE = (D.f[DIR_PMP])[ktse ];
+      f_TNW = (D.f[DIR_MPP])[ktnw ];
+      f_BNE = (D.f[DIR_PPM])[kbne ];
+      f_BSW = (D.f[DIR_MMM])[kbsw ];
+      f_BSE = (D.f[DIR_PMM])[kbse ];
+      f_BNW = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -760,63 +786,63 @@ __global__ void QDeviceIncompHighNu27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *numberOfLBnodes];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *numberOfLBnodes];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *numberOfLBnodes];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *numberOfLBnodes];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *numberOfLBnodes];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *numberOfLBnodes];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *numberOfLBnodes];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *numberOfLBnodes];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *numberOfLBnodes];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *numberOfLBnodes];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *numberOfLBnodes];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *numberOfLBnodes];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *numberOfLBnodes];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *numberOfLBnodes];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *numberOfLBnodes];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *numberOfLBnodes];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *numberOfLBnodes];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *numberOfLBnodes];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *numberOfLBnodes];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *numberOfLBnodes];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *numberOfLBnodes];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *numberOfLBnodes];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *numberOfLBnodes];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *numberOfLBnodes];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *numberOfLBnodes];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *numberOfLBnodes];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *numberOfLBnodes];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *numberOfLBnodes];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *numberOfLBnodes];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *numberOfLBnodes];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *numberOfLBnodes];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *numberOfLBnodes];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *numberOfLBnodes];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *numberOfLBnodes];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *numberOfLBnodes];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *numberOfLBnodes];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *numberOfLBnodes];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *numberOfLBnodes];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *numberOfLBnodes];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *numberOfLBnodes];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *numberOfLBnodes];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *numberOfLBnodes];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *numberOfLBnodes];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *numberOfLBnodes];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *numberOfLBnodes];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *numberOfLBnodes];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *numberOfLBnodes];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *numberOfLBnodes];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -1055,77 +1081,77 @@ __global__ void QDeviceIncompHighNu27(real* DD,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QDeviceCompHighNu27(
-												 real* DD, 
-												 int* k_Q, 
-												 real* QQ,
-												 unsigned int numberOfBCnodes, 
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1146,24 +1172,24 @@ __global__ void QDeviceCompHighNu27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1206,58 +1232,58 @@ __global__ void QDeviceCompHighNu27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[DIR_P00   ])[ke   ];
-      f_W   = (D.f[DIR_M00   ])[kw   ];
-      f_N   = (D.f[DIR_0P0   ])[kn   ];
-      f_S   = (D.f[DIR_0M0   ])[ks   ];
-      f_T   = (D.f[DIR_00P   ])[kt   ];
-      f_B   = (D.f[DIR_00M   ])[kb   ];
-      f_NE  = (D.f[DIR_PP0  ])[kne  ];
-      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE  = (D.f[DIR_PM0  ])[kse  ];
-      f_NW  = (D.f[DIR_MP0  ])[knw  ];
-      f_TE  = (D.f[DIR_P0P  ])[kte  ];
-      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-      f_TS  = (D.f[DIR_0MP  ])[kts  ];
-      f_TNE = (D.f[DIR_PPP ])[ktne ];
-      f_TSW = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE = (D.f[DIR_PMP ])[ktse ];
-      f_TNW = (D.f[DIR_MPP ])[ktnw ];
-      f_BNE = (D.f[DIR_PPM ])[kbne ];
-      f_BSW = (D.f[DIR_MMM ])[kbsw ];
-      f_BSE = (D.f[DIR_PMM ])[kbse ];
-      f_BNW = (D.f[DIR_MPM ])[kbnw ];
-      //f_W    = (D.f[DIR_P00   ])[ke   ];
-      //f_E    = (D.f[DIR_M00   ])[kw   ];
-      //f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //f_B    = (D.f[DIR_00P   ])[kt   ];
-      //f_T    = (D.f[DIR_00M   ])[kb   ];
-      //f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      //f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_E   = (D.f[DIR_P00])[ke   ];
+      f_W   = (D.f[DIR_M00])[kw   ];
+      f_N   = (D.f[DIR_0P0])[kn   ];
+      f_S   = (D.f[DIR_0M0])[ks   ];
+      f_T   = (D.f[DIR_00P])[kt   ];
+      f_B   = (D.f[DIR_00M])[kb   ];
+      f_NE  = (D.f[DIR_PP0])[kne  ];
+      f_SW  = (D.f[DIR_MM0])[ksw  ];
+      f_SE  = (D.f[DIR_PM0])[kse  ];
+      f_NW  = (D.f[DIR_MP0])[knw  ];
+      f_TE  = (D.f[DIR_P0P])[kte  ];
+      f_BW  = (D.f[DIR_M0M])[kbw  ];
+      f_BE  = (D.f[DIR_P0M])[kbe  ];
+      f_TW  = (D.f[DIR_M0P])[ktw  ];
+      f_TN  = (D.f[DIR_0PP])[ktn  ];
+      f_BS  = (D.f[DIR_0MM])[kbs  ];
+      f_BN  = (D.f[DIR_0PM])[kbn  ];
+      f_TS  = (D.f[DIR_0MP])[kts  ];
+      f_TNE = (D.f[DIR_PPP])[ktne ];
+      f_TSW = (D.f[DIR_MMP])[ktsw ];
+      f_TSE = (D.f[DIR_PMP])[ktse ];
+      f_TNW = (D.f[DIR_MPP])[ktnw ];
+      f_BNE = (D.f[DIR_PPM])[kbne ];
+      f_BSW = (D.f[DIR_MMM])[kbsw ];
+      f_BSE = (D.f[DIR_PMM])[kbse ];
+      f_BNW = (D.f[DIR_MPM])[kbnw ];
+      //f_W    = (D.f[DIR_P00])[ke   ];
+      //f_E    = (D.f[DIR_M00])[kw   ];
+      //f_S    = (D.f[DIR_0P0])[kn   ];
+      //f_N    = (D.f[DIR_0M0])[ks   ];
+      //f_B    = (D.f[DIR_00P])[kt   ];
+      //f_T    = (D.f[DIR_00M])[kb   ];
+      //f_SW   = (D.f[DIR_PP0])[kne  ];
+      //f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //f_NW   = (D.f[DIR_PM0])[kse  ];
+      //f_SE   = (D.f[DIR_MP0])[knw  ];
+      //f_BW   = (D.f[DIR_P0P])[kte  ];
+      //f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //f_BN   = (D.f[DIR_0MP])[kts  ];
+      //f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -1282,63 +1308,63 @@ __global__ void QDeviceCompHighNu27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -1629,16 +1655,16 @@ __global__ void QDeviceCompHighNu27(
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QDeviceComp27(
-										 real* distributions, 
-										 int* subgridDistanceIndices, 
-										 real* subgridDistances,
-										 unsigned int numberOfBCnodes, 
-										 real omega, 
-										 unsigned int* neighborX,
-										 unsigned int* neighborY,
-										 unsigned int* neighborZ,
-										 unsigned int numberOfLBnodes, 
-										 bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The no-slip boundary condition is executed in the following steps
@@ -1646,16 +1672,9 @@ __global__ void QDeviceComp27(
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned k = nx*(ny*z + y) + x;
-
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -1673,7 +1692,7 @@ __global__ void QDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -1705,32 +1724,32 @@ __global__ void QDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -1761,7 +1780,7 @@ __global__ void QDeviceComp27(
        ////////////////////////////////////////////////////////////////////////////////
       //! - Update distributions with subgrid distance (q) between zero and one
       real feq, q, velocityLB;
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
@@ -1769,7 +1788,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
@@ -1777,7 +1796,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
@@ -1785,7 +1804,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
@@ -1793,7 +1812,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
@@ -1801,7 +1820,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
@@ -1809,7 +1828,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
@@ -1817,7 +1836,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
@@ -1825,7 +1844,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
@@ -1833,7 +1852,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
@@ -1841,7 +1860,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
@@ -1849,7 +1868,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
@@ -1857,7 +1876,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
@@ -1865,7 +1884,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
@@ -1873,7 +1892,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
@@ -1881,7 +1900,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
@@ -1889,7 +1908,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
@@ -1897,7 +1916,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
@@ -1905,7 +1924,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
@@ -1913,7 +1932,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
@@ -1921,7 +1940,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
@@ -1929,7 +1948,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
@@ -1937,7 +1956,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
@@ -1945,7 +1964,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
@@ -1953,7 +1972,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
@@ -1961,7 +1980,7 @@ __global__ void QDeviceComp27(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
@@ -2011,16 +2030,17 @@ __global__ void QDeviceComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QDevice27(real* distributions, 
-                                     int* subgridDistanceIndices, 
-                                     real* subgridDistances,
-                                     unsigned int numberOfBCnodes, 
-                                     real omega, 
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes, 
-                                     bool isEvenTimestep)
+__global__ void QDevice27(
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The no-slip boundary condition is executed in the following steps
@@ -2028,19 +2048,12 @@ __global__ void QDevice27(real* distributions,
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    //! - Run for all indices in size of boundary condition (numberOfBCnodes)
    //!
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
 
       //////////////////////////////////////////////////////////////////////////
@@ -2059,7 +2072,7 @@ __global__ void QDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -2091,32 +2104,32 @@ __global__ void QDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -2148,7 +2161,7 @@ __global__ void QDevice27(real* distributions,
       //! - Update distributions with subgrid distance (q) between zero and one
       //!
       real feq, q, velocityLB;
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
@@ -2156,7 +2169,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
@@ -2164,7 +2177,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
@@ -2172,7 +2185,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
@@ -2180,7 +2193,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
@@ -2188,7 +2201,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
@@ -2196,7 +2209,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
@@ -2204,7 +2217,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
@@ -2212,7 +2225,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
@@ -2220,7 +2233,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
@@ -2228,7 +2241,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
@@ -2236,7 +2249,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
@@ -2244,7 +2257,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
@@ -2252,7 +2265,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
@@ -2260,7 +2273,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
@@ -2268,7 +2281,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
@@ -2276,7 +2289,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
@@ -2284,7 +2297,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
@@ -2292,7 +2305,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
@@ -2300,7 +2313,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
@@ -2308,7 +2321,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
@@ -2316,7 +2329,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
@@ -2324,7 +2337,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
@@ -2332,7 +2345,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
@@ -2340,7 +2353,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
@@ -2348,7 +2361,7 @@ __global__ void QDevice27(real* distributions,
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
@@ -2398,15 +2411,16 @@ __global__ void QDevice27(real* distributions,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void BBDevice27(real* distributions, 
-                                     int* subgridDistanceIndices, 
-                                     real* subgridDistances,
-                                     unsigned int numberOfBCnodes, 
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes, 
-                                     bool isEvenTimestep)
+__global__ void BBDevice27(
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The no-slip boundary condition is executed in the following steps
@@ -2414,18 +2428,11 @@ __global__ void BBDevice27(real* distributions,
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;   // global x-index
-   const unsigned  y = blockIdx.x;    // global y-index
-   const unsigned  z = blockIdx.y;    // global z-index
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    // run for all indices in size of boundary condition (numberOfBCnodes)
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -2443,7 +2450,7 @@ __global__ void BBDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
       unsigned int kn   = indexOfBCnode;
@@ -2474,32 +2481,32 @@ __global__ void BBDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - change the pointer to write the results in the correct array
@@ -2509,32 +2516,32 @@ __global__ void BBDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - rewrite distributions if there is a sub-grid distance (q) in same direction
       real q;
-      q = (subgridD.q[DIR_P00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00  ])[kw  ]=f_E  ;
-      q = (subgridD.q[DIR_M00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00  ])[ke  ]=f_W  ;
-      q = (subgridD.q[DIR_0P0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0  ])[ks  ]=f_N  ;
-      q = (subgridD.q[DIR_0M0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0  ])[kn  ]=f_S  ;
-      q = (subgridD.q[DIR_00P  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M  ])[kb  ]=f_T  ;
-      q = (subgridD.q[DIR_00M  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P  ])[kt  ]=f_B  ;
-      q = (subgridD.q[DIR_PP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0 ])[ksw ]=f_NE ;
-      q = (subgridD.q[DIR_MM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0 ])[kne ]=f_SW ;
-      q = (subgridD.q[DIR_PM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0 ])[knw ]=f_SE ;
-      q = (subgridD.q[DIR_MP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0 ])[kse ]=f_NW ;
-      q = (subgridD.q[DIR_P0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M ])[kbw ]=f_TE ;
-      q = (subgridD.q[DIR_M0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P ])[kte ]=f_BW ;
-      q = (subgridD.q[DIR_P0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P ])[ktw ]=f_BE ;
-      q = (subgridD.q[DIR_M0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M ])[kbe ]=f_TW ;
-      q = (subgridD.q[DIR_0PP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM ])[kbs ]=f_TN ;
-      q = (subgridD.q[DIR_0MM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP ])[ktn ]=f_BS ;
-      q = (subgridD.q[DIR_0PM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP ])[kts ]=f_BN ;
-      q = (subgridD.q[DIR_0MP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM ])[kbn ]=f_TS ;
-      q = (subgridD.q[DIR_PPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE;
-      q = (subgridD.q[DIR_MMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW;
-      q = (subgridD.q[DIR_PPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE;
-      q = (subgridD.q[DIR_MMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW;
-      q = (subgridD.q[DIR_PMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE;
-      q = (subgridD.q[DIR_MPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW;
-      q = (subgridD.q[DIR_PMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE;
-      q = (subgridD.q[DIR_MPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW;
+      q = (subgridD.q[DIR_P00])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00])[kw  ]=f_E  ;
+      q = (subgridD.q[DIR_M00])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00])[ke  ]=f_W  ;
+      q = (subgridD.q[DIR_0P0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0])[ks  ]=f_N  ;
+      q = (subgridD.q[DIR_0M0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0])[kn  ]=f_S  ;
+      q = (subgridD.q[DIR_00P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M])[kb  ]=f_T  ;
+      q = (subgridD.q[DIR_00M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P])[kt  ]=f_B  ;
+      q = (subgridD.q[DIR_PP0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0])[ksw ]=f_NE ;
+      q = (subgridD.q[DIR_MM0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0])[kne ]=f_SW ;
+      q = (subgridD.q[DIR_PM0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0])[knw ]=f_SE ;
+      q = (subgridD.q[DIR_MP0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0])[kse ]=f_NW ;
+      q = (subgridD.q[DIR_P0P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M])[kbw ]=f_TE ;
+      q = (subgridD.q[DIR_M0M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P])[kte ]=f_BW ;
+      q = (subgridD.q[DIR_P0M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P])[ktw ]=f_BE ;
+      q = (subgridD.q[DIR_M0P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M])[kbe ]=f_TW ;
+      q = (subgridD.q[DIR_0PP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM])[kbs ]=f_TN ;
+      q = (subgridD.q[DIR_0MM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP])[ktn ]=f_BS ;
+      q = (subgridD.q[DIR_0PM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP])[kts ]=f_BN ;
+      q = (subgridD.q[DIR_0MP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM])[kbn ]=f_TS ;
+      q = (subgridD.q[DIR_PPP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE;
+      q = (subgridD.q[DIR_MMM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW;
+      q = (subgridD.q[DIR_PPM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE;
+      q = (subgridD.q[DIR_MMP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW;
+      q = (subgridD.q[DIR_PMP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE;
+      q = (subgridD.q[DIR_MPM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW;
+      q = (subgridD.q[DIR_PMM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE;
+      q = (subgridD.q[DIR_MPP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
index 3a3ab784e6a7901c41d402629172c3c6154ffde9..7a82c694ef55ff5bc6770b9ae333e2de7ed1938c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
@@ -29,7 +29,7 @@ __global__ void InitParticles( real* coordX,
 										  unsigned int* neighborWSB,
 										  int level,
 									      unsigned int numberOfParticles, 
-										  unsigned int size_Mat)
+										  unsigned long long numberOfLBnodes)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -72,12 +72,12 @@ __global__ void InitParticles( real* coordX,
 
 		////////////////////////////////////////////////////////////////////////////////
 		//find random node of the fluid domain
-		unsigned int cbID = (unsigned int)(randArray[k]*size_Mat);
-		for(int i = 0; i < size_Mat;i++)
+		unsigned int cbID = (unsigned int)(randArray[k]*numberOfLBnodes);
+		for(int i = 0; i < numberOfLBnodes;i++)
 		{
 			//if (coordX[cbID] < 15 && coordX[cbID] > 5 && coordY[cbID] < 15 && coordY[cbID] > 5 && coordZ[cbID] < 15 && coordZ[cbID] > 5)	break;
 			if (coordX[cbID] < 5 && coordX[cbID] > 2)	break;
-			cbID = (unsigned int)(randArray[k]*(size_Mat - i)); 
+			cbID = (unsigned int)(randArray[k]*(numberOfLBnodes - i)); 
 		}
 	   
 		real coordinateX;
@@ -183,7 +183,7 @@ __global__ void MoveParticles( real* coordX,
 										  unsigned int timestep, 
 										  unsigned int numberOfTimesteps, 
 									      unsigned int numberOfParticles, 
-										  unsigned int size_Mat,
+										  unsigned long long numberOfLBnodes,
 										  bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -248,63 +248,63 @@ __global__ void MoveParticles( real* coordX,
 		{
 		   if (isEvenTimestep==true)
 		   {
-			  feC    = &DD[DIR_P00   *size_Mat];
-			  fwC    = &DD[DIR_M00   *size_Mat];
-			  fnC    = &DD[DIR_0P0   *size_Mat];
-			  fsC    = &DD[DIR_0M0   *size_Mat];
-			  ftC    = &DD[DIR_00P   *size_Mat];
-			  fbC    = &DD[DIR_00M   *size_Mat];
-			  fneC   = &DD[DIR_PP0  *size_Mat];
-			  fswC   = &DD[DIR_MM0  *size_Mat];
-			  fseC   = &DD[DIR_PM0  *size_Mat];
-			  fnwC   = &DD[DIR_MP0  *size_Mat];
-			  fteC   = &DD[DIR_P0P  *size_Mat];
-			  fbwC   = &DD[DIR_M0M  *size_Mat];
-			  fbeC   = &DD[DIR_P0M  *size_Mat];
-			  ftwC   = &DD[DIR_M0P  *size_Mat];
-			  ftnC   = &DD[DIR_0PP  *size_Mat];
-			  fbsC   = &DD[DIR_0MM  *size_Mat];
-			  fbnC   = &DD[DIR_0PM  *size_Mat];
-			  ftsC   = &DD[DIR_0MP  *size_Mat];
-			  fzeroC = &DD[DIR_000*size_Mat];
-			  ftneC  = &DD[DIR_PPP *size_Mat];
-			  ftswC  = &DD[DIR_MMP *size_Mat];
-			  ftseC  = &DD[DIR_PMP *size_Mat];
-			  ftnwC  = &DD[DIR_MPP *size_Mat];
-			  fbneC  = &DD[DIR_PPM *size_Mat];
-			  fbswC  = &DD[DIR_MMM *size_Mat];
-			  fbseC  = &DD[DIR_PMM *size_Mat];
-			  fbnwC  = &DD[DIR_MPM *size_Mat];
+			  feC    = &DD[DIR_P00 * numberOfLBnodes];
+			  fwC    = &DD[DIR_M00 * numberOfLBnodes];
+			  fnC    = &DD[DIR_0P0 * numberOfLBnodes];
+			  fsC    = &DD[DIR_0M0 * numberOfLBnodes];
+			  ftC    = &DD[DIR_00P * numberOfLBnodes];
+			  fbC    = &DD[DIR_00M * numberOfLBnodes];
+			  fneC   = &DD[DIR_PP0 * numberOfLBnodes];
+			  fswC   = &DD[DIR_MM0 * numberOfLBnodes];
+			  fseC   = &DD[DIR_PM0 * numberOfLBnodes];
+			  fnwC   = &DD[DIR_MP0 * numberOfLBnodes];
+			  fteC   = &DD[DIR_P0P * numberOfLBnodes];
+			  fbwC   = &DD[DIR_M0M * numberOfLBnodes];
+			  fbeC   = &DD[DIR_P0M * numberOfLBnodes];
+			  ftwC   = &DD[DIR_M0P * numberOfLBnodes];
+			  ftnC   = &DD[DIR_0PP * numberOfLBnodes];
+			  fbsC   = &DD[DIR_0MM * numberOfLBnodes];
+			  fbnC   = &DD[DIR_0PM * numberOfLBnodes];
+			  ftsC   = &DD[DIR_0MP * numberOfLBnodes];
+			  fzeroC = &DD[DIR_000 * numberOfLBnodes];
+			  ftneC  = &DD[DIR_PPP * numberOfLBnodes];
+			  ftswC  = &DD[DIR_MMP * numberOfLBnodes];
+			  ftseC  = &DD[DIR_PMP * numberOfLBnodes];
+			  ftnwC  = &DD[DIR_MPP * numberOfLBnodes];
+			  fbneC  = &DD[DIR_PPM * numberOfLBnodes];
+			  fbswC  = &DD[DIR_MMM * numberOfLBnodes];
+			  fbseC  = &DD[DIR_PMM * numberOfLBnodes];
+			  fbnwC  = &DD[DIR_MPM * numberOfLBnodes];
 		   } 			 
 		   else			 
 		   {			 
-			  fwC    = &DD[DIR_P00   *size_Mat];
-			  feC    = &DD[DIR_M00   *size_Mat];
-			  fsC    = &DD[DIR_0P0   *size_Mat];
-			  fnC    = &DD[DIR_0M0   *size_Mat];
-			  fbC    = &DD[DIR_00P   *size_Mat];
-			  ftC    = &DD[DIR_00M   *size_Mat];
-			  fswC   = &DD[DIR_PP0  *size_Mat];
-			  fneC   = &DD[DIR_MM0  *size_Mat];
-			  fnwC   = &DD[DIR_PM0  *size_Mat];
-			  fseC   = &DD[DIR_MP0  *size_Mat];
-			  fbwC   = &DD[DIR_P0P  *size_Mat];
-			  fteC   = &DD[DIR_M0M  *size_Mat];
-			  ftwC   = &DD[DIR_P0M  *size_Mat];
-			  fbeC   = &DD[DIR_M0P  *size_Mat];
-			  fbsC   = &DD[DIR_0PP  *size_Mat];
-			  ftnC   = &DD[DIR_0MM  *size_Mat];
-			  ftsC   = &DD[DIR_0PM  *size_Mat];
-			  fbnC   = &DD[DIR_0MP  *size_Mat];
-			  fzeroC = &DD[DIR_000*size_Mat];
-			  fbswC  = &DD[DIR_PPP *size_Mat];
-			  fbneC  = &DD[DIR_MMP *size_Mat];
-			  fbnwC  = &DD[DIR_PMP *size_Mat];
-			  fbseC  = &DD[DIR_MPP *size_Mat];
-			  ftswC  = &DD[DIR_PPM *size_Mat];
-			  ftneC  = &DD[DIR_MMM *size_Mat];
-			  ftnwC  = &DD[DIR_PMM *size_Mat];
-			  ftseC  = &DD[DIR_MPM *size_Mat];
+			  fwC    = &DD[DIR_P00 * numberOfLBnodes];
+			  feC    = &DD[DIR_M00 * numberOfLBnodes];
+			  fsC    = &DD[DIR_0P0 * numberOfLBnodes];
+			  fnC    = &DD[DIR_0M0 * numberOfLBnodes];
+			  fbC    = &DD[DIR_00P * numberOfLBnodes];
+			  ftC    = &DD[DIR_00M * numberOfLBnodes];
+			  fswC   = &DD[DIR_PP0 * numberOfLBnodes];
+			  fneC   = &DD[DIR_MM0 * numberOfLBnodes];
+			  fnwC   = &DD[DIR_PM0 * numberOfLBnodes];
+			  fseC   = &DD[DIR_MP0 * numberOfLBnodes];
+			  fbwC   = &DD[DIR_P0P * numberOfLBnodes];
+			  fteC   = &DD[DIR_M0M * numberOfLBnodes];
+			  ftwC   = &DD[DIR_P0M * numberOfLBnodes];
+			  fbeC   = &DD[DIR_M0P * numberOfLBnodes];
+			  fbsC   = &DD[DIR_0PP * numberOfLBnodes];
+			  ftnC   = &DD[DIR_0MM * numberOfLBnodes];
+			  ftsC   = &DD[DIR_0PM * numberOfLBnodes];
+			  fbnC   = &DD[DIR_0MP * numberOfLBnodes];
+			  fzeroC = &DD[DIR_000 * numberOfLBnodes];
+			  fbswC  = &DD[DIR_PPP * numberOfLBnodes];
+			  fbneC  = &DD[DIR_MMP * numberOfLBnodes];
+			  fbnwC  = &DD[DIR_PMP * numberOfLBnodes];
+			  fbseC  = &DD[DIR_MPP * numberOfLBnodes];
+			  ftswC  = &DD[DIR_PPM * numberOfLBnodes];
+			  ftneC  = &DD[DIR_MMM * numberOfLBnodes];
+			  ftnwC  = &DD[DIR_PMM * numberOfLBnodes];
+			  ftseC  = &DD[DIR_MPM * numberOfLBnodes];
 		   }
 
 			  //////////////////////////////////////////////////////////////////////////
@@ -1055,7 +1055,7 @@ __global__ void MoveParticlesWithoutBCs(   real* coordX,
 													  unsigned int timestep, 
 													  unsigned int numberOfTimesteps, 
 													  unsigned int numberOfParticles, 
-													  unsigned int size_Mat,
+													  unsigned long long numberOfLBnodes,
 													  bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -1114,63 +1114,63 @@ __global__ void MoveParticlesWithoutBCs(   real* coordX,
 		{
 		   if (isEvenTimestep==true)
 		   {
-			  feC    = &DD[DIR_P00   *size_Mat];
-			  fwC    = &DD[DIR_M00   *size_Mat];
-			  fnC    = &DD[DIR_0P0   *size_Mat];
-			  fsC    = &DD[DIR_0M0   *size_Mat];
-			  ftC    = &DD[DIR_00P   *size_Mat];
-			  fbC    = &DD[DIR_00M   *size_Mat];
-			  fneC   = &DD[DIR_PP0  *size_Mat];
-			  fswC   = &DD[DIR_MM0  *size_Mat];
-			  fseC   = &DD[DIR_PM0  *size_Mat];
-			  fnwC   = &DD[DIR_MP0  *size_Mat];
-			  fteC   = &DD[DIR_P0P  *size_Mat];
-			  fbwC   = &DD[DIR_M0M  *size_Mat];
-			  fbeC   = &DD[DIR_P0M  *size_Mat];
-			  ftwC   = &DD[DIR_M0P  *size_Mat];
-			  ftnC   = &DD[DIR_0PP  *size_Mat];
-			  fbsC   = &DD[DIR_0MM  *size_Mat];
-			  fbnC   = &DD[DIR_0PM  *size_Mat];
-			  ftsC   = &DD[DIR_0MP  *size_Mat];
-			  fzeroC = &DD[DIR_000*size_Mat];
-			  ftneC  = &DD[DIR_PPP *size_Mat];
-			  ftswC  = &DD[DIR_MMP *size_Mat];
-			  ftseC  = &DD[DIR_PMP *size_Mat];
-			  ftnwC  = &DD[DIR_MPP *size_Mat];
-			  fbneC  = &DD[DIR_PPM *size_Mat];
-			  fbswC  = &DD[DIR_MMM *size_Mat];
-			  fbseC  = &DD[DIR_PMM *size_Mat];
-			  fbnwC  = &DD[DIR_MPM *size_Mat];
+			  feC    = &DD[DIR_P00 * numberOfLBnodes];
+			  fwC    = &DD[DIR_M00 * numberOfLBnodes];
+			  fnC    = &DD[DIR_0P0 * numberOfLBnodes];
+			  fsC    = &DD[DIR_0M0 * numberOfLBnodes];
+			  ftC    = &DD[DIR_00P * numberOfLBnodes];
+			  fbC    = &DD[DIR_00M * numberOfLBnodes];
+			  fneC   = &DD[DIR_PP0 * numberOfLBnodes];
+			  fswC   = &DD[DIR_MM0 * numberOfLBnodes];
+			  fseC   = &DD[DIR_PM0 * numberOfLBnodes];
+			  fnwC   = &DD[DIR_MP0 * numberOfLBnodes];
+			  fteC   = &DD[DIR_P0P * numberOfLBnodes];
+			  fbwC   = &DD[DIR_M0M * numberOfLBnodes];
+			  fbeC   = &DD[DIR_P0M * numberOfLBnodes];
+			  ftwC   = &DD[DIR_M0P * numberOfLBnodes];
+			  ftnC   = &DD[DIR_0PP * numberOfLBnodes];
+			  fbsC   = &DD[DIR_0MM * numberOfLBnodes];
+			  fbnC   = &DD[DIR_0PM * numberOfLBnodes];
+			  ftsC   = &DD[DIR_0MP * numberOfLBnodes];
+			  fzeroC = &DD[DIR_000 * numberOfLBnodes];
+			  ftneC  = &DD[DIR_PPP * numberOfLBnodes];
+			  ftswC  = &DD[DIR_MMP * numberOfLBnodes];
+			  ftseC  = &DD[DIR_PMP * numberOfLBnodes];
+			  ftnwC  = &DD[DIR_MPP * numberOfLBnodes];
+			  fbneC  = &DD[DIR_PPM * numberOfLBnodes];
+			  fbswC  = &DD[DIR_MMM * numberOfLBnodes];
+			  fbseC  = &DD[DIR_PMM * numberOfLBnodes];
+			  fbnwC  = &DD[DIR_MPM * numberOfLBnodes];
 		   } 			 
 		   else			 
 		   {			 
-			  fwC    = &DD[DIR_P00   *size_Mat];
-			  feC    = &DD[DIR_M00   *size_Mat];
-			  fsC    = &DD[DIR_0P0   *size_Mat];
-			  fnC    = &DD[DIR_0M0   *size_Mat];
-			  fbC    = &DD[DIR_00P   *size_Mat];
-			  ftC    = &DD[DIR_00M   *size_Mat];
-			  fswC   = &DD[DIR_PP0  *size_Mat];
-			  fneC   = &DD[DIR_MM0  *size_Mat];
-			  fnwC   = &DD[DIR_PM0  *size_Mat];
-			  fseC   = &DD[DIR_MP0  *size_Mat];
-			  fbwC   = &DD[DIR_P0P  *size_Mat];
-			  fteC   = &DD[DIR_M0M  *size_Mat];
-			  ftwC   = &DD[DIR_P0M  *size_Mat];
-			  fbeC   = &DD[DIR_M0P  *size_Mat];
-			  fbsC   = &DD[DIR_0PP  *size_Mat];
-			  ftnC   = &DD[DIR_0MM  *size_Mat];
-			  ftsC   = &DD[DIR_0PM  *size_Mat];
-			  fbnC   = &DD[DIR_0MP  *size_Mat];
-			  fzeroC = &DD[DIR_000*size_Mat];
-			  fbswC  = &DD[DIR_PPP *size_Mat];
-			  fbneC  = &DD[DIR_MMP *size_Mat];
-			  fbnwC  = &DD[DIR_PMP *size_Mat];
-			  fbseC  = &DD[DIR_MPP *size_Mat];
-			  ftswC  = &DD[DIR_PPM *size_Mat];
-			  ftneC  = &DD[DIR_MMM *size_Mat];
-			  ftnwC  = &DD[DIR_PMM *size_Mat];
-			  ftseC  = &DD[DIR_MPM *size_Mat];
+			  fwC    = &DD[DIR_P00 * numberOfLBnodes];
+			  feC    = &DD[DIR_M00 * numberOfLBnodes];
+			  fsC    = &DD[DIR_0P0 * numberOfLBnodes];
+			  fnC    = &DD[DIR_0M0 * numberOfLBnodes];
+			  fbC    = &DD[DIR_00P * numberOfLBnodes];
+			  ftC    = &DD[DIR_00M * numberOfLBnodes];
+			  fswC   = &DD[DIR_PP0 * numberOfLBnodes];
+			  fneC   = &DD[DIR_MM0 * numberOfLBnodes];
+			  fnwC   = &DD[DIR_PM0 * numberOfLBnodes];
+			  fseC   = &DD[DIR_MP0 * numberOfLBnodes];
+			  fbwC   = &DD[DIR_P0P * numberOfLBnodes];
+			  fteC   = &DD[DIR_M0M * numberOfLBnodes];
+			  ftwC   = &DD[DIR_P0M * numberOfLBnodes];
+			  fbeC   = &DD[DIR_M0P * numberOfLBnodes];
+			  fbsC   = &DD[DIR_0PP * numberOfLBnodes];
+			  ftnC   = &DD[DIR_0MM * numberOfLBnodes];
+			  ftsC   = &DD[DIR_0PM * numberOfLBnodes];
+			  fbnC   = &DD[DIR_0MP * numberOfLBnodes];
+			  fzeroC = &DD[DIR_000 * numberOfLBnodes];
+			  fbswC  = &DD[DIR_PPP * numberOfLBnodes];
+			  fbneC  = &DD[DIR_MMP * numberOfLBnodes];
+			  fbnwC  = &DD[DIR_PMP * numberOfLBnodes];
+			  fbseC  = &DD[DIR_MPP * numberOfLBnodes];
+			  ftswC  = &DD[DIR_PPM * numberOfLBnodes];
+			  ftneC  = &DD[DIR_MMM * numberOfLBnodes];
+			  ftnwC  = &DD[DIR_PMM * numberOfLBnodes];
+			  ftseC  = &DD[DIR_MPM * numberOfLBnodes];
 		   }
 
 			  //////////////////////////////////////////////////////////////////////////
@@ -1928,7 +1928,7 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
 													  real* NormalX,
 													  real* NormalY,
 													  real* NormalZ,
-													  unsigned int size_Mat, 
+													  unsigned long long numberOfLBnodes, 
 													  bool isEvenTimestep)
 {
 
@@ -1937,63 +1937,63 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+   //   D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+   //   D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+   //   D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+   //   D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+   //   D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+   //   D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+   //   D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+   //   D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+   //   D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+   //   D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+   //   D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+   //   D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+   //   D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+   //   D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+   //   D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+   //   D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //   D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+   //   D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+   //   D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+   //   D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+   //   D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+   //   D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+   //   D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+   //   D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
    //} 
    //else
    //{
-   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+   //   D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+   //   D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+   //   D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+   //   D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+   //   D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+   //   D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+   //   D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+   //   D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+   //   D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+   //   D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+   //   D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+   //   D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+   //   D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+   //   D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+   //   D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+   //   D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+   //   D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //   D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+   //   D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+   //   D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+   //   D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+   //   D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+   //   D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+   //   D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+   //   D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
    //}
    //////////////////////////////////////////////////////////////////////////////////
    //const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2015,24 +2015,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //         *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
    // //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
    // //         *q_dirBSE, *q_dirBNW; 
-   // //   q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-   //    q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-   // //   q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-   //    q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-   // //   q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-   //    q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-   // //   q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-   // //   q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-   // //   q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-   // //   q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-   // //   q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-   // //   q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-   // //   q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-   // //   q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-   // //   q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-   // //   q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-   // //   q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-   // //   q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+   // //   q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+   //    q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+   // //   q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+   //    q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+   // //   q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+   //    q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+   // //   q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+   // //   q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+   // //   q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+   // //   q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+   // //   q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+   // //   q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+   // //   q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+   // //   q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+   // //   q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+   // //   q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+   // //   q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+   // //   q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
    // //   q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
    // //   q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
    // //   q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2047,24 +2047,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //           *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
    // //           *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
    // //           *nx_dirBSE, *nx_dirBNW; 
-   // //   nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
-   // //   nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
-   // //   nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
-   // //   nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
-   // //   nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
-   // //   nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
-   // //   nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
-   // //   nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
-   // //   nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
-   // //   nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
-   // //   nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
-   // //   nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
-   // //   nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
-   // //   nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
-   // //   nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
-   // //   nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
-   // //   nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
-   // //   nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+   // //   nx_dirE   = &NormalX[DIR_P00 * numberOfBCnodes];
+   // //   nx_dirW   = &NormalX[DIR_M00 * numberOfBCnodes];
+   // //   nx_dirN   = &NormalX[DIR_0P0 * numberOfBCnodes];
+   // //   nx_dirS   = &NormalX[DIR_0M0 * numberOfBCnodes];
+   // //   nx_dirT   = &NormalX[DIR_00P * numberOfBCnodes];
+   // //   nx_dirB   = &NormalX[DIR_00M * numberOfBCnodes];
+   // //   nx_dirNE  = &NormalX[DIR_PP0 * numberOfBCnodes];
+   // //   nx_dirSW  = &NormalX[DIR_MM0 * numberOfBCnodes];
+   // //   nx_dirSE  = &NormalX[DIR_PM0 * numberOfBCnodes];
+   // //   nx_dirNW  = &NormalX[DIR_MP0 * numberOfBCnodes];
+   // //   nx_dirTE  = &NormalX[DIR_P0P * numberOfBCnodes];
+   // //   nx_dirBW  = &NormalX[DIR_M0M * numberOfBCnodes];
+   // //   nx_dirBE  = &NormalX[DIR_P0M * numberOfBCnodes];
+   // //   nx_dirTW  = &NormalX[DIR_M0P * numberOfBCnodes];
+   // //   nx_dirTN  = &NormalX[DIR_0PP * numberOfBCnodes];
+   // //   nx_dirBS  = &NormalX[DIR_0MM * numberOfBCnodes];
+   // //   nx_dirBN  = &NormalX[DIR_0PM * numberOfBCnodes];
+   // //   nx_dirTS  = &NormalX[DIR_0MP * numberOfBCnodes];
    // //   nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
    // //   nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
    // //   nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
@@ -2079,24 +2079,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //           *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
    // //           *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
    // //           *ny_dirBSE, *ny_dirBNW; 
-   // //   ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
-   // //   ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
-   // //   ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
-   // //   ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
-   // //   ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
-   // //   ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
-   // //   ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
-   // //   ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
-   // //   ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
-   // //   ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
-   // //   ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
-   // //   ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
-   // //   ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
-   // //   ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
-   // //   ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
-   // //   ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
-   // //   ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
-   // //   ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+   // //   ny_dirE   = &NormalY[DIR_P00 * numberOfBCnodes];
+   // //   ny_dirW   = &NormalY[DIR_M00 * numberOfBCnodes];
+   // //   ny_dirN   = &NormalY[DIR_0P0 * numberOfBCnodes];
+   // //   ny_dirS   = &NormalY[DIR_0M0 * numberOfBCnodes];
+   // //   ny_dirT   = &NormalY[DIR_00P * numberOfBCnodes];
+   // //   ny_dirB   = &NormalY[DIR_00M * numberOfBCnodes];
+   // //   ny_dirNE  = &NormalY[DIR_PP0 * numberOfBCnodes];
+   // //   ny_dirSW  = &NormalY[DIR_MM0 * numberOfBCnodes];
+   // //   ny_dirSE  = &NormalY[DIR_PM0 * numberOfBCnodes];
+   // //   ny_dirNW  = &NormalY[DIR_MP0 * numberOfBCnodes];
+   // //   ny_dirTE  = &NormalY[DIR_P0P * numberOfBCnodes];
+   // //   ny_dirBW  = &NormalY[DIR_M0M * numberOfBCnodes];
+   // //   ny_dirBE  = &NormalY[DIR_P0M * numberOfBCnodes];
+   // //   ny_dirTW  = &NormalY[DIR_M0P * numberOfBCnodes];
+   // //   ny_dirTN  = &NormalY[DIR_0PP * numberOfBCnodes];
+   // //   ny_dirBS  = &NormalY[DIR_0MM * numberOfBCnodes];
+   // //   ny_dirBN  = &NormalY[DIR_0PM * numberOfBCnodes];
+   // //   ny_dirTS  = &NormalY[DIR_0MP * numberOfBCnodes];
    // //   ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
    // //   ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
    // //   ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
@@ -2111,24 +2111,24 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //           *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
    // //           *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
    // //           *nz_dirBSE, *nz_dirBNW; 
-   // //   nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
-   // //   nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
-   // //   nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
-   // //   nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
-   // //   nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
-   // //   nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
-   // //   nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
-   // //   nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
-   // //   nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
-   // //   nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
-   // //   nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
-   // //   nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
-   // //   nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
-   // //   nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
-   // //   nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
-   // //   nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
-   // //   nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
-   // //   nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+   // //   nz_dirE   = &NormalZ[DIR_P00 * numberOfBCnodes];
+   // //   nz_dirW   = &NormalZ[DIR_M00 * numberOfBCnodes];
+   // //   nz_dirN   = &NormalZ[DIR_0P0 * numberOfBCnodes];
+   // //   nz_dirS   = &NormalZ[DIR_0M0 * numberOfBCnodes];
+   // //   nz_dirT   = &NormalZ[DIR_00P * numberOfBCnodes];
+   // //   nz_dirB   = &NormalZ[DIR_00M * numberOfBCnodes];
+   // //   nz_dirNE  = &NormalZ[DIR_PP0 * numberOfBCnodes];
+   // //   nz_dirSW  = &NormalZ[DIR_MM0 * numberOfBCnodes];
+   // //   nz_dirSE  = &NormalZ[DIR_PM0 * numberOfBCnodes];
+   // //   nz_dirNW  = &NormalZ[DIR_MP0 * numberOfBCnodes];
+   // //   nz_dirTE  = &NormalZ[DIR_P0P * numberOfBCnodes];
+   // //   nz_dirBW  = &NormalZ[DIR_M0M * numberOfBCnodes];
+   // //   nz_dirBE  = &NormalZ[DIR_P0M * numberOfBCnodes];
+   // //   nz_dirTW  = &NormalZ[DIR_M0P * numberOfBCnodes];
+   // //   nz_dirTN  = &NormalZ[DIR_0PP * numberOfBCnodes];
+   // //   nz_dirBS  = &NormalZ[DIR_0MM * numberOfBCnodes];
+   // //   nz_dirBN  = &NormalZ[DIR_0PM * numberOfBCnodes];
+   // //   nz_dirTS  = &NormalZ[DIR_0MP * numberOfBCnodes];
    // //   nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
    // //   nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
    // //   nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
@@ -2190,32 +2190,32 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //   unsigned int ktne = KQK;
    //   unsigned int kbsw = neighborZ[ksw];
    //   ////////////////////////////////////////////////////////////////////////////////
-   //   real f_W    = (D.f[DIR_P00   ])[ke   ];
-   //   real f_E    = (D.f[DIR_M00   ])[kw   ];
-   //   real f_S    = (D.f[DIR_0P0   ])[kn   ];
-   //   real f_N    = (D.f[DIR_0M0   ])[ks   ];
-   //   real f_B    = (D.f[DIR_00P   ])[kt   ];
-   //   real f_T    = (D.f[DIR_00M   ])[kb   ];
-   //   real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-   //   real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-   //   real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-   //   real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-   //   real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-   //   real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-   //   real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-   //   real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-   //   real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-   //   real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-   //   real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-   //   real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-   //   real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-   //   real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-   //   real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-   //   real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-   //   real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-   //   real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-   //   real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-   //   real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+   //   real f_W    = (D.f[DIR_P00])[ke   ];
+   //   real f_E    = (D.f[DIR_M00])[kw   ];
+   //   real f_S    = (D.f[DIR_0P0])[kn   ];
+   //   real f_N    = (D.f[DIR_0M0])[ks   ];
+   //   real f_B    = (D.f[DIR_00P])[kt   ];
+   //   real f_T    = (D.f[DIR_00M])[kb   ];
+   //   real f_SW   = (D.f[DIR_PP0])[kne  ];
+   //   real f_NE   = (D.f[DIR_MM0])[ksw  ];
+   //   real f_NW   = (D.f[DIR_PM0])[kse  ];
+   //   real f_SE   = (D.f[DIR_MP0])[knw  ];
+   //   real f_BW   = (D.f[DIR_P0P])[kte  ];
+   //   real f_TE   = (D.f[DIR_M0M])[kbw  ];
+   //   real f_TW   = (D.f[DIR_P0M])[kbe  ];
+   //   real f_BE   = (D.f[DIR_M0P])[ktw  ];
+   //   real f_BS   = (D.f[DIR_0PP])[ktn  ];
+   //   real f_TN   = (D.f[DIR_0MM])[kbs  ];
+   //   real f_TS   = (D.f[DIR_0PM])[kbn  ];
+   //   real f_BN   = (D.f[DIR_0MP])[kts  ];
+   //   real f_BSW  = (D.f[DIR_PPP])[ktne ];
+   //   real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+   //   real f_BNW  = (D.f[DIR_PMP])[ktse ];
+   //   real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+   //   real f_TSW  = (D.f[DIR_PPM])[kbne ];
+   //   real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+   //   real f_TNW  = (D.f[DIR_PMM])[kbse ];
+   //   real f_TSE  = (D.f[DIR_MPM])[kbnw ];
    //   ////////////////////////////////////////////////////////////////////////////////
    //   // real feq, q;
    //   real vx1, vx2, vx3, drho;
@@ -2241,63 +2241,63 @@ __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //   //////////////////////////////////////////////////////////////////////////
    //   if (isEvenTimestep==false)
    //   {
-   //      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //      D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+   //      D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+   //      D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+   //      D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+   //      D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+   //      D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+   //      D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+   //      D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+   //      D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+   //      D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+   //      D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+   //      D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+   //      D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+   //      D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+   //      D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+   //      D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+   //      D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+   //      D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+   //      D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //      D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+   //      D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+   //      D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+   //      D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+   //      D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+   //      D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+   //      D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+   //      D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
    //   } 
    //   else
    //   {
-   //      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //      D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+   //      D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+   //      D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+   //      D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+   //      D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+   //      D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+   //      D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+   //      D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+   //      D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+   //      D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+   //      D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+   //      D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+   //      D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+   //      D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+   //      D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+   //      D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+   //      D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+   //      D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+   //      D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+   //      D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+   //      D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+   //      D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+   //      D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+   //      D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+   //      D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+   //      D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+   //      D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
    //   }
    //}
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
index b17ffefd13a8a3a6048dde69ffb1db6c5def23e1..64c6b6085c353e16c08f9057f603a7799ce14289 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
@@ -1,53 +1,89 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PrecursorBCs27.cu
+//! \ingroup GPU
+//! \author Henry Korb, Henrik Asmuth
+//======================================================================================
 #include "LBM/LB.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 #include <lbm/constants/D3Q27.h>
 #include <lbm/MacroscopicQuantities.h>
 
-#include "VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh"
-#include "VirtualFluids_GPU/GPU/KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
-
-__global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
-                                                int numberOfBCnodes,
-                                                int numberOfPrecursorNodes,
-                                                int sizeQ,
-                                                real omega,
-                                                real* distributions,
-                                                real* subgridDistances,
-                                                uint* neighborX,
-                                                uint* neighborY,
-                                                uint* neighborZ,
-                                                uint* neighbors0PP,
-                                                uint* neighbors0PM,
-                                                uint* neighbors0MP,
-                                                uint* neighbors0MM,
-                                                real* weights0PP,
-                                                real* weights0PM,
-                                                real* weights0MP,
-                                                real* weights0MM,
-                                                real* vLast,
-                                                real* vCurrent,
-                                                real velocityX,
-                                                real velocityY,
-                                                real velocityZ,
-                                                real timeRatio,
-                                                real velocityRatio,
-                                                unsigned long long numberOfLBnodes,
-                                                bool isEvenTimestep)
+using namespace vf::gpu;
+
+__global__ void QPrecursorDeviceCompZeroPress(
+    int* subgridDistanceIndices,
+    int numberOfBCnodes,
+    int numberOfPrecursorNodes,
+    int sizeQ,
+    real omega,
+    real* distributions,
+    real* subgridDistances,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighbors0PP,
+    uint* neighbors0PM,
+    uint* neighbors0MP,
+    uint* neighbors0MM,
+    real* weights0PP,
+    real* weights0PM,
+    real* weights0MP,
+    real* weights0MM,
+    real* vLast,
+    real* vCurrent,
+    real velocityX,
+    real velocityY,
+    real velocityZ,
+    real timeRatio,
+    real velocityRatio,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-    const unsigned k = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-    if(k>=numberOfBCnodes) return;
+    if(nodeIndex>=numberOfBCnodes) return;
 
     ////////////////////////////////////////////////////////////////////////////////
     // interpolation of velocity
     real vxLastInterpd, vyLastInterpd, vzLastInterpd;
     real vxNextInterpd, vyNextInterpd, vzNextInterpd;
 
-    uint kNeighbor0PP = neighbors0PP[k];
-    real d0PP = weights0PP[k];
+    uint kNeighbor0PP = neighbors0PP[nodeIndex];
+    real d0PP = weights0PP[nodeIndex];
 
     real* vxLast = vLast;
     real* vyLast = &vLast[numberOfPrecursorNodes];
@@ -59,13 +95,13 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
 
     if(d0PP < 1e6)
     {
-        uint kNeighbor0PM = neighbors0PM[k];
-        uint kNeighbor0MP = neighbors0MP[k];
-        uint kNeighbor0MM = neighbors0MM[k];
+        uint kNeighbor0PM = neighbors0PM[nodeIndex];
+        uint kNeighbor0MP = neighbors0MP[nodeIndex];
+        uint kNeighbor0MM = neighbors0MM[nodeIndex];
 
-        real d0PM = weights0PM[k];
-        real d0MP = weights0MP[k];
-        real d0MM = weights0MM[k];
+        real d0PM = weights0PM[nodeIndex];
+        real d0MP = weights0MP[nodeIndex];
+        real d0MM = weights0MM[nodeIndex];
 
         real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM);
 
@@ -95,10 +131,15 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
     // From here on just a copy of QVelDeviceCompZeroPress
     ////////////////////////////////////////////////////////////////////////////////
 
+    //////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep
+    //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
+    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
     Distributions27 dist;
     getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
 
-    unsigned int KQK  = subgridDistanceIndices[k];
+    unsigned int KQK  = subgridDistanceIndices[nodeIndex];
     unsigned int k000= KQK;
     unsigned int kP00   = KQK;
     unsigned int kM00   = neighborX[KQK];
@@ -187,7 +228,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
     ////////////////////////////////////////////////////////////////////////////////
     //! - Update distributions with subgrid distance (q) between zero and one
     real feq, q, velocityLB, velocityBC;
-    q = (subgridD.q[DIR_P00])[k];
+    q = (subgridD.q[DIR_P00])[nodeIndex];
     if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
     {
         velocityLB = vx1;
@@ -196,7 +237,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_M00])[kM00] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P00, f_M00, feq, omega, drho, velocityBC, c2o27);
     }
 
-    q = (subgridD.q[DIR_M00])[k];
+    q = (subgridD.q[DIR_M00])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1;
@@ -205,7 +246,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_P00])[kP00] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M00, f_P00, feq, omega, drho, velocityBC, c2o27);
     }
 
-    q = (subgridD.q[DIR_0P0])[k];
+    q = (subgridD.q[DIR_0P0])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx2;
@@ -214,7 +255,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_0M0])[DIR_0M0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0P0, f_0M0, feq, omega, drho, velocityBC, c2o27);
     }
 
-    q = (subgridD.q[DIR_0M0])[k];
+    q = (subgridD.q[DIR_0M0])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx2;
@@ -223,7 +264,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_0P0])[k0P0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0M0, f_0P0, feq, omega, drho, velocityBC, c2o27);
     }
 
-    q = (subgridD.q[DIR_00P])[k];
+    q = (subgridD.q[DIR_00P])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx3;
@@ -232,7 +273,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_00M])[k00M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_00P, f_00M, feq, omega, drho, velocityBC, c2o27);
     }
 
-    q = (subgridD.q[DIR_00M])[k];
+    q = (subgridD.q[DIR_00M])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx3;
@@ -241,7 +282,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_00P])[k00P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_00M, f_00P, feq, omega, drho, velocityBC, c2o27);
     }
 
-    q = (subgridD.q[DIR_PP0])[k];
+    q = (subgridD.q[DIR_PP0])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx1 + vx2;
@@ -250,7 +291,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_MM0])[kMM0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PP0, f_MM0, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_MM0])[k];
+    q = (subgridD.q[DIR_MM0])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1 - vx2;
@@ -259,7 +300,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_PP0])[kPP0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MM0, f_PP0, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_PM0])[k];
+    q = (subgridD.q[DIR_PM0])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx1 - vx2;
@@ -268,7 +309,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_MP0])[kMP0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PM0, f_MP0, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_MP0])[k];
+    q = (subgridD.q[DIR_MP0])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1 + vx2;
@@ -277,7 +318,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_PM0])[kPM0] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MP0, f_PM0, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_P0P])[k];
+    q = (subgridD.q[DIR_P0P])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx1 + vx3;
@@ -286,7 +327,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_M0M])[kM0M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P0P, f_M0M, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_M0M])[k];
+    q = (subgridD.q[DIR_M0M])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1 - vx3;
@@ -295,7 +336,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_P0P])[kP0P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M0M, f_P0P, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_P0M])[k];
+    q = (subgridD.q[DIR_P0M])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx1 - vx3;
@@ -304,7 +345,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_M0P])[kM0P] = getInterpolatedDistributionForVeloWithPressureBC(q, f_P0M, f_M0P, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_M0P])[k];
+    q = (subgridD.q[DIR_M0P])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1 + vx3;
@@ -313,7 +354,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_P0M])[kP0M] = getInterpolatedDistributionForVeloWithPressureBC(q, f_M0P, f_P0M, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_0PP])[k];
+    q = (subgridD.q[DIR_0PP])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx2 + vx3;
@@ -322,7 +363,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_0MM])[k0MM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0PP, f_0MM, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_0MM])[k];
+    q = (subgridD.q[DIR_0MM])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx2 - vx3;
@@ -331,7 +372,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_0PP])[k0PP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0MM, f_0PP, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_0PM])[k];
+    q = (subgridD.q[DIR_0PM])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx2 - vx3;
@@ -340,7 +381,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_0MP])[k0MP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0PM, f_0PP, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_0MP])[k];
+    q = (subgridD.q[DIR_0MP])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx2 + vx3;
@@ -349,7 +390,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_0PM])[k0PM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_0PP, f_0PM, feq, omega, drho, velocityBC, c1o54);
     }
 
-    q = (subgridD.q[DIR_PPP])[k];
+    q = (subgridD.q[DIR_PPP])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx1 + vx2 + vx3;
@@ -358,7 +399,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_MMM])[kMMM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PPP, f_MMM, feq, omega, drho, velocityBC, c1o216);
     }
 
-    q = (subgridD.q[DIR_MMM])[k];
+    q = (subgridD.q[DIR_MMM])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1 - vx2 - vx3;
@@ -367,7 +408,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_PPP])[kPPP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MMM, f_PPP, feq, omega, drho, velocityBC, c1o216);
     }
 
-    q = (subgridD.q[DIR_PPM])[k];
+    q = (subgridD.q[DIR_PPM])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx1 + vx2 - vx3;
@@ -376,7 +417,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_MMP])[kMMP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PPM, f_MMP, feq, omega, drho, velocityBC, c1o216);
     }
 
-    q = (subgridD.q[DIR_MMP])[k];
+    q = (subgridD.q[DIR_MMP])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1 - vx2 + vx3;
@@ -385,7 +426,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_PPM])[kPPM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MMP, f_PPM, feq, omega, drho, velocityBC, c1o216);
     }
 
-    q = (subgridD.q[DIR_PMP])[k];
+    q = (subgridD.q[DIR_PMP])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx1 - vx2 + vx3;
@@ -394,7 +435,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_MPM])[kMPM] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PMP, f_MPM, feq, omega, drho, velocityBC, c1o216);
     }
 
-    q = (subgridD.q[DIR_MPM])[k];
+    q = (subgridD.q[DIR_MPM])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1 + vx2 - vx3;
@@ -403,7 +444,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_PMP])[kPMP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_MPM, f_PMP, feq, omega, drho, velocityBC, c1o216);
     }
 
-    q = (subgridD.q[DIR_PMM])[k];
+    q = (subgridD.q[DIR_PMM])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = vx1 - vx2 - vx3;
@@ -412,7 +453,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
         (dist.f[DIR_MPP])[kMPP] = getInterpolatedDistributionForVeloWithPressureBC(q, f_PMM, f_MPP, feq, omega, drho, velocityBC, c1o216);
     }
 
-    q = (subgridD.q[DIR_MPP])[k];
+    q = (subgridD.q[DIR_MPP])[nodeIndex];
     if (q>=c0o1 && q<=c1o1)
     {
         velocityLB = -vx1 + vx2 + vx3;
@@ -424,43 +465,89 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
-__global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
-                                        int numberOfBCnodes,
-                                        int numberOfPrecursorNodes,
-                                        real omega,
-                                        real* distributions,
-                                        uint* neighborX,
-                                        uint* neighborY,
-                                        uint* neighborZ,
-                                        uint* neighbors0PP,
-                                        uint* neighbors0PM,
-                                        uint* neighbors0MP,
-                                        uint* neighbors0MM,
-                                        real* weights0PP,
-                                        real* weights0PM,
-                                        real* weights0MP,
-                                        real* weights0MM,
-                                        real* vLast,
-                                        real* vCurrent,
-                                        real velocityX,
-                                        real velocityY,
-                                        real velocityZ,
-                                        real timeRatio,
-                                        real velocityRatio,
-                                        unsigned long long numberOfLBnodes,
-                                        bool isEvenTimestep)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+__global__ void PrecursorDeviceEQ27(
+    int *subgridDistanceIndices,
+    int numberOfBCnodes,
+    int numberOfPrecursorNodes,
+    real omega,
+    real* distributions,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighbors0PP,
+    uint* neighbors0PM,
+    uint* neighbors0MP,
+    uint* neighbors0MM,
+    real* weights0PP,
+    real* weights0PM,
+    real* weights0MP,
+    real* weights0MM,
+    real* vLast,
+    real* vCurrent,
+    real velocityX,
+    real velocityY,
+    real velocityZ,
+    real timeRatio,
+    real velocityRatio,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-    const unsigned k = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-    if(k>=numberOfBCnodes) return;
+    if(nodeIndex>=numberOfBCnodes) return;
 
     ////////////////////////////////////////////////////////////////////////////////
     // interpolation of velocity
     real vxLastInterpd, vyLastInterpd, vzLastInterpd;
     real vxNextInterpd, vyNextInterpd, vzNextInterpd;
 
-    uint kNeighbor0PP = neighbors0PP[k];
-    real d0PP = weights0PP[k];
+    uint kNeighbor0PP = neighbors0PP[nodeIndex];
+    real d0PP = weights0PP[nodeIndex];
 
     real* vxLast = vLast;
     real* vyLast = &vLast[numberOfPrecursorNodes];
@@ -472,13 +559,13 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
 
     if(d0PP < 1e6)
     {
-        uint kNeighbor0PM = neighbors0PM[k];
-        uint kNeighbor0MP = neighbors0MP[k];
-        uint kNeighbor0MM = neighbors0MM[k];
+        uint kNeighbor0PM = neighbors0PM[nodeIndex];
+        uint kNeighbor0MP = neighbors0MP[nodeIndex];
+        uint kNeighbor0MM = neighbors0MM[nodeIndex];
 
-        real d0PM = weights0PM[k];
-        real d0MP = weights0MP[k];
-        real d0MM = weights0MM[k];
+        real d0PM = weights0PM[nodeIndex];
+        real d0MP = weights0MP[nodeIndex];
+        real d0MM = weights0MM[nodeIndex];
 
         real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM);
 
@@ -508,10 +595,15 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
     // From here on just a copy of QVelDeviceCompZeroPress
     ////////////////////////////////////////////////////////////////////////////////
 
+    //////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep
+    //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
+    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
     Distributions27 dist;
     getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
-    unsigned int KQK  = subgridDistanceIndices[k]; //QK
+    unsigned int KQK  = subgridDistanceIndices[nodeIndex]; //QK
     unsigned int k000 = KQK; //000
     unsigned int kP00 = KQK; //P00
     unsigned int kM00 = neighborX[KQK]; //M00
@@ -649,33 +741,73 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
-__global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
-                                                int numberOfBCnodes,
-                                                int numberOfPrecursorNodes,
-                                                real* distributions,
-                                                uint* neighborX,
-                                                uint* neighborY,
-                                                uint* neighborZ,
-                                                uint* neighbors0PP,
-                                                uint* neighbors0PM,
-                                                uint* neighbors0MP,
-                                                uint* neighbors0MM,
-                                                real* weights0PP,
-                                                real* weights0PM,
-                                                real* weights0MP,
-                                                real* weights0MM,
-                                                real* fsLast,
-                                                real* fsNext,
-                                                real timeRatio,
-                                                unsigned long long numberOfLBnodes,
-                                                bool isEvenTimestep)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+__global__ void PrecursorDeviceDistributions(
+    int *subgridDistanceIndices,
+    int numberOfBCnodes,
+    int numberOfPrecursorNodes,
+    real* distributions,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighbors0PP,
+    uint* neighbors0PM,
+    uint* neighbors0MP,
+    uint* neighbors0MM,
+    real* weights0PP,
+    real* weights0PM,
+    real* weights0MP,
+    real* weights0MM,
+    real* fsLast,
+    real* fsNext,
+    real timeRatio,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-    const unsigned k = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-    if(k>=numberOfBCnodes) return;
+    if(nodeIndex>=numberOfBCnodes) return;
 
-    uint kNeighbor0PP = neighbors0PP[k];
-    real d0PP = weights0PP[k];
+    uint kNeighbor0PP = neighbors0PP[nodeIndex];
+    real d0PP = weights0PP[nodeIndex];
 
     real f0LastInterp, f1LastInterp, f2LastInterp, f3LastInterp, f4LastInterp, f5LastInterp, f6LastInterp, f7LastInterp, f8LastInterp;
     real f0NextInterp, f1NextInterp, f2NextInterp, f3NextInterp, f4NextInterp, f5NextInterp, f6NextInterp, f7NextInterp, f8NextInterp;
@@ -703,13 +835,13 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 
     if(d0PP<1e6)
     {
-        uint kNeighbor0PM = neighbors0PM[k];
-        uint kNeighbor0MP = neighbors0MP[k];
-        uint kNeighbor0MM = neighbors0MM[k];
+        uint kNeighbor0PM = neighbors0PM[nodeIndex];
+        uint kNeighbor0MP = neighbors0MP[nodeIndex];
+        uint kNeighbor0MM = neighbors0MM[nodeIndex];
 
-        real d0PM = weights0PM[k];
-        real d0MP = weights0MP[k];
-        real d0MM = weights0MM[k];
+        real d0PM = weights0PM[nodeIndex];
+        real d0MP = weights0MP[nodeIndex];
+        real d0MM = weights0MM[nodeIndex];
 
         real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM);
 
@@ -761,10 +893,15 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
         f7NextInterp = f7Next[kNeighbor0PP];
         f8NextInterp = f8Next[kNeighbor0PP];
     }
+    //////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep
+    //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
+    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
     Distributions27 dist;
     getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
-    unsigned int KQK  = subgridDistanceIndices[k];
+    unsigned int KQK  = subgridDistanceIndices[nodeIndex];
     // unsigned int k000= KQK;
     unsigned int kP00   = KQK;
     // unsigned int kM00   = neighborX[KQK];
@@ -804,36 +941,84 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-//NOTE: Has not been tested after bug fix!
-__global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
-                                                real* subgridDistances,
-                                                int sizeQ,
-                                                int numberOfBCnodes,
-                                                int numberOfPrecursorNodes,
-                                                real* distributions,
-                                                uint* neighborX,
-                                                uint* neighborY,
-                                                uint* neighborZ,
-                                                uint* neighbors0PP,
-                                                uint* neighbors0PM,
-                                                uint* neighbors0MP,
-                                                uint* neighbors0MM,
-                                                real* weights0PP,
-                                                real* weights0PM,
-                                                real* weights0MP,
-                                                real* weights0MM,
-                                                real* fsLast,
-                                                real* fsNext,
-                                                real timeRatio,
-                                                unsigned long long numberOfLBnodes,
-                                                bool isEvenTimestep)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// NOTE: Has not been tested after bug fix!
+__global__ void QPrecursorDeviceDistributions(
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    int sizeQ,
+    int numberOfBCnodes,
+    int numberOfPrecursorNodes,
+    real* distributions,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighbors0PP,
+    uint* neighbors0PM,
+    uint* neighbors0MP,
+    uint* neighbors0MM,
+    real* weights0PP,
+    real* weights0PM,
+    real* weights0MP,
+    real* weights0MM,
+    real* fsLast,
+    real* fsNext,
+    real timeRatio,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-    const unsigned k = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-    if(k>=numberOfBCnodes) return;
+    if(nodeIndex>=numberOfBCnodes) return;
 
-    uint kNeighbor0PP = neighbors0PP[k];
-    real d0PP = weights0PP[k];
+    uint kNeighbor0PP = neighbors0PP[nodeIndex];
+    real d0PP = weights0PP[nodeIndex];
 
     real f0LastInterp, f1LastInterp, f2LastInterp, f3LastInterp, f4LastInterp, f5LastInterp, f6LastInterp, f7LastInterp, f8LastInterp;
     real f0NextInterp, f1NextInterp, f2NextInterp, f3NextInterp, f4NextInterp, f5NextInterp, f6NextInterp, f7NextInterp, f8NextInterp;
@@ -861,13 +1046,13 @@ __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 
     if(d0PP<1e6)
     {
-        uint kNeighbor0PM = neighbors0PM[k];
-        uint kNeighbor0MP = neighbors0MP[k];
-        uint kNeighbor0MM = neighbors0MM[k];
+        uint kNeighbor0PM = neighbors0PM[nodeIndex];
+        uint kNeighbor0MP = neighbors0MP[nodeIndex];
+        uint kNeighbor0MM = neighbors0MM[nodeIndex];
 
-        real d0PM = weights0PM[k];
-        real d0MP = weights0MP[k];
-        real d0MM = weights0MM[k];
+        real d0PM = weights0PM[nodeIndex];
+        real d0MP = weights0MP[nodeIndex];
+        real d0MM = weights0MM[nodeIndex];
 
         real invWeightSum = 1.f/(d0PP+d0PM+d0MP+d0MM);
 
@@ -919,10 +1104,15 @@ __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
         f7NextInterp = f7Next[kNeighbor0PP];
         f8NextInterp = f8Next[kNeighbor0PP];
     }
+    //////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep
+    //! is based on the esoteric twist algorithm \ref <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier
+    //! et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
     Distributions27 dist;
     getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
-    unsigned int KQK  = subgridDistanceIndices[k];
+    unsigned int KQK  = subgridDistanceIndices[nodeIndex];
     // unsigned int k000= KQK;
     unsigned int kP00   = KQK;
     // unsigned int kM00   = neighborX[KQK];
@@ -953,15 +1143,15 @@ __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
     getPointersToSubgridDistances(qs, subgridDistances, sizeQ);
 
     real q;
-    q = qs.q[DIR_P00][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][kP00] = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio;
-    q = qs.q[DIR_PP0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kPP0] = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio;
-    q = qs.q[DIR_PM0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kPM0] = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio;
-    q = qs.q[DIR_P0P][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kP0P] = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio;
-    q = qs.q[DIR_P0M][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kP0M] = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio;
-    q = qs.q[DIR_PPP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][kPPP] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio;
-    q = qs.q[DIR_PMP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][kPMP] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio;
-    q = qs.q[DIR_PPM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kPPM] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio;
-    q = qs.q[DIR_PMM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kPMM] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio;
+    q = qs.q[DIR_P00][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][kP00] = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio;
+    q = qs.q[DIR_PP0][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kPP0] = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio;
+    q = qs.q[DIR_PM0][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kPM0] = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio;
+    q = qs.q[DIR_P0P][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kP0P] = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio;
+    q = qs.q[DIR_P0M][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kP0M] = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio;
+    q = qs.q[DIR_PPP][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][kPPP] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio;
+    q = qs.q[DIR_PMP][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][kPMP] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio;
+    q = qs.q[DIR_PPM][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kPPM] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio;
+    q = qs.q[DIR_PMM][nodeIndex]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kPMM] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio;
 
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
index a72ebb1a385d16850554dad916aab7708235980b..e0ea3c05251e995c55c2b980327059dfa7fd4069 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
@@ -1,27 +1,58 @@
-/* Device code */
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PressBCs27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//======================================================================================
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
-#include "lbm/constants/NumericConstants.h"
+#include "basics/constants/NumericConstants.h"
 #include "lbm/MacroscopicQuantities.h"
-#include "Kernel/Utilities/DistributionHelper.cuh"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-#include "KernelUtilities.h"
-
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QInflowScaleByPressDevice27(  real* rhoBC,
-                                           real* DD,
-                                           int* k_Q,
-                                           int* k_N,
-                                           int numberOfBCnodes,
-                                           real om1,
-                                           unsigned int* neighborX,
-                                           unsigned int* neighborY,
-                                           unsigned int* neighborZ,
-                                           unsigned int size_Mat,
-                                           bool isEvenTimestep)
+__global__ void QInflowScaleByPressDevice27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -100,120 +131,120 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f1_E    = (D.f[DIR_P00   ])[k1e   ];
-      real f1_W    = (D.f[DIR_M00   ])[k1w   ];
-      real f1_N    = (D.f[DIR_0P0   ])[k1n   ];
-      real f1_S    = (D.f[DIR_0M0   ])[k1s   ];
-      real f1_T    = (D.f[DIR_00P   ])[k1t   ];
-      real f1_B    = (D.f[DIR_00M   ])[k1b   ];
-      real f1_NE   = (D.f[DIR_PP0  ])[k1ne  ];
-      real f1_SW   = (D.f[DIR_MM0  ])[k1sw  ];
-      real f1_SE   = (D.f[DIR_PM0  ])[k1se  ];
-      real f1_NW   = (D.f[DIR_MP0  ])[k1nw  ];
-      real f1_TE   = (D.f[DIR_P0P  ])[k1te  ];
-      real f1_BW   = (D.f[DIR_M0M  ])[k1bw  ];
-      real f1_BE   = (D.f[DIR_P0M  ])[k1be  ];
-      real f1_TW   = (D.f[DIR_M0P  ])[k1tw  ];
-      real f1_TN   = (D.f[DIR_0PP  ])[k1tn  ];
-      real f1_BS   = (D.f[DIR_0MM  ])[k1bs  ];
-      real f1_BN   = (D.f[DIR_0PM  ])[k1bn  ];
-      real f1_TS   = (D.f[DIR_0MP  ])[k1ts  ];
+      real f1_E    = (D.f[DIR_P00])[k1e   ];
+      real f1_W    = (D.f[DIR_M00])[k1w   ];
+      real f1_N    = (D.f[DIR_0P0])[k1n   ];
+      real f1_S    = (D.f[DIR_0M0])[k1s   ];
+      real f1_T    = (D.f[DIR_00P])[k1t   ];
+      real f1_B    = (D.f[DIR_00M])[k1b   ];
+      real f1_NE   = (D.f[DIR_PP0])[k1ne  ];
+      real f1_SW   = (D.f[DIR_MM0])[k1sw  ];
+      real f1_SE   = (D.f[DIR_PM0])[k1se  ];
+      real f1_NW   = (D.f[DIR_MP0])[k1nw  ];
+      real f1_TE   = (D.f[DIR_P0P])[k1te  ];
+      real f1_BW   = (D.f[DIR_M0M])[k1bw  ];
+      real f1_BE   = (D.f[DIR_P0M])[k1be  ];
+      real f1_TW   = (D.f[DIR_M0P])[k1tw  ];
+      real f1_TN   = (D.f[DIR_0PP])[k1tn  ];
+      real f1_BS   = (D.f[DIR_0MM])[k1bs  ];
+      real f1_BN   = (D.f[DIR_0PM])[k1bn  ];
+      real f1_TS   = (D.f[DIR_0MP])[k1ts  ];
       //real f1_ZERO = (D.f[DIR_000])[k1zero];
-      real f1_TNE  = (D.f[DIR_PPP ])[k1tne ];
-      real f1_TSW  = (D.f[DIR_MMP ])[k1tsw ];
-      real f1_TSE  = (D.f[DIR_PMP ])[k1tse ];
-      real f1_TNW  = (D.f[DIR_MPP ])[k1tnw ];
-      real f1_BNE  = (D.f[DIR_PPM ])[k1bne ];
-      real f1_BSW  = (D.f[DIR_MMM ])[k1bsw ];
-      real f1_BSE  = (D.f[DIR_PMM ])[k1bse ];
-      real f1_BNW  = (D.f[DIR_MPM ])[k1bnw ];
+      real f1_TNE  = (D.f[DIR_PPP])[k1tne ];
+      real f1_TSW  = (D.f[DIR_MMP])[k1tsw ];
+      real f1_TSE  = (D.f[DIR_PMP])[k1tse ];
+      real f1_TNW  = (D.f[DIR_MPP])[k1tnw ];
+      real f1_BNE  = (D.f[DIR_PPM])[k1bne ];
+      real f1_BSW  = (D.f[DIR_MMM])[k1bsw ];
+      real f1_BSE  = (D.f[DIR_PMM])[k1bse ];
+      real f1_BNW  = (D.f[DIR_MPM])[k1bnw ];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f_E    = (D.f[DIR_P00   ])[ke   ];
-      real f_W    = (D.f[DIR_M00   ])[kw   ];
-      real f_N    = (D.f[DIR_0P0   ])[kn   ];
-      real f_S    = (D.f[DIR_0M0   ])[ks   ];
-      real f_T    = (D.f[DIR_00P   ])[kt   ];
-      real f_B    = (D.f[DIR_00M   ])[kb   ];
-      real f_NE   = (D.f[DIR_PP0  ])[kne  ];
-      real f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_SE   = (D.f[DIR_PM0  ])[kse  ];
-      real f_NW   = (D.f[DIR_MP0  ])[knw  ];
-      real f_TE   = (D.f[DIR_P0P  ])[kte  ];
-      real f_BW   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_BE   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_BS   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_BN   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_TS   = (D.f[DIR_0MP  ])[kts  ];
+      real f_E    = (D.f[DIR_P00])[ke   ];
+      real f_W    = (D.f[DIR_M00])[kw   ];
+      real f_N    = (D.f[DIR_0P0])[kn   ];
+      real f_S    = (D.f[DIR_0M0])[ks   ];
+      real f_T    = (D.f[DIR_00P])[kt   ];
+      real f_B    = (D.f[DIR_00M])[kb   ];
+      real f_NE   = (D.f[DIR_PP0])[kne  ];
+      real f_SW   = (D.f[DIR_MM0])[ksw  ];
+      real f_SE   = (D.f[DIR_PM0])[kse  ];
+      real f_NW   = (D.f[DIR_MP0])[knw  ];
+      real f_TE   = (D.f[DIR_P0P])[kte  ];
+      real f_BW   = (D.f[DIR_M0M])[kbw  ];
+      real f_BE   = (D.f[DIR_P0M])[kbe  ];
+      real f_TW   = (D.f[DIR_M0P])[ktw  ];
+      real f_TN   = (D.f[DIR_0PP])[ktn  ];
+      real f_BS   = (D.f[DIR_0MM])[kbs  ];
+      real f_BN   = (D.f[DIR_0PM])[kbn  ];
+      real f_TS   = (D.f[DIR_0MP])[kts  ];
       //real f_ZERO = (D.f[DIR_000])[kzero];
-      real f_TNE  = (D.f[DIR_PPP ])[ktne ];
-      real f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      real f_TSE  = (D.f[DIR_PMP ])[ktse ];
-      real f_TNW  = (D.f[DIR_MPP ])[ktnw ];
-      real f_BNE  = (D.f[DIR_PPM ])[kbne ];
-      real f_BSW  = (D.f[DIR_MMM ])[kbsw ];
-      real f_BSE  = (D.f[DIR_PMM ])[kbse ];
-      real f_BNW  = (D.f[DIR_MPM ])[kbnw ];
+      real f_TNE  = (D.f[DIR_PPP])[ktne ];
+      real f_TSW  = (D.f[DIR_MMP])[ktsw ];
+      real f_TSE  = (D.f[DIR_PMP])[ktse ];
+      real f_TNW  = (D.f[DIR_MPP])[ktnw ];
+      real f_BNE  = (D.f[DIR_PPM])[kbne ];
+      real f_BSW  = (D.f[DIR_MMM])[kbsw ];
+      real f_BSE  = (D.f[DIR_PMM])[kbse ];
+      real f_BNW  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       // real vx1, vx2, vx3;
       real drho, drho1;
@@ -324,106 +355,106 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
      //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////
       //__syncthreads();
      // -X
-     //(D.f[DIR_P00   ])[ke   ] = f_E   ;
-     //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
-     //(D.f[DIR_PP0  ])[kne  ] = f_NE  ;
-     //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
-     //(D.f[DIR_P0P  ])[kte  ] = f_TE  ;
-     //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
-     //(D.f[DIR_PPP ])[ktne ] = f_TNE ;
-     //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-     //(D.f[DIR_PPM ])[kbne ] = f_BNE ;
+     //(D.f[DIR_P00])[ke   ] = f_E   ;
+     //(D.f[DIR_PM0])[kse  ] = f_SE  ;
+     //(D.f[DIR_PP0])[kne  ] = f_NE  ;
+     //(D.f[DIR_P0M])[kbe  ] = f_BE  ;
+     //(D.f[DIR_P0P])[kte  ] = f_TE  ;
+     //(D.f[DIR_PMP])[ktse ] = f_TSE ;
+     //(D.f[DIR_PPP])[ktne ] = f_TNE ;
+     //(D.f[DIR_PMM])[kbse ] = f_BSE ;
+     //(D.f[DIR_PPM])[kbne ] = f_BNE ;
      // X
-     (D.f[DIR_M00   ])[kw   ] = f_W   ;
-     (D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
-     (D.f[DIR_MP0  ])[knw  ] = f_NW  ;
-     (D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
-     (D.f[DIR_M0P  ])[ktw  ] = f_TW  ;
-     (D.f[DIR_MMP ])[ktsw ] = f_TSW ;
-     (D.f[DIR_MPP ])[ktnw ] = f_TNW ;
-     (D.f[DIR_MMM ])[kbsw ] = f_BSW ;
-     (D.f[DIR_MPM ])[kbnw ] = f_BNW ;
+     (D.f[DIR_M00])[kw   ] = f_W   ;
+     (D.f[DIR_MM0])[ksw  ] = f_SW  ;
+     (D.f[DIR_MP0])[knw  ] = f_NW  ;
+     (D.f[DIR_M0M])[kbw  ] = f_BW  ;
+     (D.f[DIR_M0P])[ktw  ] = f_TW  ;
+     (D.f[DIR_MMP])[ktsw ] = f_TSW ;
+     (D.f[DIR_MPP])[ktnw ] = f_TNW ;
+     (D.f[DIR_MMM])[kbsw ] = f_BSW ;
+     (D.f[DIR_MPM])[kbnw ] = f_BNW ;
      // Y
-     //(D.f[DIR_0M0   ])[ks   ] = f_S   ;
-     //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
-     //(D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
-     //(D.f[DIR_0MP  ])[kts  ] = f_TS  ;
-     //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
-     //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
-     //(D.f[DIR_MMP ])[ktsw ] = f_TSW ;
-     //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-     //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;
+     //(D.f[DIR_0M0])[ks   ] = f_S   ;
+     //(D.f[DIR_PM0])[kse  ] = f_SE  ;
+     //(D.f[DIR_MM0])[ksw  ] = f_SW  ;
+     //(D.f[DIR_0MP])[kts  ] = f_TS  ;
+     //(D.f[DIR_0MM])[kbs  ] = f_BS  ;
+     //(D.f[DIR_PMP])[ktse ] = f_TSE ;
+     //(D.f[DIR_MMP])[ktsw ] = f_TSW ;
+     //(D.f[DIR_PMM])[kbse ] = f_BSE ;
+     //(D.f[DIR_MMM])[kbsw ] = f_BSW ;
      // Z
-     //(D.f[DIR_00M   ])[kb   ] = f_B   ;
-     //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
-     //(D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
-     //(D.f[DIR_0PM  ])[kbn  ] = f_BN  ;
-     //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
-     //(D.f[DIR_PPM ])[kbne ] = f_BNE ;
-     //(D.f[DIR_MPM ])[kbnw ] = f_BNW ;
-     //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-     //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;
+     //(D.f[DIR_00M])[kb   ] = f_B   ;
+     //(D.f[DIR_P0M])[kbe  ] = f_BE  ;
+     //(D.f[DIR_M0M])[kbw  ] = f_BW  ;
+     //(D.f[DIR_0PM])[kbn  ] = f_BN  ;
+     //(D.f[DIR_0MM])[kbs  ] = f_BS  ;
+     //(D.f[DIR_PPM])[kbne ] = f_BNE ;
+     //(D.f[DIR_MPM])[kbnw ] = f_BNW ;
+     //(D.f[DIR_PMM])[kbse ] = f_BSE ;
+     //(D.f[DIR_MMM])[kbsw ] = f_BSW ;
       //////////////////////////////////////////////////////////////////////////
    }
 }
@@ -468,17 +499,18 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceIncompNEQ27( real* rhoBC,
-                                       real* DD,
-                                       int* k_Q,
-                                       int* k_N,
-                                       int numberOfBCnodes,
-                                       real om1,
-                                       unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                       unsigned int size_Mat,
-                                       bool isEvenTimestep)
+__global__ void QPressDeviceIncompNEQ27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -557,95 +589,95 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true) //// ACHTUNG PREColl !!!!!!!!!!!!!!
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       //////////////////////////////////////////////////////////////////////////
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
@@ -737,33 +769,33 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = f1_W   ;
-      (D.f[DIR_M00   ])[kw   ] = f1_E   ;
-      (D.f[DIR_0P0   ])[kn   ] = f1_S   ;
-      (D.f[DIR_0M0   ])[ks   ] = f1_N   ;
-      (D.f[DIR_00P   ])[kt   ] = f1_B   ;
-      (D.f[DIR_00M   ])[kb   ] = f1_T   ;
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  ;
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  ;
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  ;
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  ;
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  ;
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  ;
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  ;
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  ;
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  ;
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  ;
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  ;
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  ;
+      (D.f[DIR_P00])[ke   ] = f1_W   ;
+      (D.f[DIR_M00])[kw   ] = f1_E   ;
+      (D.f[DIR_0P0])[kn   ] = f1_S   ;
+      (D.f[DIR_0M0])[ks   ] = f1_N   ;
+      (D.f[DIR_00P])[kt   ] = f1_B   ;
+      (D.f[DIR_00M])[kb   ] = f1_T   ;
+      (D.f[DIR_PP0])[kne  ] = f1_SW  ;
+      (D.f[DIR_MM0])[ksw  ] = f1_NE  ;
+      (D.f[DIR_PM0])[kse  ] = f1_NW  ;
+      (D.f[DIR_MP0])[knw  ] = f1_SE  ;
+      (D.f[DIR_P0P])[kte  ] = f1_BW  ;
+      (D.f[DIR_M0M])[kbw  ] = f1_TE  ;
+      (D.f[DIR_P0M])[kbe  ] = f1_TW  ;
+      (D.f[DIR_M0P])[ktw  ] = f1_BE  ;
+      (D.f[DIR_0PP])[ktn  ] = f1_BS  ;
+      (D.f[DIR_0MM])[kbs  ] = f1_TN  ;
+      (D.f[DIR_0PM])[kbn  ] = f1_TS  ;
+      (D.f[DIR_0MP])[kts  ] = f1_BN  ;
       (D.f[DIR_000])[kzero] = f1_ZERO;
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW ;
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE ;
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW ;
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE ;
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW ;
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE ;
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW ;
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE ;
+      (D.f[DIR_PPP])[ktne ] = f1_BSW ;
+      (D.f[DIR_MMP])[ktsw ] = f1_BNE ;
+      (D.f[DIR_PMP])[ktse ] = f1_BNW ;
+      (D.f[DIR_MPP])[ktnw ] = f1_BSE ;
+      (D.f[DIR_PPM])[kbne ] = f1_TSW ;
+      (D.f[DIR_MMM])[kbsw ] = f1_TNE ;
+      (D.f[DIR_PMM])[kbse ] = f1_TNW ;
+      (D.f[DIR_MPM])[kbnw ] = f1_TSE ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -807,54 +839,49 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceNEQ27(real* rhoBC,
-                                             real* distribution,
-                                             int* bcNodeIndices,
-                                             int* bcNeighborIndices,
-                                             int numberOfBCnodes,
-                                             real omega1,
-                                             unsigned int* neighborX,
-                                             unsigned int* neighborY,
-                                             unsigned int* neighborZ,
-                                             unsigned int numberOfLBnodes,
-                                             bool isEvenTimestep)
+__global__ void QPressDeviceNEQ27(
+    real* rhoBC,
+    real* distributions,
+    int* bcNodeIndices,
+    int* bcNeighborIndices,
+    int numberOfBCnodes,
+    real omega1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
-   //////////////////////////////////////////////////////////////////////////
+   ////////////////////////////////////////////////////////////////////////////////
    //! The pressure boundary condition is executed in the following steps
    //!
+
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned x = threadIdx.x;    // global x-index
-   const unsigned y = blockIdx.x;     // global y-index
-   const unsigned z = blockIdx.y;     // global z-index
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-
-   //////////////////////////////////////////////////////////////////////////
+   ////////////////////////////////////////////////////////////////////////////////
    //! - Run for all indices in size of boundary condition (numberOfBCnodes)
    //!
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
       //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
       //!
       Distributions27 dist;
-      getPointersToDistributions(dist, distribution, numberOfLBnodes, isEvenTimestep);
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local pressure
       //!
-      real rhoBClocal = rhoBC[k];
+      real rhoBClocal = rhoBC[nodeIndex];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int KQK  = bcNodeIndices[k];
+      unsigned int KQK  = bcNodeIndices[nodeIndex];
       unsigned int kzero= KQK;
       unsigned int ke   = KQK;
       unsigned int kw   = neighborX[KQK];
@@ -885,7 +912,7 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing) for neighboring node
       //!
-      unsigned int K1QK  = bcNeighborIndices[k];
+      unsigned int K1QK  = bcNeighborIndices[nodeIndex];
       unsigned int k1zero= K1QK;
       unsigned int k1e   = K1QK;
       unsigned int k1w   = neighborX[K1QK];
@@ -917,33 +944,33 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions for neighboring node
       //!
-      real f1_W    = (dist.f[DIR_P00   ])[k1e   ];
-      real f1_E    = (dist.f[DIR_M00   ])[k1w   ];
-      real f1_S    = (dist.f[DIR_0P0   ])[k1n   ];
-      real f1_N    = (dist.f[DIR_0M0   ])[k1s   ];
-      real f1_B    = (dist.f[DIR_00P   ])[k1t   ];
-      real f1_T    = (dist.f[DIR_00M   ])[k1b   ];
-      real f1_SW   = (dist.f[DIR_PP0  ])[k1ne  ];
-      real f1_NE   = (dist.f[DIR_MM0  ])[k1sw  ];
-      real f1_NW   = (dist.f[DIR_PM0  ])[k1se  ];
-      real f1_SE   = (dist.f[DIR_MP0  ])[k1nw  ];
-      real f1_BW   = (dist.f[DIR_P0P  ])[k1te  ];
-      real f1_TE   = (dist.f[DIR_M0M  ])[k1bw  ];
-      real f1_TW   = (dist.f[DIR_P0M  ])[k1be  ];
-      real f1_BE   = (dist.f[DIR_M0P  ])[k1tw  ];
-      real f1_BS   = (dist.f[DIR_0PP  ])[k1tn  ];
-      real f1_TN   = (dist.f[DIR_0MM  ])[k1bs  ];
-      real f1_TS   = (dist.f[DIR_0PM  ])[k1bn  ];
-      real f1_BN   = (dist.f[DIR_0MP  ])[k1ts  ];
+      real f1_W    = (dist.f[DIR_P00])[k1e   ];
+      real f1_E    = (dist.f[DIR_M00])[k1w   ];
+      real f1_S    = (dist.f[DIR_0P0])[k1n   ];
+      real f1_N    = (dist.f[DIR_0M0])[k1s   ];
+      real f1_B    = (dist.f[DIR_00P])[k1t   ];
+      real f1_T    = (dist.f[DIR_00M])[k1b   ];
+      real f1_SW   = (dist.f[DIR_PP0])[k1ne  ];
+      real f1_NE   = (dist.f[DIR_MM0])[k1sw  ];
+      real f1_NW   = (dist.f[DIR_PM0])[k1se  ];
+      real f1_SE   = (dist.f[DIR_MP0])[k1nw  ];
+      real f1_BW   = (dist.f[DIR_P0P])[k1te  ];
+      real f1_TE   = (dist.f[DIR_M0M])[k1bw  ];
+      real f1_TW   = (dist.f[DIR_P0M])[k1be  ];
+      real f1_BE   = (dist.f[DIR_M0P])[k1tw  ];
+      real f1_BS   = (dist.f[DIR_0PP])[k1tn  ];
+      real f1_TN   = (dist.f[DIR_0MM])[k1bs  ];
+      real f1_TS   = (dist.f[DIR_0PM])[k1bn  ];
+      real f1_BN   = (dist.f[DIR_0MP])[k1ts  ];
       real f1_ZERO = (dist.f[DIR_000])[k1zero];
-      real f1_BSW  = (dist.f[DIR_PPP ])[k1tne ];
-      real f1_BNE  = (dist.f[DIR_MMP ])[k1tsw ];
-      real f1_BNW  = (dist.f[DIR_PMP ])[k1tse ];
-      real f1_BSE  = (dist.f[DIR_MPP ])[k1tnw ];
-      real f1_TSW  = (dist.f[DIR_PPM ])[k1bne ];
-      real f1_TNE  = (dist.f[DIR_MMM ])[k1bsw ];
-      real f1_TNW  = (dist.f[DIR_PMM ])[k1bse ];
-      real f1_TSE  = (dist.f[DIR_MPM ])[k1bnw ];
+      real f1_BSW  = (dist.f[DIR_PPP])[k1tne ];
+      real f1_BNE  = (dist.f[DIR_MMP])[k1tsw ];
+      real f1_BNW  = (dist.f[DIR_PMP])[k1tse ];
+      real f1_BSE  = (dist.f[DIR_MPP])[k1tnw ];
+      real f1_TSW  = (dist.f[DIR_PPM])[k1bne ];
+      real f1_TNE  = (dist.f[DIR_MMM])[k1bsw ];
+      real f1_TNW  = (dist.f[DIR_PMM])[k1bse ];
+      real f1_TSE  = (dist.f[DIR_MPM])[k1bnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities (for neighboring node)
@@ -1040,33 +1067,33 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       //! write the new distributions to the bc nodes
       //!
-      (dist.f[DIR_P00   ])[ke   ] = f1_W   ;
-      (dist.f[DIR_M00   ])[kw   ] = f1_E   ;
-      (dist.f[DIR_0P0   ])[kn   ] = f1_S   ;
-      (dist.f[DIR_0M0   ])[ks   ] = f1_N   ;
-      (dist.f[DIR_00P   ])[kt   ] = f1_B   ;
-      (dist.f[DIR_00M   ])[kb   ] = f1_T   ;
-      (dist.f[DIR_PP0  ])[kne  ] = f1_SW  ;
-      (dist.f[DIR_MM0  ])[ksw  ] = f1_NE  ;
-      (dist.f[DIR_PM0  ])[kse  ] = f1_NW  ;
-      (dist.f[DIR_MP0  ])[knw  ] = f1_SE  ;
-      (dist.f[DIR_P0P  ])[kte  ] = f1_BW  ;
-      (dist.f[DIR_M0M  ])[kbw  ] = f1_TE  ;
-      (dist.f[DIR_P0M  ])[kbe  ] = f1_TW  ;
-      (dist.f[DIR_M0P  ])[ktw  ] = f1_BE  ;
-      (dist.f[DIR_0PP  ])[ktn  ] = f1_BS  ;
-      (dist.f[DIR_0MM  ])[kbs  ] = f1_TN  ;
-      (dist.f[DIR_0PM  ])[kbn  ] = f1_TS  ;
-      (dist.f[DIR_0MP  ])[kts  ] = f1_BN  ;
+      (dist.f[DIR_P00])[ke   ] = f1_W   ;
+      (dist.f[DIR_M00])[kw   ] = f1_E   ;
+      (dist.f[DIR_0P0])[kn   ] = f1_S   ;
+      (dist.f[DIR_0M0])[ks   ] = f1_N   ;
+      (dist.f[DIR_00P])[kt   ] = f1_B   ;
+      (dist.f[DIR_00M])[kb   ] = f1_T   ;
+      (dist.f[DIR_PP0])[kne  ] = f1_SW  ;
+      (dist.f[DIR_MM0])[ksw  ] = f1_NE  ;
+      (dist.f[DIR_PM0])[kse  ] = f1_NW  ;
+      (dist.f[DIR_MP0])[knw  ] = f1_SE  ;
+      (dist.f[DIR_P0P])[kte  ] = f1_BW  ;
+      (dist.f[DIR_M0M])[kbw  ] = f1_TE  ;
+      (dist.f[DIR_P0M])[kbe  ] = f1_TW  ;
+      (dist.f[DIR_M0P])[ktw  ] = f1_BE  ;
+      (dist.f[DIR_0PP])[ktn  ] = f1_BS  ;
+      (dist.f[DIR_0MM])[kbs  ] = f1_TN  ;
+      (dist.f[DIR_0PM])[kbn  ] = f1_TS  ;
+      (dist.f[DIR_0MP])[kts  ] = f1_BN  ;
       (dist.f[DIR_000])[kzero] = f1_ZERO;
-      (dist.f[DIR_PPP ])[ktne ] = f1_BSW ;
-      (dist.f[DIR_MMP ])[ktsw ] = f1_BNE ;
-      (dist.f[DIR_PMP ])[ktse ] = f1_BNW ;
-      (dist.f[DIR_MPP ])[ktnw ] = f1_BSE ;
-      (dist.f[DIR_PPM ])[kbne ] = f1_TSW ;
-      (dist.f[DIR_MMM ])[kbsw ] = f1_TNE ;
-      (dist.f[DIR_PMM ])[kbse ] = f1_TNW ;
-      (dist.f[DIR_MPM ])[kbnw ] = f1_TSE ;
+      (dist.f[DIR_PPP])[ktne ] = f1_BSW ;
+      (dist.f[DIR_MMP])[ktsw ] = f1_BNE ;
+      (dist.f[DIR_PMP])[ktse ] = f1_BNW ;
+      (dist.f[DIR_MPP])[ktnw ] = f1_BSE ;
+      (dist.f[DIR_PPM])[kbne ] = f1_TSW ;
+      (dist.f[DIR_MMM])[kbsw ] = f1_TNE ;
+      (dist.f[DIR_PMM])[kbse ] = f1_TNW ;
+      (dist.f[DIR_MPM])[kbnw ] = f1_TSE ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1110,16 +1137,17 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LB_BC_Press_East27( int nx,
-                                               int ny,
-                                               int tz,
-                                               unsigned int* bcMatD,
-                                               unsigned int* neighborX,
-                                               unsigned int* neighborY,
-                                               unsigned int* neighborZ,
-                                               real* DD,
-                                               unsigned int size_Mat,
-                                               bool isEvenTimestep)
+__global__ void LB_BC_Press_East27(
+    int nx,
+    int ny,
+    int tz,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    //thread-index
    int ty = blockIdx.x;
@@ -1140,63 +1168,63 @@ __global__ void LB_BC_Press_East27( int nx,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////
       ////////////////////////////////////////////////////////////////////////////////
@@ -1315,66 +1343,66 @@ __global__ void LB_BC_Press_East27( int nx,
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                    f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
                         f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = f1_W   -c2o27*drho1;
-      (D.f[DIR_M00   ])[kw   ] = f1_E   -c2o27*drho1;
-      (D.f[DIR_0P0   ])[kn   ] = f1_S   -c2o27*drho1;
-      (D.f[DIR_0M0   ])[ks   ] = f1_N   -c2o27*drho1;
-      (D.f[DIR_00P   ])[kt   ] = f1_B   -c2o27*drho1;
-      (D.f[DIR_00M   ])[kb   ] = f1_T   -c2o27*drho1;
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;
+      (D.f[DIR_P00])[ke   ] = f1_W   -c2o27*drho1;
+      (D.f[DIR_M00])[kw   ] = f1_E   -c2o27*drho1;
+      (D.f[DIR_0P0])[kn   ] = f1_S   -c2o27*drho1;
+      (D.f[DIR_0M0])[ks   ] = f1_N   -c2o27*drho1;
+      (D.f[DIR_00P])[kt   ] = f1_B   -c2o27*drho1;
+      (D.f[DIR_00M])[kb   ] = f1_T   -c2o27*drho1;
+      (D.f[DIR_PP0])[kne  ] = f1_SW  -c1o54*drho1;
+      (D.f[DIR_MM0])[ksw  ] = f1_NE  -c1o54*drho1;
+      (D.f[DIR_PM0])[kse  ] = f1_NW  -c1o54*drho1;
+      (D.f[DIR_MP0])[knw  ] = f1_SE  -c1o54*drho1;
+      (D.f[DIR_P0P])[kte  ] = f1_BW  -c1o54*drho1;
+      (D.f[DIR_M0M])[kbw  ] = f1_TE  -c1o54*drho1;
+      (D.f[DIR_P0M])[kbe  ] = f1_TW  -c1o54*drho1;
+      (D.f[DIR_M0P])[ktw  ] = f1_BE  -c1o54*drho1;
+      (D.f[DIR_0PP])[ktn  ] = f1_BS  -c1o54*drho1;
+      (D.f[DIR_0MM])[kbs  ] = f1_TN  -c1o54*drho1;
+      (D.f[DIR_0PM])[kbn  ] = f1_TS  -c1o54*drho1;
+      (D.f[DIR_0MP])[kts  ] = f1_BN  -c1o54*drho1;
       (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;
+      (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1;
+      (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1;
+      (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1;
+      (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1;
+      (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1;
+      (D.f[DIR_MMM])[kbsw ] = f1_TNE -c1o216*drho1;
+      (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1;
+      (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1;
    }
    __syncthreads();
 }
@@ -1419,78 +1447,79 @@ __global__ void LB_BC_Press_East27( int nx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDevice27(real* rhoBC,
-                                           real* DD,
-                                           int* k_Q,
-                                           real* QQ,
-                                           unsigned int numberOfBCnodes,
-                                           real om1,
-                                           unsigned int* neighborX,
-                                           unsigned int* neighborY,
-                                           unsigned int* neighborZ,
-                                           unsigned int size_Mat,
-                                           bool isEvenTimestep)
+__global__ void QPressDevice27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    }
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -1510,24 +1539,24 @@ __global__ void QPressDevice27(real* rhoBC,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1570,32 +1599,32 @@ __global__ void QPressDevice27(real* rhoBC,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real q, vx1, vx2, vx3, drho;
       vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
@@ -1619,63 +1648,63 @@ __global__ void QPressDevice27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       q = q_dirE[k];
@@ -1902,81 +1931,82 @@ __global__ void QPressDevice27(real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceAntiBB27(   real* rhoBC,
-                                       real* vx,
-                                       real* vy,
-                                       real* vz,
-                                       real* DD,
-                                       int* k_Q,
-                                       real* QQ,
-                                       int numberOfBCnodes,
-                                       real om1,
-                                       unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                       unsigned int size_Mat,
-                                       bool isEvenTimestep)
+__global__ void QPressDeviceAntiBB27(
+    real* rhoBC,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    }
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -1996,32 +2026,32 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   *numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   *numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   *numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   *numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   *numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   *numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  *numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  *numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  *numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  *numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  *numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  *numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  *numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  *numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  *numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  *numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  *numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  *numberOfBCnodes];
-      q_dirTNE = &QQ[DIR_PPP *numberOfBCnodes];
-      q_dirTSW = &QQ[DIR_MMP *numberOfBCnodes];
-      q_dirTSE = &QQ[DIR_PMP *numberOfBCnodes];
-      q_dirTNW = &QQ[DIR_MPP *numberOfBCnodes];
-      q_dirBNE = &QQ[DIR_PPM *numberOfBCnodes];
-      q_dirBSW = &QQ[DIR_MMM *numberOfBCnodes];
-      q_dirBSE = &QQ[DIR_PMM *numberOfBCnodes];
-      q_dirBNW = &QQ[DIR_MPM *numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2056,32 +2086,32 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW, f_ZERO;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       f_ZERO = (D.f[DIR_000])[kzero];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1, vx2, vx3, drho;
@@ -2110,63 +2140,63 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       q = q_dirE[k];
@@ -2367,16 +2397,17 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceFixBackflow27( real* rhoBC,
-                                                      real* DD,
-                                                      int* k_Q,
-                                                      int numberOfBCnodes,
-                                                      real om1,
-                                                      unsigned int* neighborX,
-                                                      unsigned int* neighborY,
-                                                      unsigned int* neighborZ,
-                                                      unsigned int size_Mat,
-                                                      bool isEvenTimestep)
+__global__ void QPressDeviceFixBackflow27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -2429,63 +2460,63 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
          (D.f[DIR_M00])[kw]       = c2o27  * deltaRho;
@@ -2558,16 +2589,17 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceDirDepBot27(  real* rhoBC,
-                                                     real* DD,
-                                                     int* k_Q,
-                                                     int numberOfBCnodes,
-                                                     real om1,
-                                                     unsigned int* neighborX,
-                                                     unsigned int* neighborY,
-                                                     unsigned int* neighborZ,
-                                                     unsigned int size_Mat,
-                                                     bool isEvenTimestep)
+__global__ void QPressDeviceDirDepBot27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -2620,86 +2652,86 @@ __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E,f_W,f_N,f_S,f_T,f_NE,f_SW,f_SE,f_NW,f_TE,f_TW,f_TN,f_TS,f_ZERO,f_TNE,f_TSW,f_TSE,f_TNW;//,
             //f_B,f_BW,f_BE,f_BS,f_BN,f_BSW,f_BNE,f_BNW,f_BSE;
 
-      f_E    = (D.f[DIR_P00   ])[ke   ];
-      f_W    = (D.f[DIR_M00   ])[kw   ];
-      f_N    = (D.f[DIR_0P0   ])[kn   ];
-      f_S    = (D.f[DIR_0M0   ])[ks   ];
-      f_T    = (D.f[DIR_00P   ])[kt   ];
-      f_NE   = (D.f[DIR_PP0  ])[kne  ];
-      f_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE   = (D.f[DIR_PM0  ])[kse  ];
-      f_NW   = (D.f[DIR_MP0  ])[knw  ];
-      f_TE   = (D.f[DIR_P0P  ])[kte  ];
-      f_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f_E    = (D.f[DIR_P00])[ke   ];
+      f_W    = (D.f[DIR_M00])[kw   ];
+      f_N    = (D.f[DIR_0P0])[kn   ];
+      f_S    = (D.f[DIR_0M0])[ks   ];
+      f_T    = (D.f[DIR_00P])[kt   ];
+      f_NE   = (D.f[DIR_PP0])[kne  ];
+      f_SW   = (D.f[DIR_MM0])[ksw  ];
+      f_SE   = (D.f[DIR_PM0])[kse  ];
+      f_NW   = (D.f[DIR_MP0])[knw  ];
+      f_TE   = (D.f[DIR_P0P])[kte  ];
+      f_TW   = (D.f[DIR_M0P])[ktw  ];
+      f_TN   = (D.f[DIR_0PP])[ktn  ];
+      f_TS   = (D.f[DIR_0MP])[kts  ];
       f_ZERO = (D.f[DIR_000])[kzero];
-      f_TNE  = (D.f[DIR_PPP ])[ktne ];
-      f_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE  = (D.f[DIR_PMP ])[ktse ];
-      f_TNW  = (D.f[DIR_MPP ])[ktnw ];
+      f_TNE  = (D.f[DIR_PPP])[ktne ];
+      f_TSW  = (D.f[DIR_MMP])[ktsw ];
+      f_TSE  = (D.f[DIR_PMP])[ktse ];
+      f_TNW  = (D.f[DIR_MPP])[ktnw ];
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
       //f_B   = (four*rho- four*f_SW-     eight*f_TSW-four*f_W-   eight*f_TW- four*f_NW-     eight*f_TNW-four*f_S-   eight*f_TS-four*f_ZERO+     f_T-four*f_N-   eight*f_TN- four*f_SE-     eight*f_TSE-four*f_E-   eight*f_TE- four*f_NE-     eight*f_TNE)/nine;
@@ -2802,30 +2834,32 @@ __host__ __device__ real computeOutflowDistribution(const real* const &f, const
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressNoRhoDevice27( real* rhoBC,
-                                     real* distributions,
-                                     int* k_Q,
-                                     int* k_N,
-                                     int numberOfBCnodes,
-                                     real om1,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes,
-                                     bool isEvenTimestep,
-                                     int direction)
+__global__ void QPressNoRhoDevice27(
+    real* rhoBC,
+    real* distributions,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    int direction)
 {
    ////////////////////////////////////////////////////////////////////////////////
+   //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
-
-   const unsigned k = vf::gpu::getNodeIndex();
    //////////////////////////////////////////////////////////////////////////
 
-   if(k>=numberOfBCnodes) return;
+   if(nodeIndex >= numberOfBCnodes) return;
 
    ////////////////////////////////////////////////////////////////////////////////
    //index
-   unsigned int KQK  = k_Q[k];
+   unsigned int KQK  = k_Q[nodeIndex];
    // unsigned int kzero= KQK;
    unsigned int ke   = KQK;
    unsigned int kw   = neighborX[KQK];
@@ -2855,7 +2889,7 @@ __global__ void QPressNoRhoDevice27( real* rhoBC,
    unsigned int kbsw = neighborZ[ksw];
    ////////////////////////////////////////////////////////////////////////////////
    //index1
-   unsigned int K1QK  = k_N[k];
+   unsigned int K1QK  = k_N[nodeIndex];
    //unsigned int k1zero= K1QK;
    unsigned int k1e   = K1QK;
    unsigned int k1w   = neighborX[K1QK];
@@ -3027,38 +3061,76 @@ __global__ void QPressNoRhoDevice27( real* rhoBC,
          break;
    }
 }
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//! \brief Computes one outflow distribution value: f1[dir] and f[dir] are blended with the factors cs and (c1o1 - cs), then weight * rhoCorrection is subtracted.
+//! \param f, f1 arrays of distribution values; both are read at index dir only. \param dir index used for both arrays.
+//! \param rhoCorrection correction term, scaled by weight before it is subtracted. \param cs blending factor applied to f1[dir]. \param weight factor multiplied with rhoCorrection.
+//! \return f1[dir] * cs + (c1o1 - cs) * f[dir] - weight * rhoCorrection. NOTE(review): c1o1 is presumably the project constant for 1 - confirm; declared __host__ __device__, so it compiles for both host and device code.
 __host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real rhoCorrection, const real cs, const real weight)
 {
    return f1[dir  ] * cs + (c1o1 - cs) * f[dir  ] - weight *rhoCorrection;
 }
 
-__global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
-                                     real* distributions,
-                                     int* k_Q,
-                                     int* k_N,
-                                     int numberOfBCnodes,
-                                     real om1,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int numberOfLBnodes,
-                                     bool isEvenTimestep,
-                                     int direction,
-                                     real densityCorrectionFactor)
+__global__ void QPressZeroRhoOutflowDevice27(
+    real* rhoBC,
+    real* distributions,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep,
+    int direction,
+    real densityCorrectionFactor)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned k = vf::gpu::getNodeIndex();
+   //! - Get the node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
 
-   if(k>=numberOfBCnodes) return;
+   if( nodeIndex >= numberOfBCnodes ) return;
+
    ////////////////////////////////////////////////////////////////////////////////
    //index
 
-   uint k_000 = k_Q[k];
+   uint k_000 = k_Q[nodeIndex];
    uint k_M00 = neighborX[k_000];
    uint k_0M0 = neighborY[k_000];
    uint k_00M = neighborZ[k_000];
@@ -3069,7 +3141,7 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
 
    ////////////////////////////////////////////////////////////////////////////////
    //index of neighbor
-   uint kN_000 = k_N[k];
+   uint kN_000 = k_N[nodeIndex];
    uint kN_M00 = neighborX[k_000];
    uint kN_0M0 = neighborY[k_000];
    uint kN_00M = neighborZ[k_000];
@@ -3255,17 +3327,18 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceOld27(real* rhoBC,
-                                             real* DD,
-                                             int* k_Q,
-                                             int* k_N,
-                                             int numberOfBCnodes,
-                                             real om1,
-                                             unsigned int* neighborX,
-                                             unsigned int* neighborY,
-                                             unsigned int* neighborZ,
-                                             unsigned int size_Mat,
-                                             bool isEvenTimestep)
+__global__ void QPressDeviceOld27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -3344,95 +3417,95 @@ __global__ void QPressDeviceOld27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       //////////////////////////////////////////////////////////////////////////
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
@@ -3444,33 +3517,33 @@ __global__ void QPressDeviceOld27(real* rhoBC,
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = f1_W   -c2o27*drho1;   //  c1o100;  // zero;  //
-      (D.f[DIR_M00   ])[kw   ] = f1_E   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0P0   ])[kn   ] = f1_S   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0M0   ])[ks   ] = f1_N   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_00P   ])[kt   ] = f1_B   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_00M   ])[kb   ] = f1_T   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P00])[ke   ] = f1_W   -c2o27*drho1;   //  c1o100;  // zero;  //
+      (D.f[DIR_M00])[kw   ] = f1_E   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0P0])[kn   ] = f1_S   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0M0])[ks   ] = f1_N   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_00P])[kt   ] = f1_B   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_00M])[kb   ] = f1_T   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PP0])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MM0])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PM0])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MP0])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0P])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0M])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0M])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0P])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PP])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MM])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PM])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MP])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
       (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //
+      (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMM])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3514,18 +3587,19 @@ __global__ void QPressDeviceOld27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceEQZ27(real* rhoBC,
-                                             real* DD,
-                                             int* k_Q,
-                                             int* k_N,
-                                  real* kTestRE,
-                                             int numberOfBCnodes,
-                                             real om1,
-                                             unsigned int* neighborX,
-                                             unsigned int* neighborY,
-                                             unsigned int* neighborZ,
-                                             unsigned int size_Mat,
-                                             bool isEvenTimestep)
+__global__ void QPressDeviceEQZ27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    real* kTestRE,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -3604,153 +3678,153 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////
     //   Distributions27 kDistTest;
-    //      kDistTest.f[DIR_P00   ] = &kTestRE[DIR_P00   *numberOfBCnodes];
-    //      kDistTest.f[DIR_M00   ] = &kTestRE[DIR_M00   *numberOfBCnodes];
-    //      kDistTest.f[DIR_0P0   ] = &kTestRE[DIR_0P0   *numberOfBCnodes];
-    //      kDistTest.f[DIR_0M0   ] = &kTestRE[DIR_0M0   *numberOfBCnodes];
-    //      kDistTest.f[DIR_00P   ] = &kTestRE[DIR_00P   *numberOfBCnodes];
-    //      kDistTest.f[DIR_00M   ] = &kTestRE[DIR_00M   *numberOfBCnodes];
-    //      kDistTest.f[DIR_PP0  ] = &kTestRE[DIR_PP0  *numberOfBCnodes];
-    //      kDistTest.f[DIR_MM0  ] = &kTestRE[DIR_MM0  *numberOfBCnodes];
-    //      kDistTest.f[DIR_PM0  ] = &kTestRE[DIR_PM0  *numberOfBCnodes];
-    //      kDistTest.f[DIR_MP0  ] = &kTestRE[DIR_MP0  *numberOfBCnodes];
-    //      kDistTest.f[DIR_P0P  ] = &kTestRE[DIR_P0P  *numberOfBCnodes];
-    //      kDistTest.f[DIR_M0M  ] = &kTestRE[DIR_M0M  *numberOfBCnodes];
-    //      kDistTest.f[DIR_P0M  ] = &kTestRE[DIR_P0M  *numberOfBCnodes];
-    //      kDistTest.f[DIR_M0P  ] = &kTestRE[DIR_M0P  *numberOfBCnodes];
-    //      kDistTest.f[DIR_0PP  ] = &kTestRE[DIR_0PP  *numberOfBCnodes];
-    //      kDistTest.f[DIR_0MM  ] = &kTestRE[DIR_0MM  *numberOfBCnodes];
-    //      kDistTest.f[DIR_0PM  ] = &kTestRE[DIR_0PM  *numberOfBCnodes];
-    //      kDistTest.f[DIR_0MP  ] = &kTestRE[DIR_0MP  *numberOfBCnodes];
-    //      kDistTest.f[DIR_000] = &kTestRE[DIR_000*numberOfBCnodes];
-    //      kDistTest.f[DIR_PPP ] = &kTestRE[DIR_PPP *numberOfBCnodes];
-    //      kDistTest.f[DIR_MMP ] = &kTestRE[DIR_MMP *numberOfBCnodes];
-    //      kDistTest.f[DIR_PMP ] = &kTestRE[DIR_PMP *numberOfBCnodes];
-    //      kDistTest.f[DIR_MPP ] = &kTestRE[DIR_MPP *numberOfBCnodes];
-    //      kDistTest.f[DIR_PPM ] = &kTestRE[DIR_PPM *numberOfBCnodes];
-    //      kDistTest.f[DIR_MMM ] = &kTestRE[DIR_MMM *numberOfBCnodes];
-    //      kDistTest.f[DIR_PMM ] = &kTestRE[DIR_PMM *numberOfBCnodes];
-    //      kDistTest.f[DIR_MPM ] = &kTestRE[DIR_MPM *numberOfBCnodes];
+    //      kDistTest.f[DIR_P00] = &kTestRE[DIR_P00 * numberOfBCnodes];
+    //      kDistTest.f[DIR_M00] = &kTestRE[DIR_M00 * numberOfBCnodes];
+    //      kDistTest.f[DIR_0P0] = &kTestRE[DIR_0P0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_0M0] = &kTestRE[DIR_0M0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_00P] = &kTestRE[DIR_00P * numberOfBCnodes];
+    //      kDistTest.f[DIR_00M] = &kTestRE[DIR_00M * numberOfBCnodes];
+    //      kDistTest.f[DIR_PP0] = &kTestRE[DIR_PP0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_MM0] = &kTestRE[DIR_MM0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_PM0] = &kTestRE[DIR_PM0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_MP0] = &kTestRE[DIR_MP0 * numberOfBCnodes];
+    //      kDistTest.f[DIR_P0P] = &kTestRE[DIR_P0P * numberOfBCnodes];
+    //      kDistTest.f[DIR_M0M] = &kTestRE[DIR_M0M * numberOfBCnodes];
+    //      kDistTest.f[DIR_P0M] = &kTestRE[DIR_P0M * numberOfBCnodes];
+    //      kDistTest.f[DIR_M0P] = &kTestRE[DIR_M0P * numberOfBCnodes];
+    //      kDistTest.f[DIR_0PP] = &kTestRE[DIR_0PP * numberOfBCnodes];
+    //      kDistTest.f[DIR_0MM] = &kTestRE[DIR_0MM * numberOfBCnodes];
+    //      kDistTest.f[DIR_0PM] = &kTestRE[DIR_0PM * numberOfBCnodes];
+    //      kDistTest.f[DIR_0MP] = &kTestRE[DIR_0MP * numberOfBCnodes];
+    //      kDistTest.f[DIR_000] = &kTestRE[DIR_000 * numberOfBCnodes];
+    //      kDistTest.f[DIR_PPP] = &kTestRE[DIR_PPP * numberOfBCnodes];
+    //      kDistTest.f[DIR_MMP] = &kTestRE[DIR_MMP * numberOfBCnodes];
+    //      kDistTest.f[DIR_PMP] = &kTestRE[DIR_PMP * numberOfBCnodes];
+    //      kDistTest.f[DIR_MPP] = &kTestRE[DIR_MPP * numberOfBCnodes];
+    //      kDistTest.f[DIR_PPM] = &kTestRE[DIR_PPM * numberOfBCnodes];
+    //      kDistTest.f[DIR_MMM] = &kTestRE[DIR_MMM * numberOfBCnodes];
+    //      kDistTest.f[DIR_PMM] = &kTestRE[DIR_PMM * numberOfBCnodes];
+    //      kDistTest.f[DIR_MPM] = &kTestRE[DIR_MPM * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   //real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
-   //   //f1_W    = (D.f[DIR_P00   ])[k1e   ];
-   //   //f1_E    = (D.f[DIR_M00   ])[k1w   ];
-   //   //f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-   //   //f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-   //   //f1_B    = (D.f[DIR_00P   ])[k1t   ];
-   //   //f1_T    = (D.f[DIR_00M   ])[k1b   ];
-   //   //f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-   //   //f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-   //   //f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-   //   //f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-   //   //f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-   //   //f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-   //   //f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-   //   //f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-   //   //f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-   //   //f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-   //   //f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-   //   //f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+   //   //f1_W    = (D.f[DIR_P00])[k1e   ];
+   //   //f1_E    = (D.f[DIR_M00])[k1w   ];
+   //   //f1_S    = (D.f[DIR_0P0])[k1n   ];
+   //   //f1_N    = (D.f[DIR_0M0])[k1s   ];
+   //   //f1_B    = (D.f[DIR_00P])[k1t   ];
+   //   //f1_T    = (D.f[DIR_00M])[k1b   ];
+   //   //f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+   //   //f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+   //   //f1_NW   = (D.f[DIR_PM0])[k1se  ];
+   //   //f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+   //   //f1_BW   = (D.f[DIR_P0P])[k1te  ];
+   //   //f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+   //   //f1_TW   = (D.f[DIR_P0M])[k1be  ];
+   //   //f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+   //   //f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+   //   //f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+   //   //f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+   //   //f1_BN   = (D.f[DIR_0MP])[k1ts  ];
    //   //f1_ZERO = (D.f[DIR_000])[k1zero];
-   //   //f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-   //   //f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-   //   //f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-   //   //f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-   //   //f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-   //   //f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-   //   //f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-   //   //f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+   //   //f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+   //   //f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+   //   //f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+   //   //f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+   //   //f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+   //   //f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+   //   //f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+   //   //f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    //   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
-   //   f1_E    = (D.f[DIR_P00   ])[k1e   ];
-   //   f1_W    = (D.f[DIR_M00   ])[k1w   ];
-   //   f1_N    = (D.f[DIR_0P0   ])[k1n   ];
-   //   f1_S    = (D.f[DIR_0M0   ])[k1s   ];
-   //   f1_T    = (D.f[DIR_00P   ])[k1t   ];
-   //   f1_B    = (D.f[DIR_00M   ])[k1b   ];
-   //   f1_NE   = (D.f[DIR_PP0  ])[k1ne  ];
-   //   f1_SW   = (D.f[DIR_MM0  ])[k1sw  ];
-   //   f1_SE   = (D.f[DIR_PM0  ])[k1se  ];
-   //   f1_NW   = (D.f[DIR_MP0  ])[k1nw  ];
-   //   f1_TE   = (D.f[DIR_P0P  ])[k1te  ];
-   //   f1_BW   = (D.f[DIR_M0M  ])[k1bw  ];
-   //   f1_BE   = (D.f[DIR_P0M  ])[k1be  ];
-   //   f1_TW   = (D.f[DIR_M0P  ])[k1tw  ];
-   //   f1_TN   = (D.f[DIR_0PP  ])[k1tn  ];
-   //   f1_BS   = (D.f[DIR_0MM  ])[k1bs  ];
-   //   f1_BN   = (D.f[DIR_0PM  ])[k1bn  ];
-   //   f1_TS   = (D.f[DIR_0MP  ])[k1ts  ];
+   //   f1_E    = (D.f[DIR_P00])[k1e   ];
+   //   f1_W    = (D.f[DIR_M00])[k1w   ];
+   //   f1_N    = (D.f[DIR_0P0])[k1n   ];
+   //   f1_S    = (D.f[DIR_0M0])[k1s   ];
+   //   f1_T    = (D.f[DIR_00P])[k1t   ];
+   //   f1_B    = (D.f[DIR_00M])[k1b   ];
+   //   f1_NE   = (D.f[DIR_PP0])[k1ne  ];
+   //   f1_SW   = (D.f[DIR_MM0])[k1sw  ];
+   //   f1_SE   = (D.f[DIR_PM0])[k1se  ];
+   //   f1_NW   = (D.f[DIR_MP0])[k1nw  ];
+   //   f1_TE   = (D.f[DIR_P0P])[k1te  ];
+   //   f1_BW   = (D.f[DIR_M0M])[k1bw  ];
+   //   f1_BE   = (D.f[DIR_P0M])[k1be  ];
+   //   f1_TW   = (D.f[DIR_M0P])[k1tw  ];
+   //   f1_TN   = (D.f[DIR_0PP])[k1tn  ];
+   //   f1_BS   = (D.f[DIR_0MM])[k1bs  ];
+   //   f1_BN   = (D.f[DIR_0PM])[k1bn  ];
+   //   f1_TS   = (D.f[DIR_0MP])[k1ts  ];
    //   f1_ZERO = (D.f[DIR_000])[k1zero];
-   //   f1_TNE  = (D.f[DIR_PPP ])[k1tne ];
-   //   f1_TSW  = (D.f[DIR_MMP ])[k1tsw ];
-   //   f1_TSE  = (D.f[DIR_PMP ])[k1tse ];
-   //   f1_TNW  = (D.f[DIR_MPP ])[k1tnw ];
-   //   f1_BNE  = (D.f[DIR_PPM ])[k1bne ];
-   //   f1_BSW  = (D.f[DIR_MMM ])[k1bsw ];
-   //   f1_BSE  = (D.f[DIR_PMM ])[k1bse ];
-   //   f1_BNW  = (D.f[DIR_MPM ])[k1bnw ];
+   //   f1_TNE  = (D.f[DIR_PPP])[k1tne ];
+   //   f1_TSW  = (D.f[DIR_MMP])[k1tsw ];
+   //   f1_TSE  = (D.f[DIR_PMP])[k1tse ];
+   //   f1_TNW  = (D.f[DIR_MPP])[k1tnw ];
+   //   f1_BNE  = (D.f[DIR_PPM])[k1bne ];
+   //   f1_BSW  = (D.f[DIR_MMM])[k1bsw ];
+   //   f1_BSE  = (D.f[DIR_PMM])[k1bse ];
+   //   f1_BNW  = (D.f[DIR_MPM])[k1bnw ];
    //   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    //   //////////////////////////////////////////////////////////////////////////
@@ -3855,88 +3929,88 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             // based on BGK Plus Comp
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         //double mfabb = (D.f[DIR_P00   ])[k1e   ];
-         //double mfcbb = (D.f[DIR_M00   ])[k1w   ];
-         //double mfbab = (D.f[DIR_0P0   ])[k1n   ];
-         //double mfbcb = (D.f[DIR_0M0   ])[k1s   ];
-         //double mfbba = (D.f[DIR_00P   ])[k1t   ];
-         //double mfbbc = (D.f[DIR_00M   ])[k1b   ];
-         //double mfaab = (D.f[DIR_PP0  ])[k1ne  ];
-         //double mfccb = (D.f[DIR_MM0  ])[k1sw  ];
-         //double mfacb = (D.f[DIR_PM0  ])[k1se  ];
-         //double mfcab = (D.f[DIR_MP0  ])[k1nw  ];
-         //double mfaba = (D.f[DIR_P0P  ])[k1te  ];
-         //double mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
-         //double mfabc = (D.f[DIR_P0M  ])[k1be  ];
-         //double mfcba = (D.f[DIR_M0P  ])[k1tw  ];
-         //double mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
-         //double mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
-         //double mfbac = (D.f[DIR_0PM  ])[k1bn  ];
-         //double mfbca = (D.f[DIR_0MP  ])[k1ts  ];
+         //double mfabb = (D.f[DIR_P00])[k1e   ];
+         //double mfcbb = (D.f[DIR_M00])[k1w   ];
+         //double mfbab = (D.f[DIR_0P0])[k1n   ];
+         //double mfbcb = (D.f[DIR_0M0])[k1s   ];
+         //double mfbba = (D.f[DIR_00P])[k1t   ];
+         //double mfbbc = (D.f[DIR_00M])[k1b   ];
+         //double mfaab = (D.f[DIR_PP0])[k1ne  ];
+         //double mfccb = (D.f[DIR_MM0])[k1sw  ];
+         //double mfacb = (D.f[DIR_PM0])[k1se  ];
+         //double mfcab = (D.f[DIR_MP0])[k1nw  ];
+         //double mfaba = (D.f[DIR_P0P])[k1te  ];
+         //double mfcbc = (D.f[DIR_M0M])[k1bw  ];
+         //double mfabc = (D.f[DIR_P0M])[k1be  ];
+         //double mfcba = (D.f[DIR_M0P])[k1tw  ];
+         //double mfbaa = (D.f[DIR_0PP])[k1tn  ];
+         //double mfbcc = (D.f[DIR_0MM])[k1bs  ];
+         //double mfbac = (D.f[DIR_0PM])[k1bn  ];
+         //double mfbca = (D.f[DIR_0MP])[k1ts  ];
          //double mfbbb = (D.f[DIR_000])[k1zero];
-         //double mfaaa = (D.f[DIR_PPP ])[k1tne ];
-         //double mfcca = (D.f[DIR_MMP ])[k1tsw ];
-         //double mfaca = (D.f[DIR_PMP ])[k1tse ];
-         //double mfcaa = (D.f[DIR_MPP ])[k1tnw ];
-         //double mfaac = (D.f[DIR_PPM ])[k1bne ];
-         //double mfccc = (D.f[DIR_MMM ])[k1bsw ];
-         //double mfacc = (D.f[DIR_PMM ])[k1bse ];
-         //double mfcac = (D.f[DIR_MPM ])[k1bnw ];
-         real mfabb = (D.f[DIR_P00   ])[k1e   ];
-         real mfcbb = (D.f[DIR_M00   ])[k1w   ];
-         real mfbab = (D.f[DIR_0P0   ])[k1n   ];
-         real mfbcb = (D.f[DIR_0M0   ])[k1s   ];
-         real mfbba = (D.f[DIR_00P   ])[k1t   ];
-         real mfbbc = (D.f[DIR_00M   ])[k1b   ];
-         real mfaab = (D.f[DIR_PP0  ])[k1ne  ];
-         real mfccb = (D.f[DIR_MM0  ])[k1sw  ];
-         real mfacb = (D.f[DIR_PM0  ])[k1se  ];
-         real mfcab = (D.f[DIR_MP0  ])[k1nw  ];
-         real mfaba = (D.f[DIR_P0P  ])[k1te  ];
-         real mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
-         real mfabc = (D.f[DIR_P0M  ])[k1be  ];
-         real mfcba = (D.f[DIR_M0P  ])[k1tw  ];
-         real mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
-         real mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
-         real mfbac = (D.f[DIR_0PM  ])[k1bn  ];
-         real mfbca = (D.f[DIR_0MP  ])[k1ts  ];
+         //double mfaaa = (D.f[DIR_PPP])[k1tne ];
+         //double mfcca = (D.f[DIR_MMP])[k1tsw ];
+         //double mfaca = (D.f[DIR_PMP])[k1tse ];
+         //double mfcaa = (D.f[DIR_MPP])[k1tnw ];
+         //double mfaac = (D.f[DIR_PPM])[k1bne ];
+         //double mfccc = (D.f[DIR_MMM])[k1bsw ];
+         //double mfacc = (D.f[DIR_PMM])[k1bse ];
+         //double mfcac = (D.f[DIR_MPM])[k1bnw ];
+         real mfabb = (D.f[DIR_P00])[k1e   ];
+         real mfcbb = (D.f[DIR_M00])[k1w   ];
+         real mfbab = (D.f[DIR_0P0])[k1n   ];
+         real mfbcb = (D.f[DIR_0M0])[k1s   ];
+         real mfbba = (D.f[DIR_00P])[k1t   ];
+         real mfbbc = (D.f[DIR_00M])[k1b   ];
+         real mfaab = (D.f[DIR_PP0])[k1ne  ];
+         real mfccb = (D.f[DIR_MM0])[k1sw  ];
+         real mfacb = (D.f[DIR_PM0])[k1se  ];
+         real mfcab = (D.f[DIR_MP0])[k1nw  ];
+         real mfaba = (D.f[DIR_P0P])[k1te  ];
+         real mfcbc = (D.f[DIR_M0M])[k1bw  ];
+         real mfabc = (D.f[DIR_P0M])[k1be  ];
+         real mfcba = (D.f[DIR_M0P])[k1tw  ];
+         real mfbaa = (D.f[DIR_0PP])[k1tn  ];
+         real mfbcc = (D.f[DIR_0MM])[k1bs  ];
+         real mfbac = (D.f[DIR_0PM])[k1bn  ];
+         real mfbca = (D.f[DIR_0MP])[k1ts  ];
          real mfbbb = (D.f[DIR_000])[k1zero];
-         real mfaaa = (D.f[DIR_PPP ])[k1tne ];
-         real mfcca = (D.f[DIR_MMP ])[k1tsw ];
-         real mfaca = (D.f[DIR_PMP ])[k1tse ];
-         real mfcaa = (D.f[DIR_MPP ])[k1tnw ];
-         real mfaac = (D.f[DIR_PPM ])[k1bne ];
-         real mfccc = (D.f[DIR_MMM ])[k1bsw ];
-         real mfacc = (D.f[DIR_PMM ])[k1bse ];
-         real mfcac = (D.f[DIR_MPM ])[k1bnw ];
-
-         //real mfcbb = (D.f[DIR_P00   ])[ke   ];
-         //real mfabb = (D.f[DIR_M00   ])[kw   ];
-         //real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-         //real mfbab = (D.f[DIR_0M0   ])[ks   ];
-         //real mfbbc = (D.f[DIR_00P   ])[kt   ];
-         //real mfbba = (D.f[DIR_00M   ])[kb   ];
-         //real mfccb = (D.f[DIR_PP0  ])[kne  ];
-         //real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-         //real mfcab = (D.f[DIR_PM0  ])[kse  ];
-         //real mfacb = (D.f[DIR_MP0  ])[knw  ];
-         //real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-         //real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-         //real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-         //real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-         //real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-         //real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-         //real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-         //real mfbac = (D.f[DIR_0MP  ])[kts  ];
+         real mfaaa = (D.f[DIR_PPP])[k1tne ];
+         real mfcca = (D.f[DIR_MMP])[k1tsw ];
+         real mfaca = (D.f[DIR_PMP])[k1tse ];
+         real mfcaa = (D.f[DIR_MPP])[k1tnw ];
+         real mfaac = (D.f[DIR_PPM])[k1bne ];
+         real mfccc = (D.f[DIR_MMM])[k1bsw ];
+         real mfacc = (D.f[DIR_PMM])[k1bse ];
+         real mfcac = (D.f[DIR_MPM])[k1bnw ];
+
+         //real mfcbb = (D.f[DIR_P00])[ke   ];
+         //real mfabb = (D.f[DIR_M00])[kw   ];
+         //real mfbcb = (D.f[DIR_0P0])[kn   ];
+         //real mfbab = (D.f[DIR_0M0])[ks   ];
+         //real mfbbc = (D.f[DIR_00P])[kt   ];
+         //real mfbba = (D.f[DIR_00M])[kb   ];
+         //real mfccb = (D.f[DIR_PP0])[kne  ];
+         //real mfaab = (D.f[DIR_MM0])[ksw  ];
+         //real mfcab = (D.f[DIR_PM0])[kse  ];
+         //real mfacb = (D.f[DIR_MP0])[knw  ];
+         //real mfcbc = (D.f[DIR_P0P])[kte  ];
+         //real mfaba = (D.f[DIR_M0M])[kbw  ];
+         //real mfcba = (D.f[DIR_P0M])[kbe  ];
+         //real mfabc = (D.f[DIR_M0P])[ktw  ];
+         //real mfbcc = (D.f[DIR_0PP])[ktn  ];
+         //real mfbaa = (D.f[DIR_0MM])[kbs  ];
+         //real mfbca = (D.f[DIR_0PM])[kbn  ];
+         //real mfbac = (D.f[DIR_0MP])[kts  ];
          //real mfbbb = (D.f[DIR_000])[kzero];
-         //real mfccc = (D.f[DIR_PPP ])[ktne ];
-         //real mfaac = (D.f[DIR_MMP ])[ktsw ];
-         //real mfcac = (D.f[DIR_PMP ])[ktse ];
-         //real mfacc = (D.f[DIR_MPP ])[ktnw ];
-         //real mfcca = (D.f[DIR_PPM ])[kbne ];
-         //real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-         //real mfcaa = (D.f[DIR_PMM ])[kbse ];
-         //real mfaca = (D.f[DIR_MPM ])[kbnw ];
+         //real mfccc = (D.f[DIR_PPP])[ktne ];
+         //real mfaac = (D.f[DIR_MMP])[ktsw ];
+         //real mfcac = (D.f[DIR_PMP])[ktse ];
+         //real mfacc = (D.f[DIR_MPP])[ktnw ];
+         //real mfcca = (D.f[DIR_PPM])[kbne ];
+         //real mfaaa = (D.f[DIR_MMM])[kbsw ];
+         //real mfcaa = (D.f[DIR_PMM])[kbse ];
+         //real mfaca = (D.f[DIR_MPM])[kbnw ];
          ////////////////////////////////////////////////////////////////////////////////////
          //real rho   = (((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) +
          //				(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -3963,61 +4037,61 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
          ////////////////////////////////////////////////////////////////////////////////////////
          ////round off error test
          //if(vvx!=zero){
-         //	(kDistTest.f[DIR_P00   ])[k] = mfabb;
-         //	(kDistTest.f[DIR_M00   ])[k] = mfcbb;
-         //	(kDistTest.f[DIR_0P0   ])[k] = mfbab;
-         //	(kDistTest.f[DIR_0M0   ])[k] = mfbcb;
-         //	(kDistTest.f[DIR_00P   ])[k] = mfbba;
-         //	(kDistTest.f[DIR_00M   ])[k] = mfbbc;
-         //	(kDistTest.f[DIR_PP0  ])[k] = mfaab;
-         //	(kDistTest.f[DIR_MM0  ])[k] = mfccb;
-         //	(kDistTest.f[DIR_PM0  ])[k] = mfacb;
-         //	(kDistTest.f[DIR_MP0  ])[k] = mfcab;
-         //	(kDistTest.f[DIR_P0P  ])[k] = mfaba;
-         //	(kDistTest.f[DIR_M0M  ])[k] = mfcbc;
-         //	(kDistTest.f[DIR_P0M  ])[k] = mfabc;
-         //	(kDistTest.f[DIR_M0P  ])[k] = mfcba;
-         //	(kDistTest.f[DIR_0PP  ])[k] = mfbaa;
-         //	(kDistTest.f[DIR_0MM  ])[k] = mfbcc;
-         //	(kDistTest.f[DIR_0PM  ])[k] = mfbac;
-         //	(kDistTest.f[DIR_0MP  ])[k] = mfbca;
+         //	(kDistTest.f[DIR_P00])[k] = mfabb;
+         //	(kDistTest.f[DIR_M00])[k] = mfcbb;
+         //	(kDistTest.f[DIR_0P0])[k] = mfbab;
+         //	(kDistTest.f[DIR_0M0])[k] = mfbcb;
+         //	(kDistTest.f[DIR_00P])[k] = mfbba;
+         //	(kDistTest.f[DIR_00M])[k] = mfbbc;
+         //	(kDistTest.f[DIR_PP0])[k] = mfaab;
+         //	(kDistTest.f[DIR_MM0])[k] = mfccb;
+         //	(kDistTest.f[DIR_PM0])[k] = mfacb;
+         //	(kDistTest.f[DIR_MP0])[k] = mfcab;
+         //	(kDistTest.f[DIR_P0P])[k] = mfaba;
+         //	(kDistTest.f[DIR_M0M])[k] = mfcbc;
+         //	(kDistTest.f[DIR_P0M])[k] = mfabc;
+         //	(kDistTest.f[DIR_M0P])[k] = mfcba;
+         //	(kDistTest.f[DIR_0PP])[k] = mfbaa;
+         //	(kDistTest.f[DIR_0MM])[k] = mfbcc;
+         //	(kDistTest.f[DIR_0PM])[k] = mfbac;
+         //	(kDistTest.f[DIR_0MP])[k] = mfbca;
          //	(kDistTest.f[DIR_000])[k] = KQK;
-         //	(kDistTest.f[DIR_PPP ])[k] = mfaaa;
-         //	(kDistTest.f[DIR_MMP ])[k] = mfcca;
-         //	(kDistTest.f[DIR_PMP ])[k] = mfaca;
-         //	(kDistTest.f[DIR_MPP ])[k] = mfcaa;
-         //	(kDistTest.f[DIR_PPM ])[k] = mfaac;
-         //	(kDistTest.f[DIR_MMM ])[k] = mfccc;
-         //	(kDistTest.f[DIR_PMM ])[k] = mfacc;
-         //	(kDistTest.f[DIR_MPM ])[k] = mfcac;
+         //	(kDistTest.f[DIR_PPP])[k] = mfaaa;
+         //	(kDistTest.f[DIR_MMP])[k] = mfcca;
+         //	(kDistTest.f[DIR_PMP])[k] = mfaca;
+         //	(kDistTest.f[DIR_MPP])[k] = mfcaa;
+         //	(kDistTest.f[DIR_PPM])[k] = mfaac;
+         //	(kDistTest.f[DIR_MMM])[k] = mfccc;
+         //	(kDistTest.f[DIR_PMM])[k] = mfacc;
+         //	(kDistTest.f[DIR_MPM])[k] = mfcac;
          //}else{
-         //	(kDistTest.f[DIR_P00   ])[k] = zero;
-         //	(kDistTest.f[DIR_M00   ])[k] = zero;
-         //	(kDistTest.f[DIR_0P0   ])[k] = zero;
-         //	(kDistTest.f[DIR_0M0   ])[k] = zero;
-         //	(kDistTest.f[DIR_00P   ])[k] = zero;
-         //	(kDistTest.f[DIR_00M   ])[k] = zero;
-         //	(kDistTest.f[DIR_PP0  ])[k] = zero;
-         //	(kDistTest.f[DIR_MM0  ])[k] = zero;
-         //	(kDistTest.f[DIR_PM0  ])[k] = zero;
-         //	(kDistTest.f[DIR_MP0  ])[k] = zero;
-         //	(kDistTest.f[DIR_P0P  ])[k] = zero;
-         //	(kDistTest.f[DIR_M0M  ])[k] = zero;
-         //	(kDistTest.f[DIR_P0M  ])[k] = zero;
-         //	(kDistTest.f[DIR_M0P  ])[k] = zero;
-         //	(kDistTest.f[DIR_0PP  ])[k] = zero;
-         //	(kDistTest.f[DIR_0MM  ])[k] = zero;
-         //	(kDistTest.f[DIR_0PM  ])[k] = zero;
-         //	(kDistTest.f[DIR_0MP  ])[k] = zero;
+         //	(kDistTest.f[DIR_P00])[k] = zero;
+         //	(kDistTest.f[DIR_M00])[k] = zero;
+         //	(kDistTest.f[DIR_0P0])[k] = zero;
+         //	(kDistTest.f[DIR_0M0])[k] = zero;
+         //	(kDistTest.f[DIR_00P])[k] = zero;
+         //	(kDistTest.f[DIR_00M])[k] = zero;
+         //	(kDistTest.f[DIR_PP0])[k] = zero;
+         //	(kDistTest.f[DIR_MM0])[k] = zero;
+         //	(kDistTest.f[DIR_PM0])[k] = zero;
+         //	(kDistTest.f[DIR_MP0])[k] = zero;
+         //	(kDistTest.f[DIR_P0P])[k] = zero;
+         //	(kDistTest.f[DIR_M0M])[k] = zero;
+         //	(kDistTest.f[DIR_P0M])[k] = zero;
+         //	(kDistTest.f[DIR_M0P])[k] = zero;
+         //	(kDistTest.f[DIR_0PP])[k] = zero;
+         //	(kDistTest.f[DIR_0MM])[k] = zero;
+         //	(kDistTest.f[DIR_0PM])[k] = zero;
+         //	(kDistTest.f[DIR_0MP])[k] = zero;
          //	(kDistTest.f[DIR_000])[k] = zero;
-         //	(kDistTest.f[DIR_PPP ])[k] = zero;
-         //	(kDistTest.f[DIR_MMP ])[k] = zero;
-         //	(kDistTest.f[DIR_PMP ])[k] = zero;
-         //	(kDistTest.f[DIR_MPP ])[k] = zero;
-         //	(kDistTest.f[DIR_PPM ])[k] = zero;
-         //	(kDistTest.f[DIR_MMM ])[k] = zero;
-         //	(kDistTest.f[DIR_PMM ])[k] = zero;
-         //	(kDistTest.f[DIR_MPM ])[k] = zero;
+         //	(kDistTest.f[DIR_PPP])[k] = zero;
+         //	(kDistTest.f[DIR_MMP])[k] = zero;
+         //	(kDistTest.f[DIR_PMP])[k] = zero;
+         //	(kDistTest.f[DIR_MPP])[k] = zero;
+         //	(kDistTest.f[DIR_PPM])[k] = zero;
+         //	(kDistTest.f[DIR_MMM])[k] = zero;
+         //	(kDistTest.f[DIR_PMM])[k] = zero;
+         //	(kDistTest.f[DIR_MPM])[k] = zero;
          //}
 
          //////////////////////////////////////////////////////////////////////////////////////
@@ -4109,149 +4183,149 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //if (isEvenTimestep==true)
       //{
-      //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      //   D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+      //   D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+      //   D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+      //   D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+      //   D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+      //   D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+      //   D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+      //   D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+      //   D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+      //   D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+      //   D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+      //   D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+      //   D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+      //   D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+      //   D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+      //   D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+      //   D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+      //   D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+      //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+      //   D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+      //   D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+      //   D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+      //   D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+      //   D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+      //   D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+      //   D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+      //   D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
       //}
       //else
       //{
-      //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      //   D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+      //   D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+      //   D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+      //   D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+      //   D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+      //   D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+      //   D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+      //   D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+      //   D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+      //   D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+      //   D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+      //   D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+      //   D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+      //   D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+      //   D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+      //   D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+      //   D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+      //   D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+      //   D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+      //   D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+      //   D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+      //   D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+      //   D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+      //   D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+      //   D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+      //   D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+      //   D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
       //}
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //__syncthreads();
 
-         (D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
-         (D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
-         (D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
-         (D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
-         (D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
-         (D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
-         (D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
-         (D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
-         (D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
-         (D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
-         (D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
-         (D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
-         (D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
-         (D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
-         (D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
-         (D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
-         (D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
-         (D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
+         (D.f[DIR_P00])[ke   ] = mfabb;//mfcbb;
+         (D.f[DIR_M00])[kw   ] = mfcbb;//mfabb;
+         (D.f[DIR_0P0])[kn   ] = mfbab;//mfbcb;
+         (D.f[DIR_0M0])[ks   ] = mfbcb;//mfbab;
+         (D.f[DIR_00P])[kt   ] = mfbba;//mfbbc;
+         (D.f[DIR_00M])[kb   ] = mfbbc;//mfbba;
+         (D.f[DIR_PP0])[kne  ] = mfaab;//mfccb;
+         (D.f[DIR_MM0])[ksw  ] = mfccb;//mfaab;
+         (D.f[DIR_PM0])[kse  ] = mfacb;//mfcab;
+         (D.f[DIR_MP0])[knw  ] = mfcab;//mfacb;
+         (D.f[DIR_P0P])[kte  ] = mfaba;//mfcbc;
+         (D.f[DIR_M0M])[kbw  ] = mfcbc;//mfaba;
+         (D.f[DIR_P0M])[kbe  ] = mfabc;//mfcba;
+         (D.f[DIR_M0P])[ktw  ] = mfcba;//mfabc;
+         (D.f[DIR_0PP])[ktn  ] = mfbaa;//mfbcc;
+         (D.f[DIR_0MM])[kbs  ] = mfbcc;//mfbaa;
+         (D.f[DIR_0PM])[kbn  ] = mfbac;//mfbca;
+         (D.f[DIR_0MP])[kts  ] = mfbca;//mfbac;
          (D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
-         (D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
-         (D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
-         (D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
-         (D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
-         (D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
-         (D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
-         (D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
-         (D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
-         //(D.f[DIR_P00   ])[ke   ] = mfcbb;
-         //(D.f[DIR_M00   ])[kw   ] = mfabb;
-         //(D.f[DIR_0P0   ])[kn   ] = mfbcb;
-         //(D.f[DIR_0M0   ])[ks   ] = mfbab;
-         //(D.f[DIR_00P   ])[kt   ] = mfbbc;
-         //(D.f[DIR_00M   ])[kb   ] = mfbba;
-         //(D.f[DIR_PP0  ])[kne  ] = mfccb;
-         //(D.f[DIR_MM0  ])[ksw  ] = mfaab;
-         //(D.f[DIR_PM0  ])[kse  ] = mfcab;
-         //(D.f[DIR_MP0  ])[knw  ] = mfacb;
-         //(D.f[DIR_P0P  ])[kte  ] = mfcbc;
-         //(D.f[DIR_M0M  ])[kbw  ] = mfaba;
-         //(D.f[DIR_P0M  ])[kbe  ] = mfcba;
-         //(D.f[DIR_M0P  ])[ktw  ] = mfabc;
-         //(D.f[DIR_0PP  ])[ktn  ] = mfbcc;
-         //(D.f[DIR_0MM  ])[kbs  ] = mfbaa;
-         //(D.f[DIR_0PM  ])[kbn  ] = mfbca;
-         //(D.f[DIR_0MP  ])[kts  ] = mfbac;
+         (D.f[DIR_PPP])[ktne ] = mfaaa;//mfccc;
+         (D.f[DIR_MMP])[ktsw ] = mfcca;//mfaac;
+         (D.f[DIR_PMP])[ktse ] = mfaca;//mfcac;
+         (D.f[DIR_MPP])[ktnw ] = mfcaa;//mfacc;
+         (D.f[DIR_PPM])[kbne ] = mfaac;//mfcca;
+         (D.f[DIR_MMM])[kbsw ] = mfccc;//mfaaa;
+         (D.f[DIR_PMM])[kbse ] = mfacc;//mfcaa;
+         (D.f[DIR_MPM])[kbnw ] = mfcac;//mfaca;
+         //(D.f[DIR_P00])[ke   ] = mfcbb;
+         //(D.f[DIR_M00])[kw   ] = mfabb;
+         //(D.f[DIR_0P0])[kn   ] = mfbcb;
+         //(D.f[DIR_0M0])[ks   ] = mfbab;
+         //(D.f[DIR_00P])[kt   ] = mfbbc;
+         //(D.f[DIR_00M])[kb   ] = mfbba;
+         //(D.f[DIR_PP0])[kne  ] = mfccb;
+         //(D.f[DIR_MM0])[ksw  ] = mfaab;
+         //(D.f[DIR_PM0])[kse  ] = mfcab;
+         //(D.f[DIR_MP0])[knw  ] = mfacb;
+         //(D.f[DIR_P0P])[kte  ] = mfcbc;
+         //(D.f[DIR_M0M])[kbw  ] = mfaba;
+         //(D.f[DIR_P0M])[kbe  ] = mfcba;
+         //(D.f[DIR_M0P])[ktw  ] = mfabc;
+         //(D.f[DIR_0PP])[ktn  ] = mfbcc;
+         //(D.f[DIR_0MM])[kbs  ] = mfbaa;
+         //(D.f[DIR_0PM])[kbn  ] = mfbca;
+         //(D.f[DIR_0MP])[kts  ] = mfbac;
          //(D.f[DIR_000])[kzero] = mfbbb;
-         //(D.f[DIR_PPP ])[ktne ] = mfccc;
-         //(D.f[DIR_MMP ])[ktsw ] = mfaac;
-         //(D.f[DIR_PMP ])[ktse ] = mfcac;
-         //(D.f[DIR_MPP ])[ktnw ] = mfacc;
-         //(D.f[DIR_PPM ])[kbne ] = mfcca;
-         //(D.f[DIR_MMM ])[kbsw ] = mfaaa;
-         //(D.f[DIR_PMM ])[kbse ] = mfcaa;
-         //(D.f[DIR_MPM ])[kbnw ] = mfaca;
-
-      //(D.f[DIR_P00   ])[ke   ] = fE ;  //f1_E ;   //fW;    //fE ;
-      //(D.f[DIR_M00   ])[kw   ] = fW ;  //f1_W ;   //fE;    //fW ;
-      //(D.f[DIR_0P0   ])[kn   ] = fN ;  //f1_N ;   //fS;    //fN ;
-      //(D.f[DIR_0M0   ])[ks   ] = fS ;  //f1_S ;   //fN;    //fS ;
-      //(D.f[DIR_00P   ])[kt   ] = fT ;  //f1_T ;   //fB;    //fT ;
-      //(D.f[DIR_00M   ])[kb   ] = fB ;  //f1_B ;   //fT;    //fB ;
-      //(D.f[DIR_PP0  ])[kne  ] = fNE;  //f1_NE;   //fSW;   //fNE;
-      //(D.f[DIR_MM0  ])[ksw  ] = fSW;  //f1_SW;   //fNE;   //fSW;
-      //(D.f[DIR_PM0  ])[kse  ] = fSE;  //f1_SE;   //fNW;   //fSE;
-      //(D.f[DIR_MP0  ])[knw  ] = fNW;  //f1_NW;   //fSE;   //fNW;
-      //(D.f[DIR_P0P  ])[kte  ] = fTE;  //f1_TE;   //fBW;   //fTE;
-      //(D.f[DIR_M0M  ])[kbw  ] = fBW;  //f1_BW;   //fTE;   //fBW;
-      //(D.f[DIR_P0M  ])[kbe  ] = fBE;  //f1_BE;   //fTW;   //fBE;
-      //(D.f[DIR_M0P  ])[ktw  ] = fTW;  //f1_TW;   //fBE;   //fTW;
-      //(D.f[DIR_0PP  ])[ktn  ] = fTN;  //f1_TN;   //fBS;   //fTN;
-      //(D.f[DIR_0MM  ])[kbs  ] = fBS;  //f1_BS;   //fTN;   //fBS;
-      //(D.f[DIR_0PM  ])[kbn  ] = fBN;  //f1_BN;   //fTS;   //fBN;
-      //(D.f[DIR_0MP  ])[kts  ] = fTS;  //f1_TS;   //fBN;   //fTS;
+         //(D.f[DIR_PPP])[ktne ] = mfccc;
+         //(D.f[DIR_MMP])[ktsw ] = mfaac;
+         //(D.f[DIR_PMP])[ktse ] = mfcac;
+         //(D.f[DIR_MPP])[ktnw ] = mfacc;
+         //(D.f[DIR_PPM])[kbne ] = mfcca;
+         //(D.f[DIR_MMM])[kbsw ] = mfaaa;
+         //(D.f[DIR_PMM])[kbse ] = mfcaa;
+         //(D.f[DIR_MPM])[kbnw ] = mfaca;
+
+      //(D.f[DIR_P00])[ke   ] = fE ;  //f1_E ;   //fW;    //fE ;
+      //(D.f[DIR_M00])[kw   ] = fW ;  //f1_W ;   //fE;    //fW ;
+      //(D.f[DIR_0P0])[kn   ] = fN ;  //f1_N ;   //fS;    //fN ;
+      //(D.f[DIR_0M0])[ks   ] = fS ;  //f1_S ;   //fN;    //fS ;
+      //(D.f[DIR_00P])[kt   ] = fT ;  //f1_T ;   //fB;    //fT ;
+      //(D.f[DIR_00M])[kb   ] = fB ;  //f1_B ;   //fT;    //fB ;
+      //(D.f[DIR_PP0])[kne  ] = fNE;  //f1_NE;   //fSW;   //fNE;
+      //(D.f[DIR_MM0])[ksw  ] = fSW;  //f1_SW;   //fNE;   //fSW;
+      //(D.f[DIR_PM0])[kse  ] = fSE;  //f1_SE;   //fNW;   //fSE;
+      //(D.f[DIR_MP0])[knw  ] = fNW;  //f1_NW;   //fSE;   //fNW;
+      //(D.f[DIR_P0P])[kte  ] = fTE;  //f1_TE;   //fBW;   //fTE;
+      //(D.f[DIR_M0M])[kbw  ] = fBW;  //f1_BW;   //fTE;   //fBW;
+      //(D.f[DIR_P0M])[kbe  ] = fBE;  //f1_BE;   //fTW;   //fBE;
+      //(D.f[DIR_M0P])[ktw  ] = fTW;  //f1_TW;   //fBE;   //fTW;
+      //(D.f[DIR_0PP])[ktn  ] = fTN;  //f1_TN;   //fBS;   //fTN;
+      //(D.f[DIR_0MM])[kbs  ] = fBS;  //f1_BS;   //fTN;   //fBS;
+      //(D.f[DIR_0PM])[kbn  ] = fBN;  //f1_BN;   //fTS;   //fBN;
+      //(D.f[DIR_0MP])[kts  ] = fTS;  //f1_TS;   //fBN;   //fTS;
       //(D.f[DIR_000])[kzero] = fZERO;//f1_ZERO; //fZERO; //fZERO;
-      //(D.f[DIR_PPP ])[ktne ] = fTNE; //f1_TNE;  //fBSW;  //fTNE;
-      //(D.f[DIR_MMM ])[kbsw ] = fBSW; //f1_BSW;  //fTNE;  //fBSW;
-      //(D.f[DIR_PPM ])[kbne ] = fBNE; //f1_BNE;  //fTSW;  //fBNE;
-      //(D.f[DIR_MMP ])[ktsw ] = fTSW; //f1_TSW;  //fBNE;  //fTSW;
-      //(D.f[DIR_PMP ])[ktse ] = fTSE; //f1_TSE;  //fBNW;  //fTSE;
-      //(D.f[DIR_MPM ])[kbnw ] = fBNW; //f1_BNW;  //fTSE;  //fBNW;
-      //(D.f[DIR_PMM ])[kbse ] = fBSE; //f1_BSE;  //fTNW;  //fBSE;
-      //(D.f[DIR_MPP ])[ktnw ] = fTNW; //f1_TNW;  //fBSE;  //fTNW;
+      //(D.f[DIR_PPP])[ktne ] = fTNE; //f1_TNE;  //fBSW;  //fTNE;
+      //(D.f[DIR_MMM])[kbsw ] = fBSW; //f1_BSW;  //fTNE;  //fBSW;
+      //(D.f[DIR_PPM])[kbne ] = fBNE; //f1_BNE;  //fTSW;  //fBNE;
+      //(D.f[DIR_MMP])[ktsw ] = fTSW; //f1_TSW;  //fBNE;  //fTSW;
+      //(D.f[DIR_PMP])[ktse ] = fTSE; //f1_TSE;  //fBNW;  //fTSE;
+      //(D.f[DIR_MPM])[kbnw ] = fBNW; //f1_BNW;  //fTSE;  //fBNW;
+      //(D.f[DIR_PMM])[kbse ] = fBSE; //f1_BSE;  //fTNW;  //fBSE;
+      //(D.f[DIR_MPP])[ktnw ] = fTNW; //f1_TNW;  //fBSE;  //fTNW;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4295,14 +4369,15 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceZero27(	 real* DD,
-                                     int* k_Q,
-                                     unsigned int numberOfBCnodes,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int size_Mat,
-                                     bool isEvenTimestep)
+__global__ void QPressDeviceZero27(
+    real* DD,
+    int* k_Q,
+    unsigned int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -4351,94 +4426,94 @@ __global__ void QPressDeviceZero27(	 real* DD,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //__syncthreads();
      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      (D.f[DIR_P00   ])[ke   ] =c0o1;
-      (D.f[DIR_M00   ])[kw   ] =c0o1;
-      (D.f[DIR_0P0   ])[kn   ] =c0o1;
-      (D.f[DIR_0M0   ])[ks   ] =c0o1;
-      (D.f[DIR_00P   ])[kt   ] =c0o1;
-      (D.f[DIR_00M   ])[kb   ] =c0o1;
-      (D.f[DIR_PP0  ])[kne  ] =c0o1;
-      (D.f[DIR_MM0  ])[ksw  ] =c0o1;
-      (D.f[DIR_PM0  ])[kse  ] =c0o1;
-      (D.f[DIR_MP0  ])[knw  ] =c0o1;
-      (D.f[DIR_P0P  ])[kte  ] =c0o1;
-      (D.f[DIR_M0M  ])[kbw  ] =c0o1;
-      (D.f[DIR_P0M  ])[kbe  ] =c0o1;
-      (D.f[DIR_M0P  ])[ktw  ] =c0o1;
-      (D.f[DIR_0PP  ])[ktn  ] =c0o1;
-      (D.f[DIR_0MM  ])[kbs  ] =c0o1;
-      (D.f[DIR_0PM  ])[kbn  ] =c0o1;
-      (D.f[DIR_0MP  ])[kts  ] =c0o1;
+      (D.f[DIR_P00])[ke   ] =c0o1;
+      (D.f[DIR_M00])[kw   ] =c0o1;
+      (D.f[DIR_0P0])[kn   ] =c0o1;
+      (D.f[DIR_0M0])[ks   ] =c0o1;
+      (D.f[DIR_00P])[kt   ] =c0o1;
+      (D.f[DIR_00M])[kb   ] =c0o1;
+      (D.f[DIR_PP0])[kne  ] =c0o1;
+      (D.f[DIR_MM0])[ksw  ] =c0o1;
+      (D.f[DIR_PM0])[kse  ] =c0o1;
+      (D.f[DIR_MP0])[knw  ] =c0o1;
+      (D.f[DIR_P0P])[kte  ] =c0o1;
+      (D.f[DIR_M0M])[kbw  ] =c0o1;
+      (D.f[DIR_P0M])[kbe  ] =c0o1;
+      (D.f[DIR_M0P])[ktw  ] =c0o1;
+      (D.f[DIR_0PP])[ktn  ] =c0o1;
+      (D.f[DIR_0MM])[kbs  ] =c0o1;
+      (D.f[DIR_0PM])[kbn  ] =c0o1;
+      (D.f[DIR_0MP])[kts  ] =c0o1;
       (D.f[DIR_000])[kzero] =c0o1;
-      (D.f[DIR_PPP ])[ktne ] =c0o1;
-      (D.f[DIR_MMP ])[ktsw ] =c0o1;
-      (D.f[DIR_PMP ])[ktse ] =c0o1;
-      (D.f[DIR_MPP ])[ktnw ] =c0o1;
-      (D.f[DIR_PPM ])[kbne ] =c0o1;
-      (D.f[DIR_MMM ])[kbsw ] =c0o1;
-      (D.f[DIR_PMM ])[kbse ] =c0o1;
-      (D.f[DIR_MPM ])[kbnw ] =c0o1;
+      (D.f[DIR_PPP])[ktne ] =c0o1;
+      (D.f[DIR_MMP])[ktsw ] =c0o1;
+      (D.f[DIR_PMP])[ktse ] =c0o1;
+      (D.f[DIR_MPP])[ktnw ] =c0o1;
+      (D.f[DIR_PPM])[kbne ] =c0o1;
+      (D.f[DIR_MMM])[kbsw ] =c0o1;
+      (D.f[DIR_PMM])[kbse ] =c0o1;
+      (D.f[DIR_MPM])[kbnw ] =c0o1;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4482,17 +4557,18 @@ __global__ void QPressDeviceZero27(	 real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceFake27(	 real* rhoBC,
-                                     real* DD,
-                                     int* k_Q,
-                                     int* k_N,
-                                     int numberOfBCnodes,
-                                     real om1,
-                                     unsigned int* neighborX,
-                                     unsigned int* neighborY,
-                                     unsigned int* neighborZ,
-                                     unsigned int size_Mat,
-                                     bool isEvenTimestep)
+__global__ void QPressDeviceFake27(
+    real* rhoBC,
+    real* DD,
+    int* k_Q,
+    int* k_N,
+    int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -4571,95 +4647,95 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
          f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3;
@@ -4686,33 +4762,33 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = c2o27* (rhoBC[k]+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-      (D.f[DIR_M00   ])[kw   ] = c2o27* (rhoBC[k]+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-      (D.f[DIR_0P0   ])[kn   ] = c2o27* (rhoBC[k]+c3o1*(    -vx2    )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-      (D.f[DIR_0M0   ])[ks   ] = c2o27* (rhoBC[k]+c3o1*(     vx2    )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-      (D.f[DIR_00P   ])[kt   ] = c2o27* (rhoBC[k]+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-      (D.f[DIR_00M   ])[kb   ] = c2o27* (rhoBC[k]+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P00])[ke   ] = c2o27* (rhoBC[k]+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+      (D.f[DIR_M00])[kw   ] = c2o27* (rhoBC[k]+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+      (D.f[DIR_0P0])[kn   ] = c2o27* (rhoBC[k]+c3o1*(    -vx2    )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+      (D.f[DIR_0M0])[ks   ] = c2o27* (rhoBC[k]+c3o1*(     vx2    )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+      (D.f[DIR_00P])[kt   ] = c2o27* (rhoBC[k]+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+      (D.f[DIR_00M])[kb   ] = c2o27* (rhoBC[k]+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+      (D.f[DIR_PP0])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MM0])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PM0])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MP0])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0P])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0M])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0M])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0P])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PP])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MM])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PM])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MP])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
       (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //
+      (D.f[DIR_PPP])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMP])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMP])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPP])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PPM])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMM])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMM])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPM])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4756,78 +4832,79 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////
-__global__ void QPressDevice27_IntBB(real* rho,
-                                    real* DD,
-                                    int* k_Q,
-                                    real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
+__global__ void QPressDevice27_IntBB(
+    real* rho,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    }
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -4852,24 +4929,24 @@ __global__ void QPressDevice27_IntBB(real* rho,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -4912,32 +4989,32 @@ __global__ void QPressDevice27_IntBB(real* rho,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -4962,63 +5039,63 @@ __global__ void QPressDevice27_IntBB(real* rho,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Random.cu b/src/gpu/VirtualFluids_GPU/GPU/Random.cu
index a605fbd42d2977e0f0b6e15aeb50f8c78654f31c..2f9417f2404d773b222f1b79f8456adfaf741018 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Random.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Random.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 //random numbers
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
index 8675780d26e63656b04fdfc1f9836b1eba8d1b87..70e938db5df2bae442034ce0303081e8b175e5f6 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
@@ -21,7 +21,7 @@ __global__ void PressSchlaff27(real* rhoBC,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -71,94 +71,94 @@ __global__ void PressSchlaff27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_E    = (D.f[DIR_P00   ])[ke   ];
-      f1_W    = (D.f[DIR_M00   ])[kw   ];
-      f1_N    = (D.f[DIR_0P0   ])[kn   ];
-      f1_S    = (D.f[DIR_0M0   ])[ks   ];
-      f1_T    = (D.f[DIR_00P   ])[kt   ];
-      f1_B    = (D.f[DIR_00M   ])[kb   ];
-      f1_NE   = (D.f[DIR_PP0  ])[kne  ];
-      f1_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      f1_SE   = (D.f[DIR_PM0  ])[kse  ];
-      f1_NW   = (D.f[DIR_MP0  ])[knw  ];
-      f1_TE   = (D.f[DIR_P0P  ])[kte  ];
-      f1_BW   = (D.f[DIR_M0M  ])[kbw  ];
-      f1_BE   = (D.f[DIR_P0M  ])[kbe  ];
-      f1_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      f1_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      f1_BS   = (D.f[DIR_0MM  ])[kbs  ];
-      f1_BN   = (D.f[DIR_0PM  ])[kbn  ];
-      f1_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f1_E    = (D.f[DIR_P00])[ke   ];
+      f1_W    = (D.f[DIR_M00])[kw   ];
+      f1_N    = (D.f[DIR_0P0])[kn   ];
+      f1_S    = (D.f[DIR_0M0])[ks   ];
+      f1_T    = (D.f[DIR_00P])[kt   ];
+      f1_B    = (D.f[DIR_00M])[kb   ];
+      f1_NE   = (D.f[DIR_PP0])[kne  ];
+      f1_SW   = (D.f[DIR_MM0])[ksw  ];
+      f1_SE   = (D.f[DIR_PM0])[kse  ];
+      f1_NW   = (D.f[DIR_MP0])[knw  ];
+      f1_TE   = (D.f[DIR_P0P])[kte  ];
+      f1_BW   = (D.f[DIR_M0M])[kbw  ];
+      f1_BE   = (D.f[DIR_P0M])[kbe  ];
+      f1_TW   = (D.f[DIR_M0P])[ktw  ];
+      f1_TN   = (D.f[DIR_0PP])[ktn  ];
+      f1_BS   = (D.f[DIR_0MM])[kbs  ];
+      f1_BN   = (D.f[DIR_0PM])[kbn  ];
+      f1_TS   = (D.f[DIR_0MP])[kts  ];
       f1_ZERO = (D.f[DIR_000])[kzero];
-      f1_TNE  = (D.f[DIR_PPP ])[ktne ];
-      f1_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      f1_TSE  = (D.f[DIR_PMP ])[ktse ];
-      f1_TNW  = (D.f[DIR_MPP ])[ktnw ];
-      f1_BNE  = (D.f[DIR_PPM ])[kbne ];
-      f1_BSW  = (D.f[DIR_MMM ])[kbsw ];
-      f1_BSE  = (D.f[DIR_PMM ])[kbse ];
-      f1_BNW  = (D.f[DIR_MPM ])[kbnw ];
+      f1_TNE  = (D.f[DIR_PPP])[ktne ];
+      f1_TSW  = (D.f[DIR_MMP])[ktsw ];
+      f1_TSE  = (D.f[DIR_PMP])[ktse ];
+      f1_TNW  = (D.f[DIR_MPP])[ktnw ];
+      f1_BNE  = (D.f[DIR_PPM])[kbne ];
+      f1_BSW  = (D.f[DIR_MMM])[kbsw ];
+      f1_BSE  = (D.f[DIR_PMM])[kbse ];
+      f1_BNW  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       real cs       = c1o1/sqrt(c3o1);
       real csp1     = cs + c1o1;
@@ -222,15 +222,15 @@ __global__ void PressSchlaff27(real* rhoBC,
 
       deltaVz0[k] = tempDeltaV;
 
-      (D.f[DIR_00M   ])[kb   ] = f1_B   ;
-      (D.f[DIR_M0M  ])[kbw  ] = f1_BW  ;
-      (D.f[DIR_P0M  ])[kbe  ] = f1_BE  ;
-      (D.f[DIR_0MM  ])[kbs  ] = f1_BS  ;
-      (D.f[DIR_0PM  ])[kbn  ] = f1_BN  ;
-      (D.f[DIR_PPM ])[kbne ] = f1_BNE ;
-      (D.f[DIR_MMM ])[kbsw ] = f1_BSW ;
-      (D.f[DIR_PMM ])[kbse ] = f1_BSE ;
-      (D.f[DIR_MPM ])[kbnw ] = f1_BNW ;
+      (D.f[DIR_00M])[kb   ] = f1_B   ;
+      (D.f[DIR_M0M])[kbw  ] = f1_BW  ;
+      (D.f[DIR_P0M])[kbe  ] = f1_BE  ;
+      (D.f[DIR_0MM])[kbs  ] = f1_BS  ;
+      (D.f[DIR_0PM])[kbn  ] = f1_BN  ;
+      (D.f[DIR_PPM])[kbne ] = f1_BNE ;
+      (D.f[DIR_MMM])[kbsw ] = f1_BSW ;
+      (D.f[DIR_PMM])[kbse ] = f1_BSE ;
+      (D.f[DIR_MPM])[kbnw ] = f1_BNW ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -285,7 +285,7 @@ __global__ void VelSchlaff27(  int t,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
                                           unsigned int* neighborZ,
-                                          unsigned int size_Mat,
+                                          unsigned long long numberOfLBnodes,
                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -335,122 +335,122 @@ __global__ void VelSchlaff27(  int t,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       }
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_E    = (D.f[DIR_P00   ])[ke   ];
-      f1_W    = (D.f[DIR_M00   ])[kw   ];
-      f1_N    = (D.f[DIR_0P0   ])[kn   ];
-      f1_S    = (D.f[DIR_0M0   ])[ks   ];
-      f1_T    = (D.f[DIR_00P   ])[kt   ];
-      f1_B    = (D.f[DIR_00M   ])[kb   ];
-      f1_NE   = (D.f[DIR_PP0  ])[kne  ];
-      f1_SW   = (D.f[DIR_MM0  ])[ksw  ];
-      f1_SE   = (D.f[DIR_PM0  ])[kse  ];
-      f1_NW   = (D.f[DIR_MP0  ])[knw  ];
-      f1_TE   = (D.f[DIR_P0P  ])[kte  ];
-      f1_BW   = (D.f[DIR_M0M  ])[kbw  ];
-      f1_BE   = (D.f[DIR_P0M  ])[kbe  ];
-      f1_TW   = (D.f[DIR_M0P  ])[ktw  ];
-      f1_TN   = (D.f[DIR_0PP  ])[ktn  ];
-      f1_BS   = (D.f[DIR_0MM  ])[kbs  ];
-      f1_BN   = (D.f[DIR_0PM  ])[kbn  ];
-      f1_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f1_E    = (D.f[DIR_P00])[ke   ];
+      f1_W    = (D.f[DIR_M00])[kw   ];
+      f1_N    = (D.f[DIR_0P0])[kn   ];
+      f1_S    = (D.f[DIR_0M0])[ks   ];
+      f1_T    = (D.f[DIR_00P])[kt   ];
+      f1_B    = (D.f[DIR_00M])[kb   ];
+      f1_NE   = (D.f[DIR_PP0])[kne  ];
+      f1_SW   = (D.f[DIR_MM0])[ksw  ];
+      f1_SE   = (D.f[DIR_PM0])[kse  ];
+      f1_NW   = (D.f[DIR_MP0])[knw  ];
+      f1_TE   = (D.f[DIR_P0P])[kte  ];
+      f1_BW   = (D.f[DIR_M0M])[kbw  ];
+      f1_BE   = (D.f[DIR_P0M])[kbe  ];
+      f1_TW   = (D.f[DIR_M0P])[ktw  ];
+      f1_TN   = (D.f[DIR_0PP])[ktn  ];
+      f1_BS   = (D.f[DIR_0MM])[kbs  ];
+      f1_BN   = (D.f[DIR_0PM])[kbn  ];
+      f1_TS   = (D.f[DIR_0MP])[kts  ];
       f1_ZERO = (D.f[DIR_000])[kzero];
-      f1_TNE  = (D.f[DIR_PPP ])[ktne ];
-      f1_TSW  = (D.f[DIR_MMP ])[ktsw ];
-      f1_TSE  = (D.f[DIR_PMP ])[ktse ];
-      f1_TNW  = (D.f[DIR_MPP ])[ktnw ];
-      f1_BNE  = (D.f[DIR_PPM ])[kbne ];
-      f1_BSW  = (D.f[DIR_MMM ])[kbsw ];
-      f1_BSE  = (D.f[DIR_PMM ])[kbse ];
-      f1_BNW  = (D.f[DIR_MPM ])[kbnw ];
-      //f1_W    = (D.f[DIR_P00   ])[ke   ];
-      //f1_E    = (D.f[DIR_M00   ])[kw   ];
-      //f1_S    = (D.f[DIR_0P0   ])[kn   ];
-      //f1_N    = (D.f[DIR_0M0   ])[ks   ];
-      //f1_B    = (D.f[DIR_00P   ])[kt   ];
-      //f1_T    = (D.f[DIR_00M   ])[kb   ];
-      //f1_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //f1_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //f1_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //f1_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //f1_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //f1_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //f1_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //f1_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //f1_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //f1_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //f1_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //f1_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f1_TNE  = (D.f[DIR_PPP])[ktne ];
+      f1_TSW  = (D.f[DIR_MMP])[ktsw ];
+      f1_TSE  = (D.f[DIR_PMP])[ktse ];
+      f1_TNW  = (D.f[DIR_MPP])[ktnw ];
+      f1_BNE  = (D.f[DIR_PPM])[kbne ];
+      f1_BSW  = (D.f[DIR_MMM])[kbsw ];
+      f1_BSE  = (D.f[DIR_PMM])[kbse ];
+      f1_BNW  = (D.f[DIR_MPM])[kbnw ];
+      //f1_W    = (D.f[DIR_P00])[ke   ];
+      //f1_E    = (D.f[DIR_M00])[kw   ];
+      //f1_S    = (D.f[DIR_0P0])[kn   ];
+      //f1_N    = (D.f[DIR_0M0])[ks   ];
+      //f1_B    = (D.f[DIR_00P])[kt   ];
+      //f1_T    = (D.f[DIR_00M])[kb   ];
+      //f1_SW   = (D.f[DIR_PP0])[kne  ];
+      //f1_NE   = (D.f[DIR_MM0])[ksw  ];
+      //f1_NW   = (D.f[DIR_PM0])[kse  ];
+      //f1_SE   = (D.f[DIR_MP0])[knw  ];
+      //f1_BW   = (D.f[DIR_P0P])[kte  ];
+      //f1_TE   = (D.f[DIR_M0M])[kbw  ];
+      //f1_TW   = (D.f[DIR_P0M])[kbe  ];
+      //f1_BE   = (D.f[DIR_M0P])[ktw  ];
+      //f1_BS   = (D.f[DIR_0PP])[ktn  ];
+      //f1_TN   = (D.f[DIR_0MM])[kbs  ];
+      //f1_TS   = (D.f[DIR_0PM])[kbn  ];
+      //f1_BN   = (D.f[DIR_0MP])[kts  ];
       //f1_ZERO = (D.f[DIR_000])[kzero];
-      //f1_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //f1_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //f1_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //f1_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //f1_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //f1_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //f1_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //f1_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      //f1_BSW  = (D.f[DIR_PPP])[ktne ];
+      //f1_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //f1_BNW  = (D.f[DIR_PMP])[ktse ];
+      //f1_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //f1_TSW  = (D.f[DIR_PPM])[kbne ];
+      //f1_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //f1_TNW  = (D.f[DIR_PMM])[kbse ];
+      //f1_TSE  = (D.f[DIR_MPM])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       real cs       = c1o1/sqrt(c3o1);
       real csp1     = cs + c1o1;
@@ -522,64 +522,64 @@ __global__ void VelSchlaff27(  int t,
       f1_TNW = f1_BSE - c1o36 * (VX - VY - VZ);
 
       deltaVz0[k] = tempDeltaV;
-      (D.f[DIR_00P   ])[kt   ] = f1_T  ;
-      (D.f[DIR_P0P  ])[kte  ] = f1_TE ;
-      (D.f[DIR_M0P  ])[ktw  ] = f1_TW ;
-      (D.f[DIR_0PP  ])[ktn  ] = f1_TN ;
-      (D.f[DIR_0MP  ])[kts  ] = f1_TS ;
-      (D.f[DIR_PPP ])[ktne ] = f1_TNE;
-      (D.f[DIR_MMP ])[ktsw ] = f1_TSW;
-      (D.f[DIR_PMP ])[ktse ] = f1_TSE;
-      (D.f[DIR_MPP ])[ktnw ] = f1_TNW;
-
-      //(D.f[DIR_00M   ])[kb   ] = f1_B   ;
-      //(D.f[DIR_M0M  ])[kbw  ] = f1_BW  ;
-      //(D.f[DIR_P0M  ])[kbe  ] = f1_BE  ;
-      //(D.f[DIR_0MM  ])[kbs  ] = f1_BS  ;
-      //(D.f[DIR_0PM  ])[kbn  ] = f1_BN  ;
-      //(D.f[DIR_PPM ])[kbne ] = f1_BNE ;
-      //(D.f[DIR_MMM ])[kbsw ] = f1_BSW ;
-      //(D.f[DIR_PMM ])[kbse ] = f1_BSE ;
-      //(D.f[DIR_MPM ])[kbnw ] = f1_BNW ;
-
-
-      //(D.f[DIR_00P   ])[kt   ] = f1_B  ;
-      //(D.f[DIR_P0P  ])[kte  ] = f1_BW ;
-      //(D.f[DIR_M0P  ])[ktw  ] = f1_BE ;
-      //(D.f[DIR_0PP  ])[ktn  ] = f1_BS ;
-      //(D.f[DIR_0MP  ])[kts  ] = f1_BN ;
-      //(D.f[DIR_PPP ])[ktne ] = f1_BSW;
-      //(D.f[DIR_MMP ])[ktsw ] = f1_BNE;
-      //(D.f[DIR_PMP ])[ktse ] = f1_BNW;
-      //(D.f[DIR_MPP ])[ktnw ] = f1_BSE;
-
-      //(D.f[DIR_P00   ])[ke   ] = f1_W   -c2over27*drho1;
-      //(D.f[DIR_M00   ])[kw   ] = f1_E   -c2over27*drho1;
-      //(D.f[DIR_0P0   ])[kn   ] = f1_S   -c2over27*drho1;
-      //(D.f[DIR_0M0   ])[ks   ] = f1_N   -c2over27*drho1;
-      //(D.f[DIR_00P   ])[kt   ] = f1_B   -c2over27*drho1;
-      //(D.f[DIR_00M   ])[kb   ] = f1_T   -c2over27*drho1;
-      //(D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1over54*drho1;
-      //(D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1over54*drho1;
-      //(D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1over54*drho1;
-      //(D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1over54*drho1;
-      //(D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1over54*drho1;
-      //(D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1over54*drho1;
-      //(D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1over54*drho1;
-      //(D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1over54*drho1;
-      //(D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1over54*drho1;
-      //(D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1over54*drho1;
-      //(D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1over54*drho1;
-      //(D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1over54*drho1;
+      (D.f[DIR_00P])[kt   ] = f1_T  ;
+      (D.f[DIR_P0P])[kte  ] = f1_TE ;
+      (D.f[DIR_M0P])[ktw  ] = f1_TW ;
+      (D.f[DIR_0PP])[ktn  ] = f1_TN ;
+      (D.f[DIR_0MP])[kts  ] = f1_TS ;
+      (D.f[DIR_PPP])[ktne ] = f1_TNE;
+      (D.f[DIR_MMP])[ktsw ] = f1_TSW;
+      (D.f[DIR_PMP])[ktse ] = f1_TSE;
+      (D.f[DIR_MPP])[ktnw ] = f1_TNW;
+
+      //(D.f[DIR_00M])[kb   ] = f1_B   ;
+      //(D.f[DIR_M0M])[kbw  ] = f1_BW  ;
+      //(D.f[DIR_P0M])[kbe  ] = f1_BE  ;
+      //(D.f[DIR_0MM])[kbs  ] = f1_BS  ;
+      //(D.f[DIR_0PM])[kbn  ] = f1_BN  ;
+      //(D.f[DIR_PPM])[kbne ] = f1_BNE ;
+      //(D.f[DIR_MMM])[kbsw ] = f1_BSW ;
+      //(D.f[DIR_PMM])[kbse ] = f1_BSE ;
+      //(D.f[DIR_MPM])[kbnw ] = f1_BNW ;
+
+
+      //(D.f[DIR_00P])[kt   ] = f1_B  ;
+      //(D.f[DIR_P0P])[kte  ] = f1_BW ;
+      //(D.f[DIR_M0P])[ktw  ] = f1_BE ;
+      //(D.f[DIR_0PP])[ktn  ] = f1_BS ;
+      //(D.f[DIR_0MP])[kts  ] = f1_BN ;
+      //(D.f[DIR_PPP])[ktne ] = f1_BSW;
+      //(D.f[DIR_MMP])[ktsw ] = f1_BNE;
+      //(D.f[DIR_PMP])[ktse ] = f1_BNW;
+      //(D.f[DIR_MPP])[ktnw ] = f1_BSE;
+
+      //(D.f[DIR_P00])[ke   ] = f1_W   -c2over27*drho1;
+      //(D.f[DIR_M00])[kw   ] = f1_E   -c2over27*drho1;
+      //(D.f[DIR_0P0])[kn   ] = f1_S   -c2over27*drho1;
+      //(D.f[DIR_0M0])[ks   ] = f1_N   -c2over27*drho1;
+      //(D.f[DIR_00P])[kt   ] = f1_B   -c2over27*drho1;
+      //(D.f[DIR_00M])[kb   ] = f1_T   -c2over27*drho1;
+      //(D.f[DIR_PP0])[kne  ] = f1_SW  -c1over54*drho1;
+      //(D.f[DIR_MM0])[ksw  ] = f1_NE  -c1over54*drho1;
+      //(D.f[DIR_PM0])[kse  ] = f1_NW  -c1over54*drho1;
+      //(D.f[DIR_MP0])[knw  ] = f1_SE  -c1over54*drho1;
+      //(D.f[DIR_P0P])[kte  ] = f1_BW  -c1over54*drho1;
+      //(D.f[DIR_M0M])[kbw  ] = f1_TE  -c1over54*drho1;
+      //(D.f[DIR_P0M])[kbe  ] = f1_TW  -c1over54*drho1;
+      //(D.f[DIR_M0P])[ktw  ] = f1_BE  -c1over54*drho1;
+      //(D.f[DIR_0PP])[ktn  ] = f1_BS  -c1over54*drho1;
+      //(D.f[DIR_0MM])[kbs  ] = f1_TN  -c1over54*drho1;
+      //(D.f[DIR_0PM])[kbn  ] = f1_TS  -c1over54*drho1;
+      //(D.f[DIR_0MP])[kts  ] = f1_BN  -c1over54*drho1;
       //(D.f[DIR_000])[kzero] = f1_ZERO-c8over27*drho1;
-      //(D.f[DIR_PPP ])[ktne ] = f1_BSW -c1over216*drho1;
-      //(D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1over216*drho1;
-      //(D.f[DIR_PMP ])[ktse ] = f1_BNW -c1over216*drho1;
-      //(D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1over216*drho1;
-      //(D.f[DIR_PPM ])[kbne ] = f1_TSW -c1over216*drho1;
-      //(D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1over216*drho1;
-      //(D.f[DIR_PMM ])[kbse ] = f1_TNW -c1over216*drho1;
-      //(D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1over216*drho1;
+      //(D.f[DIR_PPP])[ktne ] = f1_BSW -c1over216*drho1;
+      //(D.f[DIR_MMP])[ktsw ] = f1_BNE -c1over216*drho1;
+      //(D.f[DIR_PMP])[ktse ] = f1_BNW -c1over216*drho1;
+      //(D.f[DIR_MPP])[ktnw ] = f1_BSE -c1over216*drho1;
+      //(D.f[DIR_PPM])[kbne ] = f1_TSW -c1over216*drho1;
+      //(D.f[DIR_MMM])[kbsw ] = f1_TNE -c1over216*drho1;
+      //(D.f[DIR_PMM])[kbse ] = f1_TNW -c1over216*drho1;
+      //(D.f[DIR_MPM])[kbnw ] = f1_TSE -c1over216*drho1;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
index 8dbf2c670a549f9a6afe581510205c31246b50cb..d847d00193f68127927e2f3fa3fbf1eda7f9a736 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -16,69 +16,69 @@ __global__ void GetVeloforForcing27( real* DD,
 												unsigned int* neighborX,
 												unsigned int* neighborY,
 												unsigned int* neighborZ,
-												unsigned int size_Mat, 
+												unsigned long long numberOfLBnodes, 
 												bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==false)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -123,33 +123,33 @@ __global__ void GetVeloforForcing27( real* DD,
 		unsigned int ktne = KQK;
 		unsigned int kbsw = neighborZ[ksw];
 		////////////////////////////////////////////////////////////////////////////////
-		real mfcbb = (D.f[DIR_P00   ])[ke   ];
-		real mfabb = (D.f[DIR_M00   ])[kw   ];
-		real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-		real mfbab = (D.f[DIR_0M0   ])[ks   ];
-		real mfbbc = (D.f[DIR_00P   ])[kt   ];
-		real mfbba = (D.f[DIR_00M   ])[kb   ];
-		real mfccb = (D.f[DIR_PP0  ])[kne  ];
-		real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-		real mfcab = (D.f[DIR_PM0  ])[kse  ];
-		real mfacb = (D.f[DIR_MP0  ])[knw  ];
-		real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-		real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-		real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-		real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-		real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-		real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-		real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-		real mfbac = (D.f[DIR_0MP  ])[kts  ];
+		real mfcbb = (D.f[DIR_P00])[ke   ];
+		real mfabb = (D.f[DIR_M00])[kw   ];
+		real mfbcb = (D.f[DIR_0P0])[kn   ];
+		real mfbab = (D.f[DIR_0M0])[ks   ];
+		real mfbbc = (D.f[DIR_00P])[kt   ];
+		real mfbba = (D.f[DIR_00M])[kb   ];
+		real mfccb = (D.f[DIR_PP0])[kne  ];
+		real mfaab = (D.f[DIR_MM0])[ksw  ];
+		real mfcab = (D.f[DIR_PM0])[kse  ];
+		real mfacb = (D.f[DIR_MP0])[knw  ];
+		real mfcbc = (D.f[DIR_P0P])[kte  ];
+		real mfaba = (D.f[DIR_M0M])[kbw  ];
+		real mfcba = (D.f[DIR_P0M])[kbe  ];
+		real mfabc = (D.f[DIR_M0P])[ktw  ];
+		real mfbcc = (D.f[DIR_0PP])[ktn  ];
+		real mfbaa = (D.f[DIR_0MM])[kbs  ];
+		real mfbca = (D.f[DIR_0PM])[kbn  ];
+		real mfbac = (D.f[DIR_0MP])[kts  ];
 		real mfbbb = (D.f[DIR_000])[kzero];
-		real mfccc = (D.f[DIR_PPP ])[ktne ];
-		real mfaac = (D.f[DIR_MMP ])[ktsw ];
-		real mfcac = (D.f[DIR_PMP ])[ktse ];
-		real mfacc = (D.f[DIR_MPP ])[ktnw ];
-		real mfcca = (D.f[DIR_PPM ])[kbne ];
-		real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-		real mfcaa = (D.f[DIR_PMM ])[kbse ];
-		real mfaca = (D.f[DIR_MPM ])[kbnw ];
+		real mfccc = (D.f[DIR_PPP])[ktne ];
+		real mfaac = (D.f[DIR_MMP])[ktsw ];
+		real mfcac = (D.f[DIR_PMP])[ktse ];
+		real mfacc = (D.f[DIR_MPP])[ktnw ];
+		real mfcca = (D.f[DIR_PPM])[kbne ];
+		real mfaaa = (D.f[DIR_MMM])[kbsw ];
+		real mfcaa = (D.f[DIR_PMM])[kbse ];
+		real mfaca = (D.f[DIR_MPM])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////////
 		real rho   = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 					 	 mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
index 0079c927373e90c1e408d2c57ace0595bcfdff15..ecd7427665427376aaee290e918fd5c723576f73 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
@@ -1,85 +1,60 @@
-/* Device code */
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SlipBCs27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include "lbm/constants/NumericConstants.h"
-#include "KernelUtilities.h"
+#include "Kernel/Utilities/DistributionHelper.cuh"
+#include "basics/constants/NumericConstants.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QSlipDevice27(real* DD, 
-                                         int* k_Q, 
-                                         real* QQ,
-                                         unsigned int numberOfBCnodes,
-                                         real om1, 
-                                         unsigned int* neighborX,
-                                         unsigned int* neighborY,
-                                         unsigned int* neighborZ,
-                                         unsigned int size_Mat, 
-                                         bool isEvenTimestep)
+__global__ void QSlipDevice27(
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
+   Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep);
+
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
    const unsigned  y = blockIdx.x;   // Globaler y-Index 
@@ -99,24 +74,24 @@ __global__ void QSlipDevice27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -156,32 +131,32 @@ __global__ void QSlipDevice27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -204,66 +179,8 @@ __global__ void QSlipDevice27(real* DD,
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
-      else
-      {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-      }
+
+      D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
       //(D.f[DIR_000])[k]=c1o10;
@@ -659,32 +576,26 @@ __global__ void QSlipDevice27(real* DD,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QSlipDeviceComp27(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
-                                    real* subgridDistances,
-                                    unsigned int numberOfBCnodes,
-                                    real omega, 
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int numberOfLBnodes, 
-                                    bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //! The slip boundary condition is executed in the following steps
    //!
+
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -702,7 +613,7 @@ __global__ void QSlipDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -734,32 +645,32 @@ __global__ void QSlipDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -804,7 +715,7 @@ __global__ void QSlipDeviceComp27(
       bool y = false;
       bool z = false;
 
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
          VeloX = c0o1;
@@ -816,7 +727,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = c0o1;
@@ -828,7 +739,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -840,7 +751,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -852,7 +763,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -864,7 +775,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -876,7 +787,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -890,7 +801,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -904,7 +815,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -918,7 +829,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -932,7 +843,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -946,7 +857,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
         VeloX = slipLength*vx1;
@@ -955,12 +866,12 @@ __global__ void QSlipDeviceComp27(
         if (z == true) VeloZ = c0o1;
 
          velocityLB = -vx1 - vx3;
-         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX - VeloZ;
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -974,7 +885,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -988,7 +899,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1002,7 +913,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1017,7 +928,7 @@ __global__ void QSlipDeviceComp27(
       }
 
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1031,7 +942,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1045,7 +956,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1060,7 +971,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1076,7 +987,7 @@ __global__ void QSlipDeviceComp27(
       }
 
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1091,7 +1002,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1106,7 +1017,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1121,7 +1032,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1136,7 +1047,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1151,7 +1062,7 @@ __global__ void QSlipDeviceComp27(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1169,42 +1080,59 @@ __global__ void QSlipDeviceComp27(
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 //////////////////////////////////////////////////////////////////////////////
 __global__ void BBSlipDeviceComp27(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
-                                    real* subgridDistances,
-                                    unsigned int numberOfBCnodes,
-                                    real omega, 
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int numberOfLBnodes, 
-                                    bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //! The slip boundary condition is executed in the following steps
    //!
+
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
       //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
       //!
-      Distributions27 dist;
-      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
-
+      Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local subgrid distances (q's)
       //!
@@ -1214,7 +1142,7 @@ __global__ void BBSlipDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -1246,32 +1174,32 @@ __global__ void BBSlipDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -1292,13 +1220,13 @@ __global__ void BBSlipDeviceComp27(
                    (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
                    (f_T - f_B)) / (c1o1 + drho);
 
-      real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
+      // real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - change the pointer to write the results in the correct array
       //!
-      getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
+      dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, !isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////
       //! - Multiply the local velocities by the slipLength
       //!
@@ -1316,7 +1244,7 @@ __global__ void BBSlipDeviceComp27(
       bool y = false;
       bool z = false;
 
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
          VeloX = c0o1;
@@ -1326,7 +1254,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_M00])[kw] = getBounceBackDistributionForVeloBC(f_W, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = c0o1;
@@ -1336,7 +1264,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_P00])[ke] = getBounceBackDistributionForVeloBC(f_E, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -1346,7 +1274,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0M0])[ks] = getBounceBackDistributionForVeloBC(f_S, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -1356,7 +1284,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0P0])[kn] = getBounceBackDistributionForVeloBC(f_N, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -1366,7 +1294,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_00M])[kb] = getBounceBackDistributionForVeloBC(f_B, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -1376,7 +1304,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_00P])[kt] = getBounceBackDistributionForVeloBC(f_T, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1388,7 +1316,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MM0])[ksw] = getBounceBackDistributionForVeloBC(f_SW, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1400,7 +1328,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_PP0])[kne] = getBounceBackDistributionForVeloBC(f_NE, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1412,7 +1340,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MP0])[knw] = getBounceBackDistributionForVeloBC(f_NW, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1424,7 +1352,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_PM0])[kse] = getBounceBackDistributionForVeloBC(f_SE, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1436,7 +1364,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_M0M])[kbw] = getBounceBackDistributionForVeloBC(f_BW, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
         VeloX = slipLength*vx1;
@@ -1444,11 +1372,11 @@ __global__ void BBSlipDeviceComp27(
         if (x == true) VeloX = c0o1;
         if (z == true) VeloZ = c0o1;
 
-         velocityBC = -VeloX - VeloZ;
-         (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54);
+        velocityBC = -VeloX - VeloZ;
+        (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1460,7 +1388,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_M0P])[ktw] = getBounceBackDistributionForVeloBC(f_TW, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1472,7 +1400,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_P0M])[kbe] = getBounceBackDistributionForVeloBC(f_BE, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1484,7 +1412,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0MM])[kbs] = getBounceBackDistributionForVeloBC(f_BS, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1497,7 +1425,7 @@ __global__ void BBSlipDeviceComp27(
       }
 
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1509,7 +1437,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0MP])[kts] = getBounceBackDistributionForVeloBC(f_TS, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1521,7 +1449,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_0PM])[kbn] = getBounceBackDistributionForVeloBC(f_BN, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1535,7 +1463,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MMM])[kbsw] = getBounceBackDistributionForVeloBC(f_TNE, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1550,7 +1478,7 @@ __global__ void BBSlipDeviceComp27(
       }
 
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1564,7 +1492,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MMP])[ktsw] = getBounceBackDistributionForVeloBC(f_TSW, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1578,7 +1506,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_PPM])[kbne] = getBounceBackDistributionForVeloBC(f_BNE, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1592,7 +1520,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MPM])[kbnw] = getBounceBackDistributionForVeloBC(f_BNW, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1606,7 +1534,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_PMP])[ktse] = getBounceBackDistributionForVeloBC(f_TSE, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1620,7 +1548,7 @@ __global__ void BBSlipDeviceComp27(
          (dist.f[DIR_MPP])[ktnw] = getBounceBackDistributionForVeloBC(f_TNW, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1638,35 +1566,55 @@ __global__ void BBSlipDeviceComp27(
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 ////////////////////////////////////////////////////////////////////////////
 __global__ void QSlipDeviceComp27TurbViscosity(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
-                                    real* subgridDistances,
-                                    unsigned int numberOfBCnodes,
-                                    real omega, 
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* turbViscosity,
-                                    unsigned int numberOfLBnodes, 
-                                    bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* turbViscosity,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //! The slip boundary condition is executed in the following steps
    //!
+
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -1684,7 +1632,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -1716,32 +1664,32 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -1791,7 +1739,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       bool y = false;
       bool z = false;
 
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
          VeloX = c0o1;
@@ -1803,7 +1751,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = c0o1;
@@ -1815,7 +1763,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -1827,7 +1775,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -1839,7 +1787,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -1851,7 +1799,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -1863,7 +1811,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, om_turb, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1877,7 +1825,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1891,7 +1839,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1905,7 +1853,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1919,7 +1867,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1933,7 +1881,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
         VeloX = slipLength*vx1;
@@ -1941,13 +1889,13 @@ __global__ void QSlipDeviceComp27TurbViscosity(
         if (x == true) VeloX = c0o1;
         if (z == true) VeloZ = c0o1;
 
-         velocityLB = -vx1 - vx3;
-         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         velocityBC = -VeloX - VeloZ;
-         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54);
+        velocityLB = -vx1 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloX - VeloZ;
+        (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1961,7 +1909,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -1975,7 +1923,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -1989,7 +1937,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2004,7 +1952,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       }
 
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2018,7 +1966,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2032,7 +1980,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, om_turb, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2047,7 +1995,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2063,7 +2011,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       }
 
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2078,7 +2026,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2093,7 +2041,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2108,7 +2056,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2123,7 +2071,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2138,7 +2086,7 @@ __global__ void QSlipDeviceComp27TurbViscosity(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, om_turb, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2154,37 +2102,59 @@ __global__ void QSlipDeviceComp27TurbViscosity(
       }
    }
 }
+////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 ////////////////////////////////////////////////////////////////////////////
 __global__ void QSlipPressureDeviceComp27TurbViscosity(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
-                                    real* subgridDistances,
-                                    unsigned int numberOfBCnodes,
-                                    real omega, 
-                                    unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    real* turbViscosity,
-                                    unsigned int numberOfLBnodes, 
-                                    bool isEvenTimestep)
+    real* distributions, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* turbViscosity,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //! The slip boundary condition is executed in the following steps
    //!
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
+   const unsigned nodeIndex = getNodeIndex();
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -2202,7 +2172,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -2234,32 +2204,32 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -2309,7 +2279,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       bool y = false;
       bool z = false;
 
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
          VeloX = c0o1;
@@ -2321,7 +2291,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = c0o1;
@@ -2333,7 +2303,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -2345,7 +2315,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = c0o1;
@@ -2357,7 +2327,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -2369,7 +2339,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloZ = c0o1;
@@ -2381,7 +2351,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2395,7 +2365,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2409,7 +2379,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2423,7 +2393,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2437,7 +2407,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2451,7 +2421,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
         VeloX = slipLength*vx1;
@@ -2459,13 +2429,13 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
         if (x == true) VeloX = c0o1;
         if (z == true) VeloZ = c0o1;
 
-         velocityLB = -vx1 - vx3;
-         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         velocityBC = -VeloX - VeloZ;
-         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54);
+        velocityLB = -vx1 - vx3;
+        feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+        velocityBC = -VeloX - VeloZ;
+        (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2479,7 +2449,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2493,7 +2463,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2507,7 +2477,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2522,7 +2492,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       }
 
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2536,7 +2506,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloY = slipLength*vx2;
@@ -2550,7 +2520,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2565,7 +2535,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2581,7 +2551,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
       }
 
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2596,7 +2566,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2611,7 +2581,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2626,7 +2596,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2641,7 +2611,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2656,7 +2626,7 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          VeloX = slipLength*vx1;
@@ -2688,63 +2658,63 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 //    Distributions27 D;
 //    if (isEvenTimestep==true)
 //    {
-//       D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-//       D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-//       D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-//       D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-//       D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-//       D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-//       D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-//       D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-//       D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-//       D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-//       D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-//       D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-//       D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-//       D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-//       D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-//       D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-//       D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-//       D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-//       D.f[DIR_000] = &DD[DIR_000*size_Mat];
-//       D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-//       D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-//       D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-//       D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-//       D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-//       D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-//       D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-//       D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+//       D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+//       D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+//       D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+//       D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+//       D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+//       D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+//       D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+//       D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+//       D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+//       D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+//       D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+//       D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+//       D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+//       D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+//       D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+//       D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+//       D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+//       D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+//       D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+//       D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+//       D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+//       D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+//       D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+//       D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+//       D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+//       D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+//       D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
 //    } 
 //    else
 //    {
-//       D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-//       D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-//       D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-//       D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-//       D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-//       D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-//       D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-//       D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-//       D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-//       D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-//       D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-//       D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-//       D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-//       D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-//       D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-//       D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-//       D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-//       D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-//       D.f[DIR_000] = &DD[DIR_000*size_Mat];
-//       D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-//       D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-//       D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-//       D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-//       D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-//       D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-//       D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-//       D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+//       D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+//       D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+//       D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+//       D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+//       D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+//       D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+//       D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+//       D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+//       D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+//       D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+//       D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+//       D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+//       D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+//       D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+//       D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+//       D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+//       D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+//       D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+//       D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+//       D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+//       D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+//       D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+//       D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+//       D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+//       D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+//       D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+//       D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
 //    }
 //    ////////////////////////////////////////////////////////////////////////////////
 //    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2765,24 +2735,24 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 //             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 //             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 //             *q_dirBSE, *q_dirBNW; 
-//       q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-//       q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-//       q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-//       q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-//       q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-//       q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-//       q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-//       q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-//       q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-//       q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-//       q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-//       q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-//       q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-//       q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-//       q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-//       q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-//       q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-//       q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+//       q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+//       q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+//       q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+//       q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+//       q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+//       q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+//       q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+//       q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+//       q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+//       q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+//       q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+//       q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+//       q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+//       q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+//       q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+//       q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+//       q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+//       q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
 //       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 //       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 //       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2823,32 +2793,32 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 //       unsigned int kbsw = neighborZ[ksw];
       
 //       ////////////////////////////////////////////////////////////////////////////////
-//       real f_W    = (D.f[DIR_P00   ])[ke   ];
-//       real f_E    = (D.f[DIR_M00   ])[kw   ];
-//       real f_S    = (D.f[DIR_0P0   ])[kn   ];
-//       real f_N    = (D.f[DIR_0M0   ])[ks   ];
-//       real f_B    = (D.f[DIR_00P   ])[kt   ];
-//       real f_T    = (D.f[DIR_00M   ])[kb   ];
-//       real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-//       real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-//       real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-//       real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-//       real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-//       real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-//       real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-//       real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-//       real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-//       real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-//       real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-//       real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-//       real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-//       real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-//       real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-//       real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-//       real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-//       real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-//       real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-//       real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+//       real f_W    = (D.f[DIR_P00])[ke   ];
+//       real f_E    = (D.f[DIR_M00])[kw   ];
+//       real f_S    = (D.f[DIR_0P0])[kn   ];
+//       real f_N    = (D.f[DIR_0M0])[ks   ];
+//       real f_B    = (D.f[DIR_00P])[kt   ];
+//       real f_T    = (D.f[DIR_00M])[kb   ];
+//       real f_SW   = (D.f[DIR_PP0])[kne  ];
+//       real f_NE   = (D.f[DIR_MM0])[ksw  ];
+//       real f_NW   = (D.f[DIR_PM0])[kse  ];
+//       real f_SE   = (D.f[DIR_MP0])[knw  ];
+//       real f_BW   = (D.f[DIR_P0P])[kte  ];
+//       real f_TE   = (D.f[DIR_M0M])[kbw  ];
+//       real f_TW   = (D.f[DIR_P0M])[kbe  ];
+//       real f_BE   = (D.f[DIR_M0P])[ktw  ];
+//       real f_BS   = (D.f[DIR_0PP])[ktn  ];
+//       real f_TN   = (D.f[DIR_0MM])[kbs  ];
+//       real f_TS   = (D.f[DIR_0PM])[kbn  ];
+//       real f_BN   = (D.f[DIR_0MP])[kts  ];
+//       real f_BSW  = (D.f[DIR_PPP])[ktne ];
+//       real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+//       real f_BNW  = (D.f[DIR_PMP])[ktse ];
+//       real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+//       real f_TSW  = (D.f[DIR_PPM])[kbne ];
+//       real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+//       real f_TNW  = (D.f[DIR_PMM])[kbse ];
+//       real f_TSE  = (D.f[DIR_MPM])[kbnw ];
 //       ////////////////////////////////////////////////////////////////////////////////
 //       real vx1, vx2, vx3, drho, feq, q;
 //       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -2873,63 +2843,63 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 //       //////////////////////////////////////////////////////////////////////////
 //       if (isEvenTimestep==false)
 //       {
-//          D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-//          D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-//          D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-//          D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-//          D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-//          D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-//          D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-//          D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-//          D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-//          D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-//          D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-//          D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-//          D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-//          D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-//          D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-//          D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-//          D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-//          D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-//          D.f[DIR_000] = &DD[DIR_000*size_Mat];
-//          D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-//          D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-//          D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-//          D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-//          D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-//          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-//          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-//          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+//          D.f[DIR_P00] = &DD[DIR_P00 * size_Mat];
+//          D.f[DIR_M00] = &DD[DIR_M00 * size_Mat];
+//          D.f[DIR_0P0] = &DD[DIR_0P0 * size_Mat];
+//          D.f[DIR_0M0] = &DD[DIR_0M0 * size_Mat];
+//          D.f[DIR_00P] = &DD[DIR_00P * size_Mat];
+//          D.f[DIR_00M] = &DD[DIR_00M * size_Mat];
+//          D.f[DIR_PP0] = &DD[DIR_PP0 * size_Mat];
+//          D.f[DIR_MM0] = &DD[DIR_MM0 * size_Mat];
+//          D.f[DIR_PM0] = &DD[DIR_PM0 * size_Mat];
+//          D.f[DIR_MP0] = &DD[DIR_MP0 * size_Mat];
+//          D.f[DIR_P0P] = &DD[DIR_P0P * size_Mat];
+//          D.f[DIR_M0M] = &DD[DIR_M0M * size_Mat];
+//          D.f[DIR_P0M] = &DD[DIR_P0M * size_Mat];
+//          D.f[DIR_M0P] = &DD[DIR_M0P * size_Mat];
+//          D.f[DIR_0PP] = &DD[DIR_0PP * size_Mat];
+//          D.f[DIR_0MM] = &DD[DIR_0MM * size_Mat];
+//          D.f[DIR_0PM] = &DD[DIR_0PM * size_Mat];
+//          D.f[DIR_0MP] = &DD[DIR_0MP * size_Mat];
+//          D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+//          D.f[DIR_PPP] = &DD[DIR_PPP * size_Mat];
+//          D.f[DIR_MMP] = &DD[DIR_MMP * size_Mat];
+//          D.f[DIR_PMP] = &DD[DIR_PMP * size_Mat];
+//          D.f[DIR_MPP] = &DD[DIR_MPP * size_Mat];
+//          D.f[DIR_PPM] = &DD[DIR_PPM * size_Mat];
+//          D.f[DIR_MMM] = &DD[DIR_MMM * size_Mat];
+//          D.f[DIR_PMM] = &DD[DIR_PMM * size_Mat];
+//          D.f[DIR_MPM] = &DD[DIR_MPM * size_Mat];
 //       } 
 //       else
 //       {
-//          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-//          D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-//          D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-//          D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-//          D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-//          D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-//          D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-//          D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-//          D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-//          D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-//          D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-//          D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-//          D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-//          D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-//          D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-//          D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-//          D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-//          D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-//          D.f[DIR_000] = &DD[DIR_000*size_Mat];
-//          D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-//          D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-//          D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-//          D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-//          D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-//          D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-//          D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-//          D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+//          D.f[DIR_M00] = &DD[DIR_P00 * size_Mat];
+//          D.f[DIR_P00] = &DD[DIR_M00 * size_Mat];
+//          D.f[DIR_0M0] = &DD[DIR_0P0 * size_Mat];
+//          D.f[DIR_0P0] = &DD[DIR_0M0 * size_Mat];
+//          D.f[DIR_00M] = &DD[DIR_00P * size_Mat];
+//          D.f[DIR_00P] = &DD[DIR_00M * size_Mat];
+//          D.f[DIR_MM0] = &DD[DIR_PP0 * size_Mat];
+//          D.f[DIR_PP0] = &DD[DIR_MM0 * size_Mat];
+//          D.f[DIR_MP0] = &DD[DIR_PM0 * size_Mat];
+//          D.f[DIR_PM0] = &DD[DIR_MP0 * size_Mat];
+//          D.f[DIR_M0M] = &DD[DIR_P0P * size_Mat];
+//          D.f[DIR_P0P] = &DD[DIR_M0M * size_Mat];
+//          D.f[DIR_M0P] = &DD[DIR_P0M * size_Mat];
+//          D.f[DIR_P0M] = &DD[DIR_M0P * size_Mat];
+//          D.f[DIR_0MM] = &DD[DIR_0PP * size_Mat];
+//          D.f[DIR_0PP] = &DD[DIR_0MM * size_Mat];
+//          D.f[DIR_0MP] = &DD[DIR_0PM * size_Mat];
+//          D.f[DIR_0PM] = &DD[DIR_0MP * size_Mat];
+//          D.f[DIR_000] = &DD[DIR_000 * size_Mat];
+//          D.f[DIR_PPP] = &DD[DIR_MMM * size_Mat];
+//          D.f[DIR_MMP] = &DD[DIR_PPM * size_Mat];
+//          D.f[DIR_PMP] = &DD[DIR_MPM * size_Mat];
+//          D.f[DIR_MPP] = &DD[DIR_PMM * size_Mat];
+//          D.f[DIR_PPM] = &DD[DIR_MMP * size_Mat];
+//          D.f[DIR_MMM] = &DD[DIR_PPP * size_Mat];
+//          D.f[DIR_PMM] = &DD[DIR_MPP * size_Mat];
+//          D.f[DIR_MPM] = &DD[DIR_PMP * size_Mat];
 //       }
 //       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //       //Test
@@ -3378,80 +3348,81 @@ __global__ void QSlipPressureDeviceComp27TurbViscosity(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QSlipGeomDeviceComp27(real* DD, 
-												 int* k_Q, 
-												 real* QQ,
-												 unsigned int  numberOfBCnodes,
-												 real om1, 
-												 real* NormalX,
-												 real* NormalY,
-												 real* NormalZ,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+__global__ void QSlipGeomDeviceComp27(
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int  numberOfBCnodes,
+    real om1, 
+    real* NormalX,
+    real* NormalY,
+    real* NormalZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3472,24 +3443,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -3504,24 +3475,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
               *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
               *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
               *nx_dirBSE, *nx_dirBNW; 
-      nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
-      nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
-      nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
-      nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
-      nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
-      nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
-      nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
-      nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
-      nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
-      nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
-      nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
-      nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
-      nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
-      nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
-      nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
-      nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
-      nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
-      nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+      nx_dirE   = &NormalX[DIR_P00 * numberOfBCnodes];
+      nx_dirW   = &NormalX[DIR_M00 * numberOfBCnodes];
+      nx_dirN   = &NormalX[DIR_0P0 * numberOfBCnodes];
+      nx_dirS   = &NormalX[DIR_0M0 * numberOfBCnodes];
+      nx_dirT   = &NormalX[DIR_00P * numberOfBCnodes];
+      nx_dirB   = &NormalX[DIR_00M * numberOfBCnodes];
+      nx_dirNE  = &NormalX[DIR_PP0 * numberOfBCnodes];
+      nx_dirSW  = &NormalX[DIR_MM0 * numberOfBCnodes];
+      nx_dirSE  = &NormalX[DIR_PM0 * numberOfBCnodes];
+      nx_dirNW  = &NormalX[DIR_MP0 * numberOfBCnodes];
+      nx_dirTE  = &NormalX[DIR_P0P * numberOfBCnodes];
+      nx_dirBW  = &NormalX[DIR_M0M * numberOfBCnodes];
+      nx_dirBE  = &NormalX[DIR_P0M * numberOfBCnodes];
+      nx_dirTW  = &NormalX[DIR_M0P * numberOfBCnodes];
+      nx_dirTN  = &NormalX[DIR_0PP * numberOfBCnodes];
+      nx_dirBS  = &NormalX[DIR_0MM * numberOfBCnodes];
+      nx_dirBN  = &NormalX[DIR_0PM * numberOfBCnodes];
+      nx_dirTS  = &NormalX[DIR_0MP * numberOfBCnodes];
       nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
       nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
       nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
@@ -3536,24 +3507,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
               *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
               *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
               *ny_dirBSE, *ny_dirBNW; 
-      ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
-      ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
-      ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
-      ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
-      ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
-      ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
-      ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
-      ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
-      ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
-      ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
-      ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
-      ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
-      ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
-      ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
-      ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
-      ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
-      ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
-      ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+      ny_dirE   = &NormalY[DIR_P00 * numberOfBCnodes];
+      ny_dirW   = &NormalY[DIR_M00 * numberOfBCnodes];
+      ny_dirN   = &NormalY[DIR_0P0 * numberOfBCnodes];
+      ny_dirS   = &NormalY[DIR_0M0 * numberOfBCnodes];
+      ny_dirT   = &NormalY[DIR_00P * numberOfBCnodes];
+      ny_dirB   = &NormalY[DIR_00M * numberOfBCnodes];
+      ny_dirNE  = &NormalY[DIR_PP0 * numberOfBCnodes];
+      ny_dirSW  = &NormalY[DIR_MM0 * numberOfBCnodes];
+      ny_dirSE  = &NormalY[DIR_PM0 * numberOfBCnodes];
+      ny_dirNW  = &NormalY[DIR_MP0 * numberOfBCnodes];
+      ny_dirTE  = &NormalY[DIR_P0P * numberOfBCnodes];
+      ny_dirBW  = &NormalY[DIR_M0M * numberOfBCnodes];
+      ny_dirBE  = &NormalY[DIR_P0M * numberOfBCnodes];
+      ny_dirTW  = &NormalY[DIR_M0P * numberOfBCnodes];
+      ny_dirTN  = &NormalY[DIR_0PP * numberOfBCnodes];
+      ny_dirBS  = &NormalY[DIR_0MM * numberOfBCnodes];
+      ny_dirBN  = &NormalY[DIR_0PM * numberOfBCnodes];
+      ny_dirTS  = &NormalY[DIR_0MP * numberOfBCnodes];
       ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
       ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
       ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
@@ -3568,24 +3539,24 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
               *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
               *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
               *nz_dirBSE, *nz_dirBNW; 
-      nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
-      nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
-      nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
-      nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
-      nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
-      nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
-      nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
-      nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
-      nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
-      nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
-      nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
-      nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
-      nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
-      nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
-      nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
-      nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
-      nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
-      nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+      nz_dirE   = &NormalZ[DIR_P00 * numberOfBCnodes];
+      nz_dirW   = &NormalZ[DIR_M00 * numberOfBCnodes];
+      nz_dirN   = &NormalZ[DIR_0P0 * numberOfBCnodes];
+      nz_dirS   = &NormalZ[DIR_0M0 * numberOfBCnodes];
+      nz_dirT   = &NormalZ[DIR_00P * numberOfBCnodes];
+      nz_dirB   = &NormalZ[DIR_00M * numberOfBCnodes];
+      nz_dirNE  = &NormalZ[DIR_PP0 * numberOfBCnodes];
+      nz_dirSW  = &NormalZ[DIR_MM0 * numberOfBCnodes];
+      nz_dirSE  = &NormalZ[DIR_PM0 * numberOfBCnodes];
+      nz_dirNW  = &NormalZ[DIR_MP0 * numberOfBCnodes];
+      nz_dirTE  = &NormalZ[DIR_P0P * numberOfBCnodes];
+      nz_dirBW  = &NormalZ[DIR_M0M * numberOfBCnodes];
+      nz_dirBE  = &NormalZ[DIR_P0M * numberOfBCnodes];
+      nz_dirTW  = &NormalZ[DIR_M0P * numberOfBCnodes];
+      nz_dirTN  = &NormalZ[DIR_0PP * numberOfBCnodes];
+      nz_dirBS  = &NormalZ[DIR_0MM * numberOfBCnodes];
+      nz_dirBN  = &NormalZ[DIR_0PM * numberOfBCnodes];
+      nz_dirTS  = &NormalZ[DIR_0MP * numberOfBCnodes];
       nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
       nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
       nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
@@ -3625,32 +3596,32 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -3675,63 +3646,63 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real VeloX = vx1;
@@ -4264,80 +4235,81 @@ __global__ void QSlipGeomDeviceComp27(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QSlipNormDeviceComp27(real* DD, 
-												 int* k_Q, 
-												 real* QQ,
-												 unsigned int  numberOfBCnodes,
-												 real om1, 
-												 real* NormalX,
-												 real* NormalY,
-												 real* NormalZ,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+__global__ void QSlipNormDeviceComp27(
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int  numberOfBCnodes,
+    real om1, 
+    real* NormalX,
+    real* NormalY,
+    real* NormalZ,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4358,24 +4330,24 @@ __global__ void QSlipNormDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -4390,24 +4362,24 @@ __global__ void QSlipNormDeviceComp27(real* DD,
               *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
               *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
               *nx_dirBSE, *nx_dirBNW; 
-      nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
-      nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
-      nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
-      nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
-      nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
-      nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
-      nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
-      nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
-      nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
-      nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
-      nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
-      nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
-      nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
-      nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
-      nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
-      nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
-      nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
-      nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+      nx_dirE   = &NormalX[DIR_P00 * numberOfBCnodes];
+      nx_dirW   = &NormalX[DIR_M00 * numberOfBCnodes];
+      nx_dirN   = &NormalX[DIR_0P0 * numberOfBCnodes];
+      nx_dirS   = &NormalX[DIR_0M0 * numberOfBCnodes];
+      nx_dirT   = &NormalX[DIR_00P * numberOfBCnodes];
+      nx_dirB   = &NormalX[DIR_00M * numberOfBCnodes];
+      nx_dirNE  = &NormalX[DIR_PP0 * numberOfBCnodes];
+      nx_dirSW  = &NormalX[DIR_MM0 * numberOfBCnodes];
+      nx_dirSE  = &NormalX[DIR_PM0 * numberOfBCnodes];
+      nx_dirNW  = &NormalX[DIR_MP0 * numberOfBCnodes];
+      nx_dirTE  = &NormalX[DIR_P0P * numberOfBCnodes];
+      nx_dirBW  = &NormalX[DIR_M0M * numberOfBCnodes];
+      nx_dirBE  = &NormalX[DIR_P0M * numberOfBCnodes];
+      nx_dirTW  = &NormalX[DIR_M0P * numberOfBCnodes];
+      nx_dirTN  = &NormalX[DIR_0PP * numberOfBCnodes];
+      nx_dirBS  = &NormalX[DIR_0MM * numberOfBCnodes];
+      nx_dirBN  = &NormalX[DIR_0PM * numberOfBCnodes];
+      nx_dirTS  = &NormalX[DIR_0MP * numberOfBCnodes];
       nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
       nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
       nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
@@ -4422,24 +4394,24 @@ __global__ void QSlipNormDeviceComp27(real* DD,
               *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
               *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
               *ny_dirBSE, *ny_dirBNW; 
-      ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
-      ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
-      ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
-      ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
-      ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
-      ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
-      ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
-      ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
-      ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
-      ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
-      ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
-      ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
-      ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
-      ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
-      ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
-      ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
-      ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
-      ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+      ny_dirE   = &NormalY[DIR_P00 * numberOfBCnodes];
+      ny_dirW   = &NormalY[DIR_M00 * numberOfBCnodes];
+      ny_dirN   = &NormalY[DIR_0P0 * numberOfBCnodes];
+      ny_dirS   = &NormalY[DIR_0M0 * numberOfBCnodes];
+      ny_dirT   = &NormalY[DIR_00P * numberOfBCnodes];
+      ny_dirB   = &NormalY[DIR_00M * numberOfBCnodes];
+      ny_dirNE  = &NormalY[DIR_PP0 * numberOfBCnodes];
+      ny_dirSW  = &NormalY[DIR_MM0 * numberOfBCnodes];
+      ny_dirSE  = &NormalY[DIR_PM0 * numberOfBCnodes];
+      ny_dirNW  = &NormalY[DIR_MP0 * numberOfBCnodes];
+      ny_dirTE  = &NormalY[DIR_P0P * numberOfBCnodes];
+      ny_dirBW  = &NormalY[DIR_M0M * numberOfBCnodes];
+      ny_dirBE  = &NormalY[DIR_P0M * numberOfBCnodes];
+      ny_dirTW  = &NormalY[DIR_M0P * numberOfBCnodes];
+      ny_dirTN  = &NormalY[DIR_0PP * numberOfBCnodes];
+      ny_dirBS  = &NormalY[DIR_0MM * numberOfBCnodes];
+      ny_dirBN  = &NormalY[DIR_0PM * numberOfBCnodes];
+      ny_dirTS  = &NormalY[DIR_0MP * numberOfBCnodes];
       ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
       ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
       ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
@@ -4454,24 +4426,24 @@ __global__ void QSlipNormDeviceComp27(real* DD,
               *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
               *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
               *nz_dirBSE, *nz_dirBNW; 
-      nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
-      nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
-      nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
-      nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
-      nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
-      nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
-      nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
-      nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
-      nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
-      nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
-      nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
-      nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
-      nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
-      nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
-      nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
-      nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
-      nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
-      nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+      nz_dirE   = &NormalZ[DIR_P00 * numberOfBCnodes];
+      nz_dirW   = &NormalZ[DIR_M00 * numberOfBCnodes];
+      nz_dirN   = &NormalZ[DIR_0P0 * numberOfBCnodes];
+      nz_dirS   = &NormalZ[DIR_0M0 * numberOfBCnodes];
+      nz_dirT   = &NormalZ[DIR_00P * numberOfBCnodes];
+      nz_dirB   = &NormalZ[DIR_00M * numberOfBCnodes];
+      nz_dirNE  = &NormalZ[DIR_PP0 * numberOfBCnodes];
+      nz_dirSW  = &NormalZ[DIR_MM0 * numberOfBCnodes];
+      nz_dirSE  = &NormalZ[DIR_PM0 * numberOfBCnodes];
+      nz_dirNW  = &NormalZ[DIR_MP0 * numberOfBCnodes];
+      nz_dirTE  = &NormalZ[DIR_P0P * numberOfBCnodes];
+      nz_dirBW  = &NormalZ[DIR_M0M * numberOfBCnodes];
+      nz_dirBE  = &NormalZ[DIR_P0M * numberOfBCnodes];
+      nz_dirTW  = &NormalZ[DIR_M0P * numberOfBCnodes];
+      nz_dirTN  = &NormalZ[DIR_0PP * numberOfBCnodes];
+      nz_dirBS  = &NormalZ[DIR_0MM * numberOfBCnodes];
+      nz_dirBN  = &NormalZ[DIR_0PM * numberOfBCnodes];
+      nz_dirTS  = &NormalZ[DIR_0MP * numberOfBCnodes];
       nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
       nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
       nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
@@ -4511,32 +4483,32 @@ __global__ void QSlipNormDeviceComp27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -4561,63 +4533,63 @@ __global__ void QSlipNormDeviceComp27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real VeloX = vx1;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
index 74e2faa38638228aa5d499aa74226405ab109f7d..1cc5017816aed29d52e74823a8c910bfed35ad42 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
@@ -42,29 +42,32 @@
 
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
-#include "KernelUtilities.h"
+#include "Kernel/Utilities/DistributionHelper.cuh"
+#include <basics/constants/NumericConstants.h>
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
-__host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
-                                                         real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ,
-                                                         real* vx, real* vy, real* vz,
-                                                         real* vx_el,      real* vy_el,      real* vz_el,      //!>mean (temporally filtered) velocities at exchange location
-                                                         real* vx_w_mean,  real* vy_w_mean,  real* vz_w_mean,  //!>mean (temporally filtered) velocities at wall-adjactent node
-                                                         real  vx_w_inst,  real  vy_w_inst,  real  vz_w_inst,  //!>instantaneous velocities at wall-adjactent node
-                                                         real  rho,
-                                                         int* samplingOffset,
-                                                         real q,
-                                                         real forceFactor,                                     //!>e.g., 1.0 for simple-bounce back, or (1+q) for interpolated single-node bounce-back as in Geier et al (2015)
-                                                         real eps,                                             //!>filter constant in temporal averaging
-                                                         real* z0,                                             //!>aerodynamic roughness length
-                                                         bool  hasWallModelMonitor,
-                                                         real* u_star_monitor,
-                                                         real wallMomentumX, real wallMomentumY, real wallMomentumZ,
-                                                         real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ)
+__host__ __device__ __forceinline__ void iMEM( // wall-model helper: estimates u_star via MOST and hands back a clipped wall velocity through the wallVelocity* references
+    uint k, uint kN,                                      //!< k indexes the per-node arrays (wall normals, samplingOffset, z0, u_star_monitor)
+    real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ,
+    real* vx, real* vy, real* vz,
+    real* vx_el,      real* vy_el,      real* vz_el,      //!< mean (temporally filtered) velocities at exchange location
+    real* vx_w_mean,  real* vy_w_mean,  real* vz_w_mean,  //!< mean (temporally filtered) velocities at wall-adjacent node
+    real  vx_w_inst,  real  vy_w_inst,  real  vz_w_inst,  //!< instantaneous velocities at wall-adjacent node
+    real  rho,
+    int* samplingOffset,
+    real q,                                               //!< added to samplingOffset[k] to form the height z used in the log law
+    real forceFactor,                                     //!< e.g., 1.0 for simple bounce-back, or (1+q) for interpolated single-node bounce-back as in Geier et al. (2015)
+    real eps,                                             //!< filter constant in temporal averaging
+    real* z0,                                             //!< aerodynamic roughness length
+    bool  hasWallModelMonitor,                            //!< if true, u_star is stored in u_star_monitor[k]
+    real* u_star_monitor,
+    real wallMomentumX, real wallMomentumY, real wallMomentumZ,
+    real& wallVelocityX, real& wallVelocityY, real&wallVelocityZ)
 {
       real wallNormalX = _wallNormalX[k];
       real wallNormalY = _wallNormalY[k];
@@ -105,7 +108,7 @@ __host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
       real _vz_w = vz_w_inst-vDotN_w*wallNormalZ;
 
       //Compute wall shear stress tau_w via MOST
-      real z = (real)samplingOffset[k] + 0.5; //assuming q=0.5, could be replaced by wall distance via wall normal
+      real z = (real)samplingOffset[k] + q; //z used in the log law below: sampling offset plus the caller-supplied q (no longer hard-coded to 0.5); could be replaced by wall distance via wall normal
       real kappa = 0.4;
       real u_star = vMag_el*kappa/(log(z/z0[k]));
       if(hasWallModelMonitor) u_star_monitor[k] = u_star;
@@ -135,101 +138,44 @@ __host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
       wallVelocityZ = clipVz > -clipVz? min(clipVz, max(-clipVz, -3.0*F_z*forceFactor)): max(clipVz, min(-clipVz, -3.0*F_z*forceFactor));
 }
 
+
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QStressDeviceComp27(real* DD,
-											   int* k_Q,
-                                    int* k_N,
-											   real* QQ,
-                                    unsigned int numberOfBCnodes,
-                                    real om1,
-                                    real* turbViscosity,
-                                    real* vx,
-                                    real* vy,
-                                    real* vz,
-                                    real* normalX,
-                                    real* normalY,
-                                    real* normalZ,
-                                    real* vx_el,
-                                    real* vy_el,
-                                    real* vz_el,
-                                    real* vx_w_mean,
-                                    real* vy_w_mean,
-                                    real* vz_w_mean,
-                                    int* samplingOffset,
-                                    real* z0,
-                                    bool  hasWallModelMonitor,
-                                    real* u_star_monitor,
-                                    real* Fx_monitor,
-                                    real* Fy_monitor,
-                                    real* Fz_monitor,
-											   unsigned int* neighborX,
-                                    unsigned int* neighborY,
-                                    unsigned int* neighborZ,
-                                    unsigned int size_Mat,
-                                    bool isEvenTimestep)
+__global__ void QStressDeviceComp27( // stress BC kernel: computes incoming f's and the wall momentum, then calls iMEM
+    real* DD,  // distribution storage handed to getDistributionReferences27
+    int* k_Q,  // k_Q[k] is used to index turbViscosity
+    int* k_N,  // k_N[k] is forwarded to iMEM as kN
+    real* QQ,  // per-direction q values; direction d starts at QQ[d * numberOfBCnodes]
+    unsigned int numberOfBCnodes,  // stride between directions in QQ
+    real om1,  // combined with turbViscosity into om_turb
+    real* turbViscosity,  // clamped to >= 0 before entering om_turb
+    real* vx,
+    real* vy,
+    real* vz,
+    real* normalX,
+    real* normalY,
+    real* normalZ,
+    real* vx_el,
+    real* vy_el,
+    real* vz_el,
+    real* vx_w_mean,
+    real* vy_w_mean,
+    real* vz_w_mean,
+    int* samplingOffset,
+    real* z0,
+    bool  hasWallModelMonitor,
+    real* u_star_monitor,
+    real* Fx_monitor,
+    real* Fy_monitor,
+    real* Fz_monitor,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,  // forwarded to getDistributionReferences27 (replaces the former size_Mat)
+    bool isEvenTimestep)  // selects the distribution references; negated for the write pass
 {
 
-   Distributions27 D;
-   if (isEvenTimestep==true)//get right array of post coll f's
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   }
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
+   Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep); // get the right array of post-collision f's for this timestep parity
+
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
    const unsigned  y = blockIdx.x;   // Globaler y-Index
@@ -249,24 +195,24 @@ __global__ void QStressDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -309,32 +255,32 @@ __global__ void QStressDeviceComp27(real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];     //post-coll f's
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];     //post-coll f's
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
@@ -359,66 +305,8 @@ __global__ void QStressDeviceComp27(real* DD,
 
       real om_turb = om1 / (c1o1 + c3o1*om1*max(c0o1, turbViscosity[k_Q[k]]));
       //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)      //get adress where incoming f's should be written to
-      {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      }
-      else
-      {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-      }
+
+      D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep); // opposite parity: get the addresses where the incoming f's should be written to
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Compute incoming f's with zero wall velocity
       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -434,7 +322,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_W_in = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, om_turb);
+         // f_W_in = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, om_turb);
+         f_W_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_E, f_W, feq, om_turb, drho, c2o27);
          wallMomentumX += f_E+f_W_in;
       }
 
@@ -443,7 +332,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_E_in = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, om_turb);
+         // f_E_in = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, om_turb);
+         f_E_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_W, f_E, feq, om_turb, drho, c2o27);
          wallMomentumX -= f_W+f_E_in;
       }
 
@@ -452,7 +342,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_S_in = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, om_turb);
+         // f_S_in = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, om_turb);
+         f_S_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_N, f_S, feq, om_turb, drho, c2o27);
          wallMomentumY += f_N+f_S_in;
       }
 
@@ -461,7 +352,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_N_in = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, om_turb);
+         // f_N_in = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, om_turb);
+         f_N_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_S, f_N, feq, om_turb, drho, c2o27);
          wallMomentumY -= f_S+f_N_in;
       }
 
@@ -470,7 +362,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_B_in = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, om_turb);
+         // f_B_in = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, om_turb);
+         f_B_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_T, f_B, feq, om_turb, drho, c2o27);
          wallMomentumZ += f_T+f_B_in;
       }
 
@@ -479,7 +372,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_T_in = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, om_turb);
+         // f_T_in = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, om_turb);
+         f_T_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_B, f_T, feq, om_turb, drho, c2o27);
          wallMomentumZ -= f_B+f_T_in;
       }
 
@@ -488,7 +382,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_SW_in = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, om_turb);
+         // f_SW_in = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, om_turb);
+         f_SW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_NE, f_SW, feq, om_turb, drho, c1o54); // diagonal link: same weight as feq above (c1o54), not the axis weight c2o27
          wallMomentumX += f_NE+f_SW_in;
          wallMomentumY += f_NE+f_SW_in;
       }
@@ -498,7 +393,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_NE_in = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, om_turb);
+         // f_NE_in = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, om_turb);
+         f_NE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_SW, f_NE, feq, om_turb, drho, c1o54);
          wallMomentumX -= f_SW+f_NE_in;
          wallMomentumY -= f_SW+f_NE_in;
       }
@@ -508,7 +404,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_NW_in = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, om_turb);
+         // f_NW_in = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, om_turb);
+         f_NW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_SE, f_NW, feq, om_turb, drho, c1o54);
          wallMomentumX += f_SE+f_NW_in;
          wallMomentumY -= f_SE+f_NW_in;
       }
@@ -518,7 +415,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_SE_in = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, om_turb);
+         // f_SE_in = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, om_turb);
+         f_SE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_NW, f_SE, feq, om_turb, drho, c1o54);
          wallMomentumX -= f_NW+f_SE_in;
          wallMomentumY += f_NW+f_SE_in;
       }
@@ -528,7 +426,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_BW_in = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, om_turb);
+         // f_BW_in = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, om_turb);
+         f_BW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TE, f_BW, feq, om_turb, drho, c1o54);
          wallMomentumX += f_TE+f_BW_in;
          wallMomentumZ += f_TE+f_BW_in;
       }
@@ -538,7 +437,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_TE_in = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, om_turb);
+         // f_TE_in = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, om_turb);
+         f_TE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, c1o54);
          wallMomentumX -= f_BW+f_TE_in;
          wallMomentumZ -= f_BW+f_TE_in;
       }
@@ -548,7 +448,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_TW_in = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, om_turb);
+         // f_TW_in = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, om_turb);
+         f_TW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BE, f_TW, feq, om_turb, drho, c1o54);
          wallMomentumX += f_BE+f_TW_in;
          wallMomentumZ -= f_BE+f_TW_in;
       }
@@ -558,7 +459,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_BE_in = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, om_turb);
+         // f_BE_in = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, om_turb);
+         f_BE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TW, f_BE, feq, om_turb, drho, c1o54);
          wallMomentumX -= f_TW+f_BE_in;
          wallMomentumZ += f_TW+f_BE_in;
       }
@@ -568,7 +470,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_BS_in = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, om_turb);
+         // f_BS_in = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, om_turb);
+         f_BS_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TN, f_BS, feq, om_turb, drho, c1o54);
          wallMomentumY += f_TN+f_BS_in;
          wallMomentumZ += f_TN+f_BS_in;
       }
@@ -578,7 +481,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_TN_in = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, om_turb);
+         // f_TN_in = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, om_turb);
+         f_TN_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BS, f_TN, feq, om_turb, drho, c1o54);
          wallMomentumY -= f_BS+f_TN_in;
          wallMomentumZ -= f_BS+f_TN_in;
       }
@@ -588,7 +492,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_TS_in = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, om_turb);
+         // f_TS_in = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, om_turb);
+         f_TS_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BN, f_TS, feq, om_turb, drho, c1o54);
          wallMomentumY += f_BN+f_TS_in;
          wallMomentumZ -= f_BN+f_TS_in;
       }
@@ -598,7 +503,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_BN_in = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, om_turb);
+         // f_BN_in = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, om_turb);
+         f_BN_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TS, f_BN, feq, om_turb, drho, c1o54);
          wallMomentumY -= f_TS+f_BN_in;
          wallMomentumZ += f_TS+f_BN_in;
       }
@@ -608,7 +514,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_BSW_in = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, om_turb);
+         // f_BSW_in = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, om_turb);
+         f_BSW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TNE, f_BSW, feq, om_turb, drho, c1o216);
          wallMomentumX += f_TNE+f_BSW_in;
          wallMomentumY += f_TNE+f_BSW_in;
          wallMomentumZ += f_TNE+f_BSW_in;
@@ -619,7 +526,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_TNE_in = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, om_turb);
+         // f_TNE_in = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, om_turb);
+         f_TNE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BSW, f_TNE, feq, om_turb, drho, c1o216);
          wallMomentumX -= f_BSW+f_TNE_in;
          wallMomentumY -= f_BSW+f_TNE_in;
          wallMomentumZ -= f_BSW+f_TNE_in;
@@ -630,7 +538,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_TSW_in = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, om_turb);
+         // f_TSW_in = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, om_turb);
+         f_TSW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BNE, f_TSW, feq, om_turb, drho, c1o216);
          wallMomentumX += f_BNE+f_TSW_in;
          wallMomentumY += f_BNE+f_TSW_in;
          wallMomentumZ -= f_BNE+f_TSW_in;
@@ -641,7 +550,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_BNE_in = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, om_turb);
+         // f_BNE_in = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, om_turb);
+         f_BNE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TSW, f_BNE, feq, om_turb, drho, c1o216);
          wallMomentumX -= f_TSW+f_BNE_in;
          wallMomentumY -= f_TSW+f_BNE_in;
          wallMomentumZ += f_TSW+f_BNE_in;
@@ -652,7 +562,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_BNW_in = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, om_turb);
+         // f_BNW_in = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, om_turb);
+         f_BNW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TSE, f_BNW, feq, om_turb, drho, c1o216);
          wallMomentumX += f_TSE+f_BNW_in;
          wallMomentumY -= f_TSE+f_BNW_in;
          wallMomentumZ += f_TSE+f_BNW_in;
@@ -663,7 +574,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_TSE_in = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, om_turb);
+         // f_TSE_in = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, om_turb);
+         f_TSE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BNW, f_TSE, feq, om_turb, drho, c1o216);
          wallMomentumX -= f_BNW+f_TSE_in;
          wallMomentumY += f_BNW+f_TSE_in;
          wallMomentumZ -= f_BNW+f_TSE_in;
@@ -674,7 +586,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_TNW_in = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, om_turb);
+         // f_TNW_in = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, om_turb);
+         f_TNW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BSE, f_TNW, feq, om_turb, drho, c1o216);
          wallMomentumX += f_BSE+f_TNW_in;
          wallMomentumY -= f_BSE+f_TNW_in;
          wallMomentumZ -= f_BSE+f_TNW_in;
@@ -685,7 +598,8 @@ __global__ void QStressDeviceComp27(real* DD,
       {
          velocityLB = -vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_BSE_in = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, om_turb);
+         // f_BSE_in = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, om_turb);
+         f_BSE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TNW, f_BSE, feq, om_turb, drho, c1o216);
          wallMomentumX -= f_TNW+f_BSE_in;
          wallMomentumY += f_TNW+f_BSE_in;
          wallMomentumZ += f_TNW+f_BSE_in;
@@ -696,7 +610,7 @@ __global__ void QStressDeviceComp27(real* DD,
       // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real VeloX=0.0, VeloY=0.0, VeloZ=0.0;
 
-      q = 0.5f;
+      q = q_dirB[k];
       real eps = 0.001f;
 
       iMEM( k, k_N[k],
@@ -968,70 +882,12 @@ __global__ void BBStressDevice27( real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   }
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
+
+   Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep);
+
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
    const unsigned  y = blockIdx.x;   // Globaler y-Index
@@ -1051,24 +907,24 @@ __global__ void BBStressDevice27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1112,32 +968,32 @@ __global__ void BBStressDevice27( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho;
@@ -1159,66 +1015,8 @@ __global__ void BBStressDevice27( real* DD,
                  (f_T - f_B)) / (c1o1 + drho);
 
       //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      }
-      else
-      {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-      }
+
+      D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E_in,  f_W_in,  f_N_in,  f_S_in,  f_T_in,  f_B_in,   f_NE_in,  f_SW_in,  f_SE_in,  f_NW_in,  f_TE_in,  f_BW_in,  f_BE_in,
          f_TW_in, f_TN_in, f_BS_in, f_BN_in, f_TS_in, f_TNE_in, f_TSW_in, f_TSE_in, f_TNW_in, f_BNE_in, f_BSW_in, f_BSE_in, f_BNW_in;
@@ -1442,7 +1240,7 @@ __global__ void BBStressDevice27( real* DD,
       // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real VeloX=0.0, VeloY=0.0, VeloZ=0.0;
 
-      q = 0.5f;
+      q = q_dirB[k];
       real eps = 0.001f;
 
       iMEM( k, k_N[k],
@@ -1715,70 +1513,11 @@ __global__ void BBStressPressureDevice27( real* DD,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat,
+                                             unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   }
-   else
-   {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-   }
+   Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep);
+
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
    const unsigned  y = blockIdx.x;   // Globaler y-Index
@@ -1798,24 +1537,24 @@ __global__ void BBStressPressureDevice27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1859,32 +1598,32 @@ __global__ void BBStressPressureDevice27( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho;
@@ -1906,66 +1645,8 @@ __global__ void BBStressPressureDevice27( real* DD,
                  (f_T - f_B)) / (c1o1 + drho);
 
       //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      }
-      else
-      {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-      }
+      D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep);
+
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E_in,  f_W_in,  f_N_in,  f_S_in,  f_T_in,  f_B_in,   f_NE_in,  f_SW_in,  f_SE_in,  f_NW_in,  f_TE_in,  f_BW_in,  f_BE_in,
          f_TW_in, f_TN_in, f_BS_in, f_BN_in, f_TS_in, f_TNE_in, f_TSW_in, f_TSE_in, f_TNW_in, f_BNE_in, f_BSW_in, f_BSE_in, f_BNW_in;
@@ -2189,7 +1870,7 @@ __global__ void BBStressPressureDevice27( real* DD,
       // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real VeloX=0.0, VeloY=0.0, VeloZ=0.0;
 
-      q = 0.5f;
+      q = q_dirB[k];
       real eps = 0.001f;
 
       iMEM( k, k_N[k],
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
index 55f810628f370976289d1492e9916d5d3fa0dbb8..f76b31c768553aed46fd640d9cdace8ba753b6b8 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
@@ -9,9 +9,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 /////////////////////////////////////////////////////////////////////////
@@ -27,69 +27,69 @@ __global__ void QVelDeviceCompThinWallsPartOne27(
 	uint* neighborX,
 	uint* neighborY,
 	uint* neighborZ,
-	uint size_Mat, 
+	unsigned long long numberOfLBnodes, 
 	bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -114,24 +114,24 @@ __global__ void QVelDeviceCompThinWallsPartOne27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -174,32 +174,32 @@ __global__ void QVelDeviceCompThinWallsPartOne27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -456,69 +456,69 @@ __global__ void QDeviceCompThinWallsPartOne27(
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
-	unsigned int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep == true)
 	{
-		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	}
 	else
 	{
-		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -539,24 +539,24 @@ __global__ void QDeviceCompThinWallsPartOne27(
 			*q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW;
-		q_dirE = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirE = &QQ[DIR_P00 * numberOfBCnodes];
+		q_dirW = &QQ[DIR_M00 * numberOfBCnodes];
+		q_dirN = &QQ[DIR_0P0 * numberOfBCnodes];
+		q_dirS = &QQ[DIR_0M0 * numberOfBCnodes];
+		q_dirT = &QQ[DIR_00P * numberOfBCnodes];
+		q_dirB = &QQ[DIR_00M * numberOfBCnodes];
+		q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes];
+		q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes];
+		q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes];
+		q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes];
+		q_dirTE = &QQ[DIR_P0P * numberOfBCnodes];
+		q_dirBW = &QQ[DIR_M0M * numberOfBCnodes];
+		q_dirBE = &QQ[DIR_P0M * numberOfBCnodes];
+		q_dirTW = &QQ[DIR_M0P * numberOfBCnodes];
+		q_dirTN = &QQ[DIR_0PP * numberOfBCnodes];
+		q_dirBS = &QQ[DIR_0MM * numberOfBCnodes];
+		q_dirBN = &QQ[DIR_0PM * numberOfBCnodes];
+		q_dirTS = &QQ[DIR_0MP * numberOfBCnodes];
 		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -882,7 +882,7 @@ __global__ void QThinWallsPartTwo27(
 	uint* neighborY,
 	uint* neighborZ,
 	uint* neighborWSB,
-	uint size_Mat, 
+	unsigned long long numberOfLBnodes, 
 	bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -904,24 +904,24 @@ __global__ void QThinWallsPartTwo27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -980,123 +980,123 @@ __global__ void QThinWallsPartTwo27(
 	  Distributions27 D, DN;
 	  if (isEvenTimestep == true)
 	  {
-		  D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		  D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		  D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		  D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		  D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		  D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		  D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		  D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		  D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		  D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		  D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		  D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		  D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		  D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		  D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		  D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		  D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		  D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		  D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		  D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		  D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		  D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		  D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		  D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		  D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		  D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		  D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
+		  D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		  D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		  D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		  D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		  D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		  D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		  D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		  D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		  D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		  D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		  D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		  D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		  D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		  D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		  D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		  D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		  D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		  D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		  D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		  D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		  D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		  D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		  D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		  D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		  D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		  D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		  D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	  }
 	  else
 	  {
-		  D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		  D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		  D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		  D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		  D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		  D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		  D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		  D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		  D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		  D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		  D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		  D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		  D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		  D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		  D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		  D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		  D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		  D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		  D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		  D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		  D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		  D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		  D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		  D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		  D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		  D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		  D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
+		  D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		  D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		  D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		  D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		  D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		  D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		  D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		  D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		  D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		  D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		  D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		  D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		  D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		  D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		  D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		  D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		  D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		  D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		  D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		  D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		  D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		  D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		  D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		  D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		  D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		  D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		  D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	  }
 	  if (isEvenTimestep==false)
       {
-         DN.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         DN.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         DN.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         DN.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         DN.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         DN.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         DN.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         DN.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         DN.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         DN.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         DN.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         DN.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         DN.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         DN.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         DN.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         DN.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         DN.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         DN.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         DN.f[DIR_000] = &DD[DIR_000*size_Mat];
-         DN.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         DN.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         DN.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         DN.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         DN.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         DN.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         DN.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         DN.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         DN.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         DN.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         DN.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         DN.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         DN.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         DN.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         DN.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         DN.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         DN.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         DN.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         DN.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         DN.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         DN.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         DN.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         DN.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         DN.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         DN.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         DN.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         DN.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         DN.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         DN.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         DN.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         DN.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         DN.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         DN.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         DN.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         DN.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         DN.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         DN.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         DN.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         DN.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         DN.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         DN.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         DN.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         DN.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         DN.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         DN.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         DN.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         DN.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         DN.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         DN.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         DN.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         DN.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         DN.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         DN.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         DN.f[DIR_000] = &DD[DIR_000*size_Mat];
-         DN.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         DN.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         DN.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         DN.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         DN.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         DN.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         DN.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         DN.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         DN.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         DN.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         DN.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         DN.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         DN.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         DN.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         DN.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         DN.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         DN.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         DN.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         DN.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         DN.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         DN.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         DN.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         DN.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         DN.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         DN.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         DN.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         DN.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         DN.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         DN.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         DN.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         DN.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         DN.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         DN.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         DN.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         DN.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //directions allways exchange
@@ -1106,24 +1106,24 @@ __global__ void QThinWallsPartTwo27(
 	  //( 1  1  1) ( 1  0  0) ( 0  1  0) ( 0  0  1) ( 1  1  0) ( 1  0  1) ( 0  1  1) (-1 -1  1) (-1  1 -1) ( 1 -1 -1) (-1  1  0) (-1  0  1) ( 0 -1  1)
 	  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real q, tmp;
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kw  ] < GEO_FLUID){tmp = (DN.f[DIR_M00  ])[kw  ]; (DN.f[DIR_M00  ])[kw  ]=(D.f[DIR_M00  ])[kw  ]; (D.f[DIR_M00  ])[kw  ]=tmp;}}
-	  q = q_dirW[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P00  ])[ke  ]; (DN.f[DIR_P00  ])[ke  ]=(D.f[DIR_P00  ])[ke  ]; (D.f[DIR_P00  ])[ke  ]=tmp;}}
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1){ if (geom[ks  ] < GEO_FLUID){tmp = (DN.f[DIR_0M0  ])[ks  ]; (DN.f[DIR_0M0  ])[ks  ]=(D.f[DIR_0M0  ])[ks  ]; (D.f[DIR_0M0  ])[ks  ]=tmp;}}
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0P0  ])[kn  ]; (DN.f[DIR_0P0  ])[kn  ]=(D.f[DIR_0P0  ])[kn  ]; (D.f[DIR_0P0  ])[kn  ]=tmp;}}
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kb  ] < GEO_FLUID){tmp = (DN.f[DIR_00M  ])[kb  ]; (DN.f[DIR_00M  ])[kb  ]=(D.f[DIR_00M  ])[kb  ]; (D.f[DIR_00M  ])[kb  ]=tmp;}}
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_00P  ])[kt  ]; (DN.f[DIR_00P  ])[kt  ]=(D.f[DIR_00P  ])[kt  ]; (D.f[DIR_00P  ])[kt  ]=tmp;}}
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[ksw ] < GEO_FLUID){tmp = (DN.f[DIR_MM0 ])[ksw ]; (DN.f[DIR_MM0 ])[ksw ]=(D.f[DIR_MM0 ])[ksw ]; (D.f[DIR_MM0 ])[ksw ]=tmp;}}
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PP0 ])[kne ]; (DN.f[DIR_PP0 ])[kne ]=(D.f[DIR_PP0 ])[kne ]; (D.f[DIR_PP0 ])[kne ]=tmp;}}
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MP0 ])[knw ]; (DN.f[DIR_MP0 ])[knw ]=(D.f[DIR_MP0 ])[knw ]; (D.f[DIR_MP0 ])[knw ]=tmp;}}
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kmp0] < GEO_FLUID){tmp = (DN.f[DIR_PM0 ])[kse ]; (DN.f[DIR_PM0 ])[kse ]=(D.f[DIR_PM0 ])[kse ]; (D.f[DIR_PM0 ])[kse ]=tmp;}}
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbw ] < GEO_FLUID){tmp = (DN.f[DIR_M0M ])[kbw ]; (DN.f[DIR_M0M ])[kbw ]=(D.f[DIR_M0M ])[kbw ]; (D.f[DIR_M0M ])[kbw ]=tmp;}}
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P0P ])[kte ]; (DN.f[DIR_P0P ])[kte ]=(D.f[DIR_P0P ])[kte ]; (D.f[DIR_P0P ])[kte ]=tmp;}}
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_M0P ])[ktw ]; (DN.f[DIR_M0P ])[ktw ]=(D.f[DIR_M0P ])[ktw ]; (D.f[DIR_M0P ])[ktw ]=tmp;}}
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[km0p] < GEO_FLUID){tmp = (DN.f[DIR_P0M ])[kbe ]; (DN.f[DIR_P0M ])[kbe ]=(D.f[DIR_P0M ])[kbe ]; (D.f[DIR_P0M ])[kbe ]=tmp;}}
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbs ] < GEO_FLUID){tmp = (DN.f[DIR_0MM ])[kbs ]; (DN.f[DIR_0MM ])[kbs ]=(D.f[DIR_0MM ])[kbs ]; (D.f[DIR_0MM ])[kbs ]=tmp;}}
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0PP ])[ktn ]; (DN.f[DIR_0PP ])[ktn ]=(D.f[DIR_0PP ])[ktn ]; (D.f[DIR_0PP ])[ktn ]=tmp;}}
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0MP ])[kts ]; (DN.f[DIR_0MP ])[kts ]=(D.f[DIR_0MP ])[kts ]; (D.f[DIR_0MP ])[kts ]=tmp;}}
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1){ if (geom[k0mp] < GEO_FLUID){tmp = (DN.f[DIR_0PM ])[kbn ]; (DN.f[DIR_0PM ])[kbn ]=(D.f[DIR_0PM ])[kbn ]; (D.f[DIR_0PM ])[kbn ]=tmp;}}
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kw  ] < GEO_FLUID){tmp = (DN.f[DIR_M00])[kw  ]; (DN.f[DIR_M00])[kw  ]=(D.f[DIR_M00])[kw  ]; (D.f[DIR_M00])[kw  ]=tmp;}}
+	  q = q_dirW[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P00])[ke  ]; (DN.f[DIR_P00])[ke  ]=(D.f[DIR_P00])[ke  ]; (D.f[DIR_P00])[ke  ]=tmp;}}
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1){ if (geom[ks  ] < GEO_FLUID){tmp = (DN.f[DIR_0M0])[ks  ]; (DN.f[DIR_0M0])[ks  ]=(D.f[DIR_0M0])[ks  ]; (D.f[DIR_0M0])[ks  ]=tmp;}}
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0P0])[kn  ]; (DN.f[DIR_0P0])[kn  ]=(D.f[DIR_0P0])[kn  ]; (D.f[DIR_0P0])[kn  ]=tmp;}}
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kb  ] < GEO_FLUID){tmp = (DN.f[DIR_00M])[kb  ]; (DN.f[DIR_00M])[kb  ]=(D.f[DIR_00M])[kb  ]; (D.f[DIR_00M])[kb  ]=tmp;}}
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_00P])[kt  ]; (DN.f[DIR_00P])[kt  ]=(D.f[DIR_00P])[kt  ]; (D.f[DIR_00P])[kt  ]=tmp;}}
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[ksw ] < GEO_FLUID){tmp = (DN.f[DIR_MM0])[ksw ]; (DN.f[DIR_MM0])[ksw ]=(D.f[DIR_MM0])[ksw ]; (D.f[DIR_MM0])[ksw ]=tmp;}}
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PP0])[kne ]; (DN.f[DIR_PP0])[kne ]=(D.f[DIR_PP0])[kne ]; (D.f[DIR_PP0])[kne ]=tmp;}}
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MP0])[knw ]; (DN.f[DIR_MP0])[knw ]=(D.f[DIR_MP0])[knw ]; (D.f[DIR_MP0])[knw ]=tmp;}}
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kmp0] < GEO_FLUID){tmp = (DN.f[DIR_PM0])[kse ]; (DN.f[DIR_PM0])[kse ]=(D.f[DIR_PM0])[kse ]; (D.f[DIR_PM0])[kse ]=tmp;}}
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbw ] < GEO_FLUID){tmp = (DN.f[DIR_M0M])[kbw ]; (DN.f[DIR_M0M])[kbw ]=(D.f[DIR_M0M])[kbw ]; (D.f[DIR_M0M])[kbw ]=tmp;}}
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P0P])[kte ]; (DN.f[DIR_P0P])[kte ]=(D.f[DIR_P0P])[kte ]; (D.f[DIR_P0P])[kte ]=tmp;}}
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_M0P])[ktw ]; (DN.f[DIR_M0P])[ktw ]=(D.f[DIR_M0P])[ktw ]; (D.f[DIR_M0P])[ktw ]=tmp;}}
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[km0p] < GEO_FLUID){tmp = (DN.f[DIR_P0M])[kbe ]; (DN.f[DIR_P0M])[kbe ]=(D.f[DIR_P0M])[kbe ]; (D.f[DIR_P0M])[kbe ]=tmp;}}
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbs ] < GEO_FLUID){tmp = (DN.f[DIR_0MM])[kbs ]; (DN.f[DIR_0MM])[kbs ]=(D.f[DIR_0MM])[kbs ]; (D.f[DIR_0MM])[kbs ]=tmp;}}
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0PP])[ktn ]; (DN.f[DIR_0PP])[ktn ]=(D.f[DIR_0PP])[ktn ]; (D.f[DIR_0PP])[ktn ]=tmp;}}
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0MP])[kts ]; (DN.f[DIR_0MP])[kts ]=(D.f[DIR_0MP])[kts ]; (D.f[DIR_0MP])[kts ]=tmp;}}
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1){ if (geom[k0mp] < GEO_FLUID){tmp = (DN.f[DIR_0PM])[kbn ]; (DN.f[DIR_0PM])[kbn ]=(D.f[DIR_0PM])[kbn ]; (D.f[DIR_0PM])[kbn ]=tmp;}}
       q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbsw] < GEO_FLUID){tmp = (DN.f[DIR_MMM])[kbsw]; (DN.f[DIR_MMM])[kbsw]=(D.f[DIR_MMM])[kbsw]; (D.f[DIR_MMM])[kbsw]=tmp;}}
       q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PPP])[ktne]; (DN.f[DIR_PPP])[ktne]=(D.f[DIR_PPP])[ktne]; (D.f[DIR_PPP])[ktne]=tmp;}}
       q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MMP])[ktsw]; (DN.f[DIR_MMP])[ktsw]=(D.f[DIR_MMP])[ktsw]; (D.f[DIR_MMP])[ktsw]=tmp;}}
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
index f8cf8ab13c39d55477bf006cd27f7943dcb5b53a..82e5f98fda0086458f2bc937dbe33e7b66feb2c5 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
@@ -9,14 +9,16 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include "basics/constants/NumericConstants.h"
 
 #include "lbm/MacroscopicQuantities.h"
 #include "../Kernel/Utilities/DistributionHelper.cuh"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void CalcTurbulenceIntensity(
@@ -34,19 +36,21 @@ __global__ void CalcTurbulenceIntensity(
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
-   unsigned int size_Mat, 
+   unsigned long long numberOfLBnodes, 
    bool isEvenTimestep)
 {
-   const unsigned k = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-   if (k >= size_Mat)
+   if (nodeIndex >= numberOfLBnodes)
        return;
 
-   if (!vf::gpu::isValidFluidNode(typeOfGridNode[k]))
+   if (!isValidFluidNode(typeOfGridNode[nodeIndex]))
        return;
 
-   vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
-                                              neighborZ);
+   DistributionWrapper distr_wrapper(distributions, numberOfLBnodes, isEvenTimestep, nodeIndex, neighborX, neighborY, neighborZ);
    const auto &distribution = distr_wrapper.distribution;
 
    // analogue to LBCalcMacCompSP27
@@ -58,16 +62,16 @@ __global__ void CalcTurbulenceIntensity(
 
    // compute subtotals:
    // fluctuations
-   vxx[k] = vxx[k] + vx * vx;
-   vyy[k] = vyy[k] + vy * vy;
-   vzz[k] = vzz[k] + vz * vz;
-   vxy[k] = vxy[k] + vx * vy;
-   vxz[k] = vxz[k] + vx * vz;
-   vyz[k] = vyz[k] + vy * vz;
+   vxx[nodeIndex] = vxx[nodeIndex] + vx * vx;
+   vyy[nodeIndex] = vyy[nodeIndex] + vy * vy;
+   vzz[nodeIndex] = vzz[nodeIndex] + vz * vz;
+   vxy[nodeIndex] = vxy[nodeIndex] + vx * vy;
+   vxz[nodeIndex] = vxz[nodeIndex] + vx * vz;
+   vyz[nodeIndex] = vyz[nodeIndex] + vy * vz;
 
    // velocity (for mean velocity)
-   vx_mean[k] = vx_mean[k] + vx;
-   vy_mean[k] = vy_mean[k] + vy;
-   vz_mean[k] = vz_mean[k] + vz; 
+   vx_mean[nodeIndex] = vx_mean[nodeIndex] + vx;
+   vy_mean[nodeIndex] = vy_mean[nodeIndex] + vy;
+   vz_mean[nodeIndex] = vz_mean[nodeIndex] + vz; 
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh
index 58856f624fa1dfd2488c3061721e9dac53a67d07..ebf67339b65782a5c10c1b756c3fe5e06c3977d1 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh
@@ -38,9 +38,9 @@
 #include <cuda_runtime.h>
 
 #include "LBM/LB.h" 
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 __inline__ __device__ real calcTurbulentViscositySmagorinsky(real Cs, real dxux, real dyuy, real dzuz, real Dxy, real Dxz , real Dyz)
 {
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
index a8f02fee717caf7f67624243b873fe993b5c7927..d00941aba35d2885e893eea1ffe23c89002046c4 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
@@ -32,15 +32,15 @@
 //======================================================================================
 
 #include "TurbulentViscosityKernels.h"
-#include "lbm/constants/NumericConstants.h"
+#include "basics/constants/NumericConstants.h"
 #include "Parameter/Parameter.h"
 #include "cuda/CudaGrid.h"
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 #include "LBM/LB.h"
-#include "Kernel/Utilities/DistributionHelper.cuh"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 __host__ __device__ __forceinline__ void calcDerivatives(const uint& k, uint& kM, uint& kP, uint* typeOfGridNode, real* vx, real* vy, real* vz, real& dvx, real& dvy, real& dvz)
 {
@@ -53,26 +53,31 @@ __host__ __device__ __forceinline__ void calcDerivatives(const uint& k, uint& kM
     dvz = ((fluidP ? vz[kP] : vz[k])-(fluidM ? vz[kM] : vz[k]))*div;
 }
 
-__global__ void calcAMD(real* vx,
-                        real* vy,
-                        real* vz,
-                        real* turbulentViscosity,
-                        uint* neighborX,
-                        uint* neighborY,
-                        uint* neighborZ,
-                        uint* neighborWSB,
-                        uint* typeOfGridNode,
-                        uint size_Mat,
-                        real SGSConstant)
+__global__ void calcAMD(
+    real* vx,
+    real* vy,
+    real* vz,
+    real* turbulentViscosity,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighborWSB,
+    uint* typeOfGridNode,
+    unsigned long long numberOfLBnodes,
+    real SGSConstant)
 {
-    const uint k = vf::gpu::getNodeIndex();
-    if(k >= size_Mat) return;
-    if(typeOfGridNode[k] != GEO_FLUID) return;
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = vf::gpu::getNodeIndex();
 
-    uint kPx = neighborX[k];
-    uint kPy = neighborY[k];
-    uint kPz = neighborZ[k];
-    uint kMxyz = neighborWSB[k];
+    if(nodeIndex >= numberOfLBnodes) return;
+    if(typeOfGridNode[nodeIndex] != GEO_FLUID) return;
+
+    uint kPx = neighborX[nodeIndex];
+    uint kPy = neighborY[nodeIndex];
+    uint kPz = neighborZ[nodeIndex];
+    uint kMxyz = neighborWSB[nodeIndex];
     uint kMx = neighborZ[neighborY[kMxyz]];
     uint kMy = neighborZ[neighborX[kMxyz]];
     uint kMz = neighborY[neighborX[kMxyz]];
@@ -81,9 +86,9 @@ __global__ void calcAMD(real* vx,
          dvydx, dvydy, dvydz,
          dvzdx, dvzdy, dvzdz;
 
-    calcDerivatives(k, kMx, kPx, typeOfGridNode, vx, vy, vz, dvxdx, dvydx, dvzdx);
-    calcDerivatives(k, kMy, kPy, typeOfGridNode, vx, vy, vz, dvxdy, dvydy, dvzdy);
-    calcDerivatives(k, kMz, kPz, typeOfGridNode, vx, vy, vz, dvxdz, dvydz, dvzdz);
+    calcDerivatives(nodeIndex, kMx, kPx, typeOfGridNode, vx, vy, vz, dvxdx, dvydx, dvzdx);
+    calcDerivatives(nodeIndex, kMy, kPy, typeOfGridNode, vx, vy, vz, dvxdy, dvydy, dvzdy);
+    calcDerivatives(nodeIndex, kMz, kPz, typeOfGridNode, vx, vy, vz, dvxdz, dvydz, dvzdz);
 
     real denominator =  dvxdx*dvxdx + dvydx*dvydx + dvzdx*dvzdx + 
                         dvxdy*dvxdy + dvydy*dvydy + dvzdy*dvzdy +
@@ -95,7 +100,7 @@ __global__ void calcAMD(real* vx,
                         (dvxdx*dvzdx + dvxdy*dvzdy + dvxdz*dvzdz) * (dvxdz+dvzdx) + 
                         (dvydx*dvzdx + dvydy*dvzdy + dvydz*dvzdz) * (dvydz+dvzdy);
 
-    turbulentViscosity[k] = denominator != c0o1 ? max(c0o1,-SGSConstant*enumerator)/denominator : c0o1;
+    turbulentViscosity[nodeIndex] = denominator != c0o1 ? max(c0o1,-SGSConstant*enumerator)/denominator : c0o1;
 }
 
 void calcTurbulentViscosityAMD(Parameter* para, int level)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.h b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.h
index b227e680301cd4639d48a5cf3ce74f08eb7e1b9f..74d0e69a181e94c6d34e9207f203fb852d9d2fd1 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.h
@@ -38,10 +38,10 @@
 #include <cuda_runtime.h>
 
 #include "LBM/LB.h" 
-#include "Core/DataTypes.h"
-#include <lbm/constants/NumericConstants.h>
+#include "DataTypes.h"
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 class Parameter;
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
index 05c85e8b546aeaa964b1dbb61cbf01dd9b82ca1a..6207c98f9290520199e3cd9c31294ef5520b1798 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
@@ -1,96 +1,120 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//////////////////////////////////////////////////////////////////////////
-
-/* Device code */
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file VelocityBCs27.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include "lbm/constants/NumericConstants.h"
-#include "KernelUtilities.h"
+#include "basics/constants/NumericConstants.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceCompPlusSlip27(
-													real* vx,
-													real* vy,
-													real* vz,
-													real* DD, 
-													int* k_Q, 
-													real* QQ,
-													unsigned int numberOfBCnodes, 
-													real om1, 
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat, 
-													bool isEvenTimestep)
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -115,24 +139,24 @@ __global__ void QVelDeviceCompPlusSlip27(
 		   *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS,
 		   *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 		   *q_dirBSE, *q_dirBNW;
-	   q_dirE = &QQ[DIR_P00   * numberOfBCnodes];
-	   q_dirW = &QQ[DIR_M00   * numberOfBCnodes];
-	   q_dirN = &QQ[DIR_0P0   * numberOfBCnodes];
-	   q_dirS = &QQ[DIR_0M0   * numberOfBCnodes];
-	   q_dirT = &QQ[DIR_00P   * numberOfBCnodes];
-	   q_dirB = &QQ[DIR_00M   * numberOfBCnodes];
-	   q_dirNE = &QQ[DIR_PP0  * numberOfBCnodes];
-	   q_dirSW = &QQ[DIR_MM0  * numberOfBCnodes];
-	   q_dirSE = &QQ[DIR_PM0  * numberOfBCnodes];
-	   q_dirNW = &QQ[DIR_MP0  * numberOfBCnodes];
-	   q_dirTE = &QQ[DIR_P0P  * numberOfBCnodes];
-	   q_dirBW = &QQ[DIR_M0M  * numberOfBCnodes];
-	   q_dirBE = &QQ[DIR_P0M  * numberOfBCnodes];
-	   q_dirTW = &QQ[DIR_M0P  * numberOfBCnodes];
-	   q_dirTN = &QQ[DIR_0PP  * numberOfBCnodes];
-	   q_dirBS = &QQ[DIR_0MM  * numberOfBCnodes];
-	   q_dirBN = &QQ[DIR_0PM  * numberOfBCnodes];
-	   q_dirTS = &QQ[DIR_0MP  * numberOfBCnodes];
+	   q_dirE = &QQ[DIR_P00 * numberOfBCnodes];
+	   q_dirW = &QQ[DIR_M00 * numberOfBCnodes];
+	   q_dirN = &QQ[DIR_0P0 * numberOfBCnodes];
+	   q_dirS = &QQ[DIR_0M0 * numberOfBCnodes];
+	   q_dirT = &QQ[DIR_00P * numberOfBCnodes];
+	   q_dirB = &QQ[DIR_00M * numberOfBCnodes];
+	   q_dirNE = &QQ[DIR_PP0 * numberOfBCnodes];
+	   q_dirSW = &QQ[DIR_MM0 * numberOfBCnodes];
+	   q_dirSE = &QQ[DIR_PM0 * numberOfBCnodes];
+	   q_dirNW = &QQ[DIR_MP0 * numberOfBCnodes];
+	   q_dirTE = &QQ[DIR_P0P * numberOfBCnodes];
+	   q_dirBW = &QQ[DIR_M0M * numberOfBCnodes];
+	   q_dirBE = &QQ[DIR_P0M * numberOfBCnodes];
+	   q_dirTW = &QQ[DIR_M0P * numberOfBCnodes];
+	   q_dirTN = &QQ[DIR_0PP * numberOfBCnodes];
+	   q_dirBS = &QQ[DIR_0MM * numberOfBCnodes];
+	   q_dirBN = &QQ[DIR_0PM * numberOfBCnodes];
+	   q_dirTS = &QQ[DIR_0MP * numberOfBCnodes];
 	   q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 	   q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 	   q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -225,63 +249,63 @@ __global__ void QVelDeviceCompPlusSlip27(
 	   //////////////////////////////////////////////////////////////////////////
 	   if (isEvenTimestep == false)
 	   {
-		   D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
-		   D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
-		   D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
-		   D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
-		   D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
-		   D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
-		   D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
-		   D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
-		   D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
-		   D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
-		   D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
-		   D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
-		   D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
-		   D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
-		   D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
-		   D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
-		   D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
-		   D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
-		   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		   D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
-		   D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
-		   D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
-		   D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
-		   D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
-		   D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
-		   D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
-		   D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
+		   D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		   D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		   D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		   D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		   D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		   D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		   D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		   D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		   D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		   D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		   D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		   D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		   D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		   D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		   D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		   D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		   D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		   D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		   D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		   D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		   D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		   D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		   D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		   D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		   D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		   D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	   }
 	   else
 	   {
-		   D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
-		   D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
-		   D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
-		   D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
-		   D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
-		   D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
-		   D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
-		   D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
-		   D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
-		   D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
-		   D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
-		   D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
-		   D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
-		   D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
-		   D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
-		   D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
-		   D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
-		   D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
-		   D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		   D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
-		   D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
-		   D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
-		   D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
-		   D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
-		   D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
-		   D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
-		   D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
+		   D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		   D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		   D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		   D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		   D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		   D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		   D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		   D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		   D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		   D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		   D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		   D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		   D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		   D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		   D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		   D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		   D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		   D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		   D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		   D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		   D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		   D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		   D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		   D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		   D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		   D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		   D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	   }
 	   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	   //Test
@@ -553,18 +577,19 @@ __global__ void QVelDeviceCompPlusSlip27(
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QVeloDeviceEQ27(real* VeloX,
-										   real* VeloY,
-										   real* VeloZ,
-                                           real* DD, 
-                                           int* k_Q, 
-                                           int numberOfBCnodes, 
-                                           real om1, 
-                                           unsigned int* neighborX,
-                                           unsigned int* neighborY,
-                                           unsigned int* neighborZ,
-                                           unsigned int size_Mat, 
-                                           bool isEvenTimestep)
+__global__ void QVeloDeviceEQ27(
+    real* VeloX,
+    real* VeloY,
+    real* VeloZ,
+    real* DD, 
+    int* k_Q, 
+    int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -613,95 +638,95 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             // based on BGK Plus Comp
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[ke   ];
-			real mfabb = (D.f[DIR_M00   ])[kw   ];
-			real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-			real mfbab = (D.f[DIR_0M0   ])[ks   ];
-			real mfbbc = (D.f[DIR_00P   ])[kt   ];
-			real mfbba = (D.f[DIR_00M   ])[kb   ];
-			real mfccb = (D.f[DIR_PP0  ])[kne  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-			real mfcab = (D.f[DIR_PM0  ])[kse  ];
-			real mfacb = (D.f[DIR_MP0  ])[knw  ];
-			real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-			real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-			real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-			real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-			real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-			real mfbac = (D.f[DIR_0MP  ])[kts  ];
+			real mfcbb = (D.f[DIR_P00])[ke   ];
+			real mfabb = (D.f[DIR_M00])[kw   ];
+			real mfbcb = (D.f[DIR_0P0])[kn   ];
+			real mfbab = (D.f[DIR_0M0])[ks   ];
+			real mfbbc = (D.f[DIR_00P])[kt   ];
+			real mfbba = (D.f[DIR_00M])[kb   ];
+			real mfccb = (D.f[DIR_PP0])[kne  ];
+			real mfaab = (D.f[DIR_MM0])[ksw  ];
+			real mfcab = (D.f[DIR_PM0])[kse  ];
+			real mfacb = (D.f[DIR_MP0])[knw  ];
+			real mfcbc = (D.f[DIR_P0P])[kte  ];
+			real mfaba = (D.f[DIR_M0M])[kbw  ];
+			real mfcba = (D.f[DIR_P0M])[kbe  ];
+			real mfabc = (D.f[DIR_M0P])[ktw  ];
+			real mfbcc = (D.f[DIR_0PP])[ktn  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs  ];
+			real mfbca = (D.f[DIR_0PM])[kbn  ];
+			real mfbac = (D.f[DIR_0MP])[kts  ];
 			real mfbbb = (D.f[DIR_000])[kzero];
-			real mfccc = (D.f[DIR_PPP ])[ktne ];
-			real mfaac = (D.f[DIR_MMP ])[ktsw ];
-			real mfcac = (D.f[DIR_PMP ])[ktse ];
-			real mfacc = (D.f[DIR_MPP ])[ktnw ];
-			real mfcca = (D.f[DIR_PPM ])[kbne ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-			real mfcaa = (D.f[DIR_PMM ])[kbse ];
-			real mfaca = (D.f[DIR_MPM ])[kbnw ];
+			real mfccc = (D.f[DIR_PPP])[ktne ];
+			real mfaac = (D.f[DIR_MMP])[ktsw ];
+			real mfcac = (D.f[DIR_PMP])[ktse ];
+			real mfacc = (D.f[DIR_MPP])[ktnw ];
+			real mfcca = (D.f[DIR_PPM])[kbne ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw ];
+			real mfcaa = (D.f[DIR_PMM])[kbse ];
+			real mfaca = (D.f[DIR_MPM])[kbnw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real rho   = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 							 mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
@@ -763,33 +788,33 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
 			mfcaa = -rho * XXc * YYa * ZZa - c1o216;
 			mfaca = -rho * XXa * YYc * ZZa - c1o216;
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
-			(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
-			(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
-			(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
-			(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
-			(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
-			(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
-			(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
-			(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
-			(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
-			(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
-			(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
-			(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
-			(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
-			(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
-			(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
-			(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
-			(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
+			(D.f[DIR_P00])[ke   ] = mfabb;//mfcbb;
+			(D.f[DIR_M00])[kw   ] = mfcbb;//mfabb;
+			(D.f[DIR_0P0])[kn   ] = mfbab;//mfbcb;
+			(D.f[DIR_0M0])[ks   ] = mfbcb;//mfbab;
+			(D.f[DIR_00P])[kt   ] = mfbba;//mfbbc;
+			(D.f[DIR_00M])[kb   ] = mfbbc;//mfbba;
+			(D.f[DIR_PP0])[kne  ] = mfaab;//mfccb;
+			(D.f[DIR_MM0])[ksw  ] = mfccb;//mfaab;
+			(D.f[DIR_PM0])[kse  ] = mfacb;//mfcab;
+			(D.f[DIR_MP0])[knw  ] = mfcab;//mfacb;
+			(D.f[DIR_P0P])[kte  ] = mfaba;//mfcbc;
+			(D.f[DIR_M0M])[kbw  ] = mfcbc;//mfaba;
+			(D.f[DIR_P0M])[kbe  ] = mfabc;//mfcba;
+			(D.f[DIR_M0P])[ktw  ] = mfcba;//mfabc;
+			(D.f[DIR_0PP])[ktn  ] = mfbaa;//mfbcc;
+			(D.f[DIR_0MM])[kbs  ] = mfbcc;//mfbaa;
+			(D.f[DIR_0PM])[kbn  ] = mfbac;//mfbca;
+			(D.f[DIR_0MP])[kts  ] = mfbca;//mfbac;
 			(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
-			(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
-			(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
-			(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
-			(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
-			(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
-			(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
-			(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
-			(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
+			(D.f[DIR_PPP])[ktne ] = mfaaa;//mfccc;
+			(D.f[DIR_MMP])[ktsw ] = mfcca;//mfaac;
+			(D.f[DIR_PMP])[ktse ] = mfaca;//mfcac;
+			(D.f[DIR_MPP])[ktnw ] = mfcaa;//mfacc;
+			(D.f[DIR_PPM])[kbne ] = mfaac;//mfcca;
+			(D.f[DIR_MMM])[kbsw ] = mfccc;//mfaaa;
+			(D.f[DIR_PMM])[kbse ] = mfacc;//mfcaa;
+			(D.f[DIR_MPM])[kbnw ] = mfcac;//mfaca;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -832,254 +857,6 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
 
 
 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QVeloStreetDeviceEQ27(
-	real* veloXfraction,
-	real* veloYfraction,
-	int*  naschVelo,
-	real* DD,
-	int*  naschIndex,
-	int   numberOfStreetNodes,
-	real  velocityRatio,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint  size_Mat,
-	bool  isEvenTimestep)
-{
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k < numberOfStreetNodes)
-	{
-		////////////////////////////////////////////////////////////////////////////////
-		//index
-		unsigned int KQK   = naschIndex[k];
-		unsigned int kzero = KQK;
-		unsigned int ke    = KQK;
-		unsigned int kw    = neighborX[KQK];
-		unsigned int kn    = KQK;
-		unsigned int ks    = neighborY[KQK];
-		unsigned int kt    = KQK;
-		unsigned int kb    = neighborZ[KQK];
-		unsigned int ksw   = neighborY[kw];
-		unsigned int kne   = KQK;
-		unsigned int kse   = ks;
-		unsigned int knw   = kw;
-		unsigned int kbw   = neighborZ[kw];
-		unsigned int kte   = KQK;
-		unsigned int kbe   = kb;
-		unsigned int ktw   = kw;
-		unsigned int kbs   = neighborZ[ks];
-		unsigned int ktn   = KQK;
-		unsigned int kbn   = kb;
-		unsigned int kts   = ks;
-		unsigned int ktse  = ks;
-		unsigned int kbnw  = kbw;
-		unsigned int ktnw  = kw;
-		unsigned int kbse  = kbs;
-		unsigned int ktsw  = ksw;
-		unsigned int kbne  = kb;
-		unsigned int ktne  = KQK;
-		unsigned int kbsw  = neighborZ[ksw];
-		////////////////////////////////////////////////////////////////////////////////
-		Distributions27 D;
-		if (isEvenTimestep == true)
-		{
-			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-		}
-		else
-		{
-			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-		}
-
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		// based on BGK Plus Comp
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		real mfcbb = (D.f[DIR_P00   ])[ke   ];
-		real mfabb = (D.f[DIR_M00   ])[kw   ];
-		real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-		real mfbab = (D.f[DIR_0M0   ])[ks   ];
-		real mfbbc = (D.f[DIR_00P   ])[kt   ];
-		real mfbba = (D.f[DIR_00M   ])[kb   ];
-		real mfccb = (D.f[DIR_PP0  ])[kne  ];
-		real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-		real mfcab = (D.f[DIR_PM0  ])[kse  ];
-		real mfacb = (D.f[DIR_MP0  ])[knw  ];
-		real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-		real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-		real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-		real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-		real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-		real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-		real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-		real mfbac = (D.f[DIR_0MP  ])[kts  ];
-		real mfbbb = (D.f[DIR_000])[kzero];
-		real mfccc = (D.f[DIR_PPP ])[ktne ];
-		real mfaac = (D.f[DIR_MMP ])[ktsw ];
-		real mfcac = (D.f[DIR_PMP ])[ktse ];
-		real mfacc = (D.f[DIR_MPP ])[ktnw ];
-		real mfcca = (D.f[DIR_PPM ])[kbne ];
-		real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-		real mfcaa = (D.f[DIR_PMM ])[kbse ];
-		real mfaca = (D.f[DIR_MPM ])[kbnw ];
-		////////////////////////////////////////////////////////////////////////////////////
-		real rho = (mfccc + mfaaa + mfaca + mfcac + mfacc + mfcaa + mfaac + mfcca +
-			        mfbac + mfbca + mfbaa + mfbcc + mfabc + mfcba + mfaba + mfcbc + mfacb + mfcab + mfaab + mfccb +
-			        mfabb + mfcbb + mfbab + mfbcb + mfbba + mfbbc + mfbbb + c1o1);
-		//!!!!Achtung + one
-		////////////////////////////////////////////////////////////////////////////////////
-		real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-			        (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-			          (mfcbb - mfabb)) / rho;
-		real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-			        (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-			          (mfbcb - mfbab)) / rho;
-		real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-			        (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-			          (mfbbc - mfbba)) / rho;
-		////////////////////////////////////////////////////////////////////////////////////
-		if (naschVelo[k] >= 0)
-		{
-			real VeloX = naschVelo[k] * veloXfraction[k] / velocityRatio;
-			real VeloY = naschVelo[k] * veloYfraction[k] / velocityRatio;
-			vvx = VeloX;
-			vvy = VeloY;
-		}
-		////////////////////////////////////////////////////////////////////////////////////
-		real vx2 = vvx * vvx;
-		real vy2 = vvy * vvy;
-		real vz2 = vvz * vvz;
-		////////////////////////////////////////////////////////////////////////////////////
-		real XXb = -c2o3 + vx2;
-		real XXc = -c1o2 * (XXb + c1o1 + vvx);
-		real XXa = XXc + vvx;
-		real YYb = -c2o3 + vy2;
-		real YYc = -c1o2 * (YYb + c1o1 + vvy);
-		real YYa = YYc + vvy;
-		real ZZb = -c2o3 + vz2;
-		real ZZc = -c1o2 * (ZZb + c1o1 + vvz);
-		real ZZa = ZZc + vvz;
-		////////////////////////////////////////////////////////////////////////////////////
-		mfcbb = -rho * XXc * YYb * ZZb - c2o27;
-		mfabb = -rho * XXa * YYb * ZZb - c2o27;
-		mfbcb = -rho * XXb * YYc * ZZb - c2o27;
-		mfbab = -rho * XXb * YYa * ZZb - c2o27;
-		mfbbc = -rho * XXb * YYb * ZZc - c2o27;
-		mfbba = -rho * XXb * YYb * ZZa - c2o27;
-		mfccb = -rho * XXc * YYc * ZZb - c1o54;
-		mfaab = -rho * XXa * YYa * ZZb - c1o54;
-		mfcab = -rho * XXc * YYa * ZZb - c1o54;
-		mfacb = -rho * XXa * YYc * ZZb - c1o54;
-		mfcbc = -rho * XXc * YYb * ZZc - c1o54;
-		mfaba = -rho * XXa * YYb * ZZa - c1o54;
-		mfcba = -rho * XXc * YYb * ZZa - c1o54;
-		mfabc = -rho * XXa * YYb * ZZc - c1o54;
-		mfbcc = -rho * XXb * YYc * ZZc - c1o54;
-		mfbaa = -rho * XXb * YYa * ZZa - c1o54;
-		mfbca = -rho * XXb * YYc * ZZa - c1o54;
-		mfbac = -rho * XXb * YYa * ZZc - c1o54;
-		mfbbb = -rho * XXb * YYb * ZZb - c8o27;
-		mfccc = -rho * XXc * YYc * ZZc - c1o216;
-		mfaac = -rho * XXa * YYa * ZZc - c1o216;
-		mfcac = -rho * XXc * YYa * ZZc - c1o216;
-		mfacc = -rho * XXa * YYc * ZZc - c1o216;
-		mfcca = -rho * XXc * YYc * ZZa - c1o216;
-		mfaaa = -rho * XXa * YYa * ZZa - c1o216;
-		mfcaa = -rho * XXc * YYa * ZZa - c1o216;
-		mfaca = -rho * XXa * YYc * ZZa - c1o216;
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
-		(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
-		(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
-		(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
-		(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
-		(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
-		(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
-		(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
-		(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
-		(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
-		(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
-		(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
-		(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
-		(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
-		(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
-		(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
-		(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
-		(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
-		(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
-		(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
-		(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
-		(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
-		(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
-		(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
-		(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
-		(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
-		(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
-	}
-}
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
 
 
 
@@ -1120,80 +897,80 @@ __global__ void QVeloStreetDeviceEQ27(
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceIncompHighNu27(
-													real* vx,
-													real* vy,
-													real* vz,
-													real* DD, 
-													int* k_Q, 
-													real* QQ,
-													unsigned int numberOfBCnodes, 
-													real om1, 
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat, 
-													bool isEvenTimestep)
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1218,24 +995,24 @@ __global__ void QVelDeviceIncompHighNu27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1278,32 +1055,32 @@ __global__ void QVelDeviceIncompHighNu27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[DIR_P00   ])[ke   ];
-      f_W   = (D.f[DIR_M00   ])[kw   ];
-      f_N   = (D.f[DIR_0P0   ])[kn   ];
-      f_S   = (D.f[DIR_0M0   ])[ks   ];
-      f_T   = (D.f[DIR_00P   ])[kt   ];
-      f_B   = (D.f[DIR_00M   ])[kb   ];
-      f_NE  = (D.f[DIR_PP0  ])[kne  ];
-      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE  = (D.f[DIR_PM0  ])[kse  ];
-      f_NW  = (D.f[DIR_MP0  ])[knw  ];
-      f_TE  = (D.f[DIR_P0P  ])[kte  ];
-      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-      f_TS  = (D.f[DIR_0MP  ])[kts  ];
-      f_TNE = (D.f[DIR_PPP ])[ktne ];
-      f_TSW = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE = (D.f[DIR_PMP ])[ktse ];
-      f_TNW = (D.f[DIR_MPP ])[ktnw ];
-      f_BNE = (D.f[DIR_PPM ])[kbne ];
-      f_BSW = (D.f[DIR_MMM ])[kbsw ];
-      f_BSE = (D.f[DIR_PMM ])[kbse ];
-      f_BNW = (D.f[DIR_MPM ])[kbnw ];
+      f_E   = (D.f[DIR_P00])[ke   ];
+      f_W   = (D.f[DIR_M00])[kw   ];
+      f_N   = (D.f[DIR_0P0])[kn   ];
+      f_S   = (D.f[DIR_0M0])[ks   ];
+      f_T   = (D.f[DIR_00P])[kt   ];
+      f_B   = (D.f[DIR_00M])[kb   ];
+      f_NE  = (D.f[DIR_PP0])[kne  ];
+      f_SW  = (D.f[DIR_MM0])[ksw  ];
+      f_SE  = (D.f[DIR_PM0])[kse  ];
+      f_NW  = (D.f[DIR_MP0])[knw  ];
+      f_TE  = (D.f[DIR_P0P])[kte  ];
+      f_BW  = (D.f[DIR_M0M])[kbw  ];
+      f_BE  = (D.f[DIR_P0M])[kbe  ];
+      f_TW  = (D.f[DIR_M0P])[ktw  ];
+      f_TN  = (D.f[DIR_0PP])[ktn  ];
+      f_BS  = (D.f[DIR_0MM])[kbs  ];
+      f_BN  = (D.f[DIR_0PM])[kbn  ];
+      f_TS  = (D.f[DIR_0MP])[kts  ];
+      f_TNE = (D.f[DIR_PPP])[ktne ];
+      f_TSW = (D.f[DIR_MMP])[ktsw ];
+      f_TSE = (D.f[DIR_PMP])[ktse ];
+      f_TNW = (D.f[DIR_MPP])[ktnw ];
+      f_BNE = (D.f[DIR_PPM])[kbne ];
+      f_BSW = (D.f[DIR_MMM])[kbsw ];
+      f_BSE = (D.f[DIR_PMM])[kbse ];
+      f_BNW = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -1328,63 +1105,63 @@ __global__ void QVelDeviceIncompHighNu27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -1618,80 +1395,80 @@ __global__ void QVelDeviceIncompHighNu27(
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceCompHighNu27(
-													real* vx,
-													real* vy,
-													real* vz,
-													real* DD,
-													int* k_Q,
-													real* QQ,
-													unsigned int numberOfBCnodes, 
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat, 
-													bool isEvenTimestep)
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1716,24 +1493,24 @@ __global__ void QVelDeviceCompHighNu27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -1776,58 +1553,58 @@ __global__ void QVelDeviceCompHighNu27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[DIR_P00   ])[ke   ];
-      f_W   = (D.f[DIR_M00   ])[kw   ];
-      f_N   = (D.f[DIR_0P0   ])[kn   ];
-      f_S   = (D.f[DIR_0M0   ])[ks   ];
-      f_T   = (D.f[DIR_00P   ])[kt   ];
-      f_B   = (D.f[DIR_00M   ])[kb   ];
-      f_NE  = (D.f[DIR_PP0  ])[kne  ];
-      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-      f_SE  = (D.f[DIR_PM0  ])[kse  ];
-      f_NW  = (D.f[DIR_MP0  ])[knw  ];
-      f_TE  = (D.f[DIR_P0P  ])[kte  ];
-      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-      f_TS  = (D.f[DIR_0MP  ])[kts  ];
-      f_TNE = (D.f[DIR_PPP ])[ktne ];
-      f_TSW = (D.f[DIR_MMP ])[ktsw ];
-      f_TSE = (D.f[DIR_PMP ])[ktse ];
-      f_TNW = (D.f[DIR_MPP ])[ktnw ];
-      f_BNE = (D.f[DIR_PPM ])[kbne ];
-      f_BSW = (D.f[DIR_MMM ])[kbsw ];
-      f_BSE = (D.f[DIR_PMM ])[kbse ];
-      f_BNW = (D.f[DIR_MPM ])[kbnw ];
-      //f_W    = (D.f[DIR_P00   ])[ke   ];
-      //f_E    = (D.f[DIR_M00   ])[kw   ];
-      //f_S    = (D.f[DIR_0P0   ])[kn   ];
-      //f_N    = (D.f[DIR_0M0   ])[ks   ];
-      //f_B    = (D.f[DIR_00P   ])[kt   ];
-      //f_T    = (D.f[DIR_00M   ])[kb   ];
-      //f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      //f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      //f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      //f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      //f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      //f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      //f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      //f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      //f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      //f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      //f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      //f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      //f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      //f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      //f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      //f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      //f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      //f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      //f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      //f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_E   = (D.f[DIR_P00])[ke   ];
+      f_W   = (D.f[DIR_M00])[kw   ];
+      f_N   = (D.f[DIR_0P0])[kn   ];
+      f_S   = (D.f[DIR_0M0])[ks   ];
+      f_T   = (D.f[DIR_00P])[kt   ];
+      f_B   = (D.f[DIR_00M])[kb   ];
+      f_NE  = (D.f[DIR_PP0])[kne  ];
+      f_SW  = (D.f[DIR_MM0])[ksw  ];
+      f_SE  = (D.f[DIR_PM0])[kse  ];
+      f_NW  = (D.f[DIR_MP0])[knw  ];
+      f_TE  = (D.f[DIR_P0P])[kte  ];
+      f_BW  = (D.f[DIR_M0M])[kbw  ];
+      f_BE  = (D.f[DIR_P0M])[kbe  ];
+      f_TW  = (D.f[DIR_M0P])[ktw  ];
+      f_TN  = (D.f[DIR_0PP])[ktn  ];
+      f_BS  = (D.f[DIR_0MM])[kbs  ];
+      f_BN  = (D.f[DIR_0PM])[kbn  ];
+      f_TS  = (D.f[DIR_0MP])[kts  ];
+      f_TNE = (D.f[DIR_PPP])[ktne ];
+      f_TSW = (D.f[DIR_MMP])[ktsw ];
+      f_TSE = (D.f[DIR_PMP])[ktse ];
+      f_TNW = (D.f[DIR_MPP])[ktnw ];
+      f_BNE = (D.f[DIR_PPM])[kbne ];
+      f_BSW = (D.f[DIR_MMM])[kbsw ];
+      f_BSE = (D.f[DIR_PMM])[kbse ];
+      f_BNW = (D.f[DIR_MPM])[kbnw ];
+      //f_W    = (D.f[DIR_P00])[ke   ];
+      //f_E    = (D.f[DIR_M00])[kw   ];
+      //f_S    = (D.f[DIR_0P0])[kn   ];
+      //f_N    = (D.f[DIR_0M0])[ks   ];
+      //f_B    = (D.f[DIR_00P])[kt   ];
+      //f_T    = (D.f[DIR_00M])[kb   ];
+      //f_SW   = (D.f[DIR_PP0])[kne  ];
+      //f_NE   = (D.f[DIR_MM0])[ksw  ];
+      //f_NW   = (D.f[DIR_PM0])[kse  ];
+      //f_SE   = (D.f[DIR_MP0])[knw  ];
+      //f_BW   = (D.f[DIR_P0P])[kte  ];
+      //f_TE   = (D.f[DIR_M0M])[kbw  ];
+      //f_TW   = (D.f[DIR_P0M])[kbe  ];
+      //f_BE   = (D.f[DIR_M0P])[ktw  ];
+      //f_BS   = (D.f[DIR_0PP])[ktn  ];
+      //f_TN   = (D.f[DIR_0MM])[kbs  ];
+      //f_TS   = (D.f[DIR_0PM])[kbn  ];
+      //f_BN   = (D.f[DIR_0MP])[kts  ];
+      //f_BSW  = (D.f[DIR_PPP])[ktne ];
+      //f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      //f_BNW  = (D.f[DIR_PMP])[ktse ];
+      //f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      //f_TSW  = (D.f[DIR_PPM])[kbne ];
+      //f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      //f_TNW  = (D.f[DIR_PMM])[kbse ];
+      //f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -1852,63 +1629,63 @@ __global__ void QVelDeviceCompHighNu27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -2194,39 +1971,32 @@ __global__ void QVelDeviceCompHighNu27(
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceCompZeroPress27(
-														real* velocityX,
-														real* velocityY,
-														real* velocityZ,
-														real* distribution, 
-														int* subgridDistanceIndices, 
-														real* subgridDistances,
-														unsigned int numberOfBCnodes, 
-														real omega, 
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int numberOfLBnodes, 
-														bool isEvenTimestep)
+    real* velocityX,
+    real* velocityY,
+    real* velocityZ,
+    real* distribution, 
+    int* subgridDistanceIndices, 
+    real* subgridDistances,
+    unsigned int numberOfBCnodes, 
+    real omega, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
-	//! The velocity boundary condition is executed in the following steps
-	//!
-	////////////////////////////////////////////////////////////////////////////////
-	//! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-	//!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   //! The velocity boundary condition is executed in the following steps
+   //!
+   ////////////////////////////////////////////////////////////////////////////////
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    //! - Run for all indices in size of boundary condition (numberOfBCnodes)
    //!
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
 
       //////////////////////////////////////////////////////////////////////////
@@ -2239,9 +2009,9 @@ __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local velocities
       //!
-      real VeloX = velocityX[k];
-      real VeloY = velocityY[k];
-      real VeloZ = velocityZ[k];
+      real VeloX = velocityX[nodeIndex];
+      real VeloY = velocityY[nodeIndex];
+      real VeloZ = velocityZ[nodeIndex];
 
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -2253,7 +2023,7 @@ __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int KQK  = subgridDistanceIndices[k];
+      unsigned int KQK  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= KQK;
       unsigned int ke   = KQK;
       unsigned int kw   = neighborX[KQK];
@@ -2285,32 +2055,32 @@ __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -2342,7 +2112,7 @@ __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Update distributions with subgrid distance (q) between zero and one
       real feq, q, velocityLB, velocityBC;
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
@@ -2351,7 +2121,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
@@ -2360,7 +2130,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
@@ -2369,7 +2139,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
@@ -2378,7 +2148,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
@@ -2387,7 +2157,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
@@ -2396,7 +2166,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
@@ -2405,7 +2175,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
@@ -2414,7 +2184,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
@@ -2423,7 +2193,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
@@ -2432,7 +2202,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
@@ -2441,7 +2211,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
@@ -2450,7 +2220,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
@@ -2459,7 +2229,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
@@ -2468,7 +2238,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
@@ -2477,7 +2247,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
@@ -2486,7 +2256,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
@@ -2495,7 +2265,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
@@ -2504,7 +2274,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
@@ -2513,7 +2283,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
@@ -2522,7 +2292,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
@@ -2531,7 +2301,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
@@ -2540,7 +2310,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
@@ -2549,7 +2319,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
@@ -2558,7 +2328,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
@@ -2567,7 +2337,7 @@ __global__ void QVelDeviceCompZeroPress27(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
@@ -2619,87 +2389,88 @@ __global__ void QVelDeviceCompZeroPress27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QVelDeviceCompZeroPress1h27( int inx,
-														int iny,
-														real* vx,
-														real* vy,
-														real* vz,
-														real* DD, 
-														int* k_Q, 
-														real* QQ,
-														unsigned int numberOfBCnodes,
-														real om1, 
-														real Phi,
-														real angularVelocity,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* coordX,
-														real* coordY,
-														real* coordZ,
-														unsigned int size_Mat, 
-														bool isEvenTimestep)
+__global__ void QVelDeviceCompZeroPress1h27(
+    int inx,
+    int iny,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes,
+    real om1, 
+    real Phi,
+    real angularVelocity,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2738,24 +2509,24 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -2797,63 +2568,63 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real vx1, vx2, vx3, drho, feq, q, cu_sq;
@@ -3090,21 +2861,22 @@ __global__ void QVelDeviceCompZeroPress1h27( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void LB_BC_Vel_West_27( int nx, 
-                                              int ny, 
-                                              int nz, 
-                                              int itz, 
-                                              unsigned int* bcMatD, 
-                                              unsigned int* neighborX,
-                                              unsigned int* neighborY,
-                                              unsigned int* neighborZ,
-                                              real* DD, 
-                                              unsigned int size_Mat, 
-                                              bool isEvenTimestep, 
-                                              real u0x, 
-                                              unsigned int grid_nx, 
-                                              unsigned int grid_ny, 
-                                              real om) 
+__global__ void LB_BC_Vel_West_27(
+    int nx, 
+    int ny, 
+    int nz, 
+    int itz, 
+    unsigned int* bcMatD, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DD, 
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep, 
+    real u0x, 
+    unsigned int grid_nx, 
+    unsigned int grid_ny, 
+    real om) 
 {
    //thread-index
    unsigned int ity = blockIdx.x;
@@ -3125,63 +2897,63 @@ __global__ void LB_BC_Vel_West_27( int nx,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -3300,33 +3072,33 @@ __global__ void LB_BC_Vel_West_27( int nx,
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
          f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[DIR_P00   ])[k1e   ];
-      f1_E    = (D.f[DIR_M00   ])[k1w   ];
-      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
-      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
-      f1_B    = (D.f[DIR_00P   ])[k1t   ];
-      f1_T    = (D.f[DIR_00M   ])[k1b   ];
-      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
-      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
-      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
-      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
-      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
-      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
-      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
-      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
-      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
-      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
-      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
-      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_W    = (D.f[DIR_P00])[k1e   ];
+      f1_E    = (D.f[DIR_M00])[k1w   ];
+      f1_S    = (D.f[DIR_0P0])[k1n   ];
+      f1_N    = (D.f[DIR_0M0])[k1s   ];
+      f1_B    = (D.f[DIR_00P])[k1t   ];
+      f1_T    = (D.f[DIR_00M])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP])[k1ts  ];
       f1_ZERO = (D.f[DIR_000])[k1zero];
-      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
-      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
-      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
-      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
-      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
-      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
-      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
-      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
+      f1_BSW  = (D.f[DIR_PPP])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM])[k1bnw ];
 
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
          f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
@@ -3343,32 +3115,32 @@ __global__ void LB_BC_Vel_West_27( int nx,
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
-      (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-      (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-      (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-      (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-      (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-      (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-      (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-      (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-      (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-      (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-      (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-      (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-      (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-      (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-      (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-      (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-      (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-      (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-      (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-      (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-      (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-      (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-      (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-      (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-      (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-      (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+      (D.f[DIR_P00])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+      (D.f[DIR_M00])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+      (D.f[DIR_0P0])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+      (D.f[DIR_0M0])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+      (D.f[DIR_00P])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+      (D.f[DIR_00M])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+      (D.f[DIR_PP0])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+      (D.f[DIR_MM0])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+      (D.f[DIR_PM0])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+      (D.f[DIR_MP0])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+      (D.f[DIR_P0P])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+      (D.f[DIR_M0M])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+      (D.f[DIR_P0M])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+      (D.f[DIR_M0P])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+      (D.f[DIR_0PP])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+      (D.f[DIR_0MM])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+      (D.f[DIR_0PM])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+      (D.f[DIR_0MP])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+      (D.f[DIR_PPP])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+      (D.f[DIR_MMM])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+      (D.f[DIR_PPM])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+      (D.f[DIR_MMP])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+      (D.f[DIR_PMP])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+      (D.f[DIR_MPM])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+      (D.f[DIR_PMM])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+      (D.f[DIR_MPP])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
    }
    __syncthreads();
 }          
@@ -3414,18 +3186,18 @@ __global__ void LB_BC_Vel_West_27( int nx,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDevPlainBB27(
-   real* velocityX,
-   real* velocityY,
-   real* velocityZ,
-   real* distributions,
-   int* subgridDistanceIndices,
-   real* subgridDistances,
-   uint numberOfBCnodes,
-   uint* neighborX,
-   uint* neighborY,
-   uint* neighborZ,
-   uint numberOfLBnodes,
-   bool isEvenTimestep)
+    real* velocityX,
+    real* velocityY,
+    real* velocityZ,
+    real* distributions,
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    uint numberOfBCnodes,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The velocity boundary condition is executed in the following steps
@@ -3433,18 +3205,11 @@ __global__ void QVelDevPlainBB27(
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
    //!
-   const unsigned  x = threadIdx.x;   // global x-index
-   const unsigned  y = blockIdx.x;    // global y-index
-   const unsigned  z = blockIdx.y;    // global z-index
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    // run for all indices in size of boundary condition (numberOfBCnodes)
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
        //////////////////////////////////////////////////////////////////////////
        //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -3456,9 +3221,9 @@ __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local velocities
       //!
-      real VeloX = velocityX[k];
-      real VeloY = velocityY[k];
-      real VeloZ = velocityZ[k];
+      real VeloX = velocityX[nodeIndex];
+      real VeloY = velocityY[nodeIndex];
+      real VeloZ = velocityZ[nodeIndex];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local subgrid distances (q's)
@@ -3469,7 +3234,7 @@ __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      uint indexOfBCnode = subgridDistanceIndices[k];
+      uint indexOfBCnode = subgridDistanceIndices[nodeIndex];
       uint ke   = indexOfBCnode;
       uint kw   = neighborX[indexOfBCnode];
       uint kn   = indexOfBCnode;
@@ -3500,32 +3265,32 @@ __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - change the pointer to write the results in the correct array
@@ -3535,32 +3300,32 @@ __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - rewrite distributions if there is a sub-grid distance (q) in same direction
       real q;
-      q = (subgridD.q[DIR_P00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00  ])[kw  ]=f_E   + c4o9  * (-VeloX);
-      q = (subgridD.q[DIR_M00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00  ])[ke  ]=f_W   + c4o9  * ( VeloX);
-      q = (subgridD.q[DIR_0P0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0  ])[ks  ]=f_N   + c4o9  * (-VeloY);
-      q = (subgridD.q[DIR_0M0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0  ])[kn  ]=f_S   + c4o9  * ( VeloY);
-      q = (subgridD.q[DIR_00P  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M  ])[kb  ]=f_T   + c4o9  * (-VeloZ);
-      q = (subgridD.q[DIR_00M  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P  ])[kt  ]=f_B   + c4o9  * ( VeloZ);
-      q = (subgridD.q[DIR_PP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0 ])[ksw ]=f_NE  + c1o9  * (-VeloX - VeloY);
-      q = (subgridD.q[DIR_MM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0 ])[kne ]=f_SW  + c1o9  * ( VeloX + VeloY);
-      q = (subgridD.q[DIR_PM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0 ])[knw ]=f_SE  + c1o9  * (-VeloX + VeloY);
-      q = (subgridD.q[DIR_MP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0 ])[kse ]=f_NW  + c1o9  * ( VeloX - VeloY);
-      q = (subgridD.q[DIR_P0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M ])[kbw ]=f_TE  + c1o9  * (-VeloX - VeloZ);
-      q = (subgridD.q[DIR_M0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P ])[kte ]=f_BW  + c1o9  * ( VeloX + VeloZ);
-      q = (subgridD.q[DIR_P0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P ])[ktw ]=f_BE  + c1o9  * (-VeloX + VeloZ);
-      q = (subgridD.q[DIR_M0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M ])[kbe ]=f_TW  + c1o9  * ( VeloX - VeloZ);
-      q = (subgridD.q[DIR_0PP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM ])[kbs ]=f_TN  + c1o9  * (-VeloY - VeloZ);
-      q = (subgridD.q[DIR_0MM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP ])[ktn ]=f_BS  + c1o9  * ( VeloY + VeloZ);
-      q = (subgridD.q[DIR_0PM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP ])[kts ]=f_BN  + c1o9  * (-VeloY + VeloZ);
-      q = (subgridD.q[DIR_0MP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM ])[kbn ]=f_TS  + c1o9  * ( VeloY - VeloZ);
-      q = (subgridD.q[DIR_PPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ);
-      q = (subgridD.q[DIR_MMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ);
-      q = (subgridD.q[DIR_PPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ);
-      q = (subgridD.q[DIR_MMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ);
-      q = (subgridD.q[DIR_PMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ);
-      q = (subgridD.q[DIR_MPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ);
-      q = (subgridD.q[DIR_PMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ);
-      q = (subgridD.q[DIR_MPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ);
+      q = (subgridD.q[DIR_P00])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00])[kw  ]=f_E   + c4o9  * (-VeloX);
+      q = (subgridD.q[DIR_M00])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00])[ke  ]=f_W   + c4o9  * ( VeloX);
+      q = (subgridD.q[DIR_0P0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0])[ks  ]=f_N   + c4o9  * (-VeloY);
+      q = (subgridD.q[DIR_0M0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0])[kn  ]=f_S   + c4o9  * ( VeloY);
+      q = (subgridD.q[DIR_00P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M])[kb  ]=f_T   + c4o9  * (-VeloZ);
+      q = (subgridD.q[DIR_00M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P])[kt  ]=f_B   + c4o9  * ( VeloZ);
+      q = (subgridD.q[DIR_PP0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0])[ksw ]=f_NE  + c1o9  * (-VeloX - VeloY);
+      q = (subgridD.q[DIR_MM0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0])[kne ]=f_SW  + c1o9  * ( VeloX + VeloY);
+      q = (subgridD.q[DIR_PM0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0])[knw ]=f_SE  + c1o9  * (-VeloX + VeloY);
+      q = (subgridD.q[DIR_MP0])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0])[kse ]=f_NW  + c1o9  * ( VeloX - VeloY);
+      q = (subgridD.q[DIR_P0P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M])[kbw ]=f_TE  + c1o9  * (-VeloX - VeloZ);
+      q = (subgridD.q[DIR_M0M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P])[kte ]=f_BW  + c1o9  * ( VeloX + VeloZ);
+      q = (subgridD.q[DIR_P0M])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P])[ktw ]=f_BE  + c1o9  * (-VeloX + VeloZ);
+      q = (subgridD.q[DIR_M0P])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M])[kbe ]=f_TW  + c1o9  * ( VeloX - VeloZ);
+      q = (subgridD.q[DIR_0PP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM])[kbs ]=f_TN  + c1o9  * (-VeloY - VeloZ);
+      q = (subgridD.q[DIR_0MM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP])[ktn ]=f_BS  + c1o9  * ( VeloY + VeloZ);
+      q = (subgridD.q[DIR_0PM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP])[kts ]=f_BN  + c1o9  * (-VeloY + VeloZ);
+      q = (subgridD.q[DIR_0MP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM])[kbn ]=f_TS  + c1o9  * ( VeloY - VeloZ);
+      q = (subgridD.q[DIR_PPP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ);
+      q = (subgridD.q[DIR_MMM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ);
+      q = (subgridD.q[DIR_PPM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ);
+      q = (subgridD.q[DIR_MMP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ);
+      q = (subgridD.q[DIR_PMP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ);
+      q = (subgridD.q[DIR_MPM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ);
+      q = (subgridD.q[DIR_PMM])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ);
+      q = (subgridD.q[DIR_MPP])[nodeIndex];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3604,80 +3369,81 @@ __global__ void QVelDevPlainBB27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QVelDevCouette27(real* vx,
-											real* vy,
-	 										real* vz,
-											real* DD,
-											int* k_Q, 
-											real* QQ,
-											unsigned int numberOfBCnodes, 
-											real om1, 
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat, 
-											bool isEvenTimestep)
+__global__ void QVelDevCouette27(
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3702,24 +3468,24 @@ __global__ void QVelDevCouette27(real* vx,
 			 *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			 *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			 *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -3761,94 +3527,94 @@ __global__ void QVelDevCouette27(real* vx,
       ////////////////////////////////////////////////////////////////////////////////
      
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[DIR_P00   ])[ke   ];
-      real f_E    = (D.f[DIR_M00   ])[kw   ];
-      real f_S    = (D.f[DIR_0P0   ])[kn   ];
-      real f_N    = (D.f[DIR_0M0   ])[ks   ];
-      real f_B    = (D.f[DIR_00P   ])[kt   ];
-      real f_T    = (D.f[DIR_00M   ])[kb   ];
-      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      real f_W    = (D.f[DIR_P00])[ke   ];
+      real f_E    = (D.f[DIR_M00])[kw   ];
+      real f_S    = (D.f[DIR_0P0])[kn   ];
+      real f_N    = (D.f[DIR_0M0])[ks   ];
+      real f_B    = (D.f[DIR_00P])[kt   ];
+      real f_T    = (D.f[DIR_00M])[kb   ];
+      real f_SW   = (D.f[DIR_PP0])[kne  ];
+      real f_NE   = (D.f[DIR_MM0])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0])[kse  ];
+      real f_SE   = (D.f[DIR_MP0])[knw  ];
+      real f_BW   = (D.f[DIR_P0P])[kte  ];
+      real f_TE   = (D.f[DIR_M0M])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM])[kbnw ];
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  ///////               FlowDirection Y !!!!!!!!!!                                                           ///////////////////////////////////
@@ -3868,24 +3634,24 @@ __global__ void QVelDevCouette27(real* vx,
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set distributions
       real q;
-      q = q_dirE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M00  ])[kw  ]=f_E   + ms*c2o27  * VeloX;	
-      q = q_dirW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P00  ])[ke  ]=f_W   - ms*c2o27  * VeloX;	
-      q = q_dirN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0M0  ])[ks  ]=f_N   + ms*c2o27  * VeloY;	
-      q = q_dirS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0P0  ])[kn  ]=f_S   - ms*c2o27  * VeloY;	
-	  q = q_dirT[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00M  ])[kb  ]=f_T   + ms*c2o27  * VeloZ - c3o2*c2o27*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      q = q_dirB[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00P  ])[kt  ]=f_B   - ms*c2o27  * VeloZ;
-      q = q_dirNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MM0 ])[ksw ]=f_NE  + ms*c1o54  * VeloX + ms*c1o54  * VeloY;
-	  q = q_dirSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PP0 ])[kne ]=f_SW  - ms*c1o54  * VeloX - ms*c1o54  * VeloY;
-	  q = q_dirSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MP0 ])[knw ]=f_SE  + ms*c1o54  * VeloX - ms*c1o54  * VeloY;
-	  q = q_dirNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PM0 ])[kse ]=f_NW  - ms*c1o54  * VeloX + ms*c1o54  * VeloY;
-	  q = q_dirTE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0M ])[kbw ]=f_TE  + ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirBW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0P ])[kte ]=f_BW  - ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
-	  q = q_dirBE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0P ])[ktw ]=f_BE  + ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
-	  q = q_dirTW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0M ])[kbe ]=f_TW  - ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirTN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MM ])[kbs ]=f_TN  + ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirBS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PP ])[ktn ]=f_BS  - ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
-	  q = q_dirBN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MP ])[kts ]=f_BN  + ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
-	  q = q_dirTS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PM ])[kbn ]=f_TS  - ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
+      q = q_dirE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M00])[kw  ]=f_E   + ms*c2o27  * VeloX;	
+      q = q_dirW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P00])[ke  ]=f_W   - ms*c2o27  * VeloX;	
+      q = q_dirN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0M0])[ks  ]=f_N   + ms*c2o27  * VeloY;	
+      q = q_dirS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0P0])[kn  ]=f_S   - ms*c2o27  * VeloY;	
+	  q = q_dirT[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00M])[kb  ]=f_T   + ms*c2o27  * VeloZ - c3o2*c2o27*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
+      q = q_dirB[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00P])[kt  ]=f_B   - ms*c2o27  * VeloZ;
+      q = q_dirNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MM0])[ksw ]=f_NE  + ms*c1o54  * VeloX + ms*c1o54  * VeloY;
+	  q = q_dirSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PP0])[kne ]=f_SW  - ms*c1o54  * VeloX - ms*c1o54  * VeloY;
+	  q = q_dirSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MP0])[knw ]=f_SE  + ms*c1o54  * VeloX - ms*c1o54  * VeloY;
+	  q = q_dirNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PM0])[kse ]=f_NW  - ms*c1o54  * VeloX + ms*c1o54  * VeloY;
+	  q = q_dirTE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0M])[kbw ]=f_TE  + ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirBW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0P])[kte ]=f_BW  - ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
+	  q = q_dirBE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0P])[ktw ]=f_BE  + ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
+	  q = q_dirTW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0M])[kbe ]=f_TW  - ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirTN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MM])[kbs ]=f_TN  + ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirBS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PP])[ktn ]=f_BS  - ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
+	  q = q_dirBN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MP])[kts ]=f_BN  + ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
+	  q = q_dirTS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PM])[kbn ]=f_TS  - ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
       q = q_dirTNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MMM])[kbsw]=f_TNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
       q = q_dirBSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PPP])[ktne]=f_BSW - ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
       q = q_dirBNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MMP])[ktsw]=f_BNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
@@ -3894,24 +3660,24 @@ __global__ void QVelDevCouette27(real* vx,
       q = q_dirBNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PMP])[ktse]=f_BNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
       q = q_dirBSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MPP])[ktnw]=f_BSE + ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
       q = q_dirTNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PMM])[kbse]=f_TNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      //q = q_dirE[k];	if (q>=zero && q<=one)	(D.f[DIR_M00  ])[kw  ]=f_E   + ms*c2over27  * VeloX;	
-   //   q = q_dirW[k];	if (q>=zero && q<=one)	(D.f[DIR_P00  ])[ke  ]=f_W   - ms*c2over27  * VeloX;	
-   //   q = q_dirN[k];	if (q>=zero && q<=one)	(D.f[DIR_0M0  ])[ks  ]=f_N   + ms*c2over27  * VeloY;	
-   //   q = q_dirS[k];	if (q>=zero && q<=one)	(D.f[DIR_0P0  ])[kn  ]=f_S   - ms*c2over27  * VeloY;	
-	  //q = q_dirT[k];	if (q>=zero && q<=one)	(D.f[DIR_00M  ])[kb  ]=f_T   + ms*c2over27  * VeloZ - c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-   //   q = q_dirB[k];	if (q>=zero && q<=one)	(D.f[DIR_00P  ])[kt  ]=f_B   - ms*c2over27  * VeloZ;
-   //   q = q_dirNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MM0 ])[ksw ]=f_NE  + ms*c1over54  * VeloX + ms*c1over54  * VeloY;
-	  //q = q_dirSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PP0 ])[kne ]=f_SW  - ms*c1over54  * VeloX - ms*c1over54  * VeloY;
-	  //q = q_dirSE[k];	if (q>=zero && q<=one)	(D.f[DIR_MP0 ])[knw ]=f_SE  + ms*c1over54  * VeloX - ms*c1over54  * VeloY;
-	  //q = q_dirNW[k];	if (q>=zero && q<=one)	(D.f[DIR_PM0 ])[kse ]=f_NW  - ms*c1over54  * VeloX + ms*c1over54  * VeloY;
-	  //q = q_dirTE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0M ])[kbw ]=f_TE  + ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirBW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0P ])[kte ]=f_BW  - ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
-	  //q = q_dirBE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0P ])[ktw ]=f_BE  + ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
-	  //q = q_dirTW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0M ])[kbe ]=f_TW  - ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirTN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MM ])[kbs ]=f_TN  + ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirBS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PP ])[ktn ]=f_BS  - ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
-	  //q = q_dirBN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MP ])[kts ]=f_BN  + ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
-	  //q = q_dirTS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PM ])[kbn ]=f_TS  - ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+      //q = q_dirE[k];	if (q>=zero && q<=one)	(D.f[DIR_M00])[kw  ]=f_E   + ms*c2over27  * VeloX;	
+   //   q = q_dirW[k];	if (q>=zero && q<=one)	(D.f[DIR_P00])[ke  ]=f_W   - ms*c2over27  * VeloX;	
+   //   q = q_dirN[k];	if (q>=zero && q<=one)	(D.f[DIR_0M0])[ks  ]=f_N   + ms*c2over27  * VeloY;	
+   //   q = q_dirS[k];	if (q>=zero && q<=one)	(D.f[DIR_0P0])[kn  ]=f_S   - ms*c2over27  * VeloY;	
+	  //q = q_dirT[k];	if (q>=zero && q<=one)	(D.f[DIR_00M])[kb  ]=f_T   + ms*c2over27  * VeloZ - c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+   //   q = q_dirB[k];	if (q>=zero && q<=one)	(D.f[DIR_00P])[kt  ]=f_B   - ms*c2over27  * VeloZ;
+   //   q = q_dirNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MM0])[ksw ]=f_NE  + ms*c1over54  * VeloX + ms*c1over54  * VeloY;
+	  //q = q_dirSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PP0])[kne ]=f_SW  - ms*c1over54  * VeloX - ms*c1over54  * VeloY;
+	  //q = q_dirSE[k];	if (q>=zero && q<=one)	(D.f[DIR_MP0])[knw ]=f_SE  + ms*c1over54  * VeloX - ms*c1over54  * VeloY;
+	  //q = q_dirNW[k];	if (q>=zero && q<=one)	(D.f[DIR_PM0])[kse ]=f_NW  - ms*c1over54  * VeloX + ms*c1over54  * VeloY;
+	  //q = q_dirTE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0M])[kbw ]=f_TE  + ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirBW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0P])[kte ]=f_BW  - ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
+	  //q = q_dirBE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0P])[ktw ]=f_BE  + ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
+	  //q = q_dirTW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0M])[kbe ]=f_TW  - ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirTN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MM])[kbs ]=f_TN  + ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirBS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PP])[ktn ]=f_BS  - ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
+	  //q = q_dirBN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MP])[kts ]=f_BN  + ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
+	  //q = q_dirTS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PM])[kbn ]=f_TS  - ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
    //   q = q_dirTNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MMM])[kbsw]=f_TNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
    //   q = q_dirBSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PPP])[ktne]=f_BSW - ms*c1over216 * VeloX - ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
    //   q = q_dirBNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MMP])[ktsw]=f_BNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
@@ -3964,87 +3730,88 @@ __global__ void QVelDevCouette27(real* vx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QVelDev1h27( int inx,
-										int iny,
-										real* vx,
-										real* vy,
-										real* vz,
-										real* DD, 
-										int* k_Q, 
-										real* QQ,
-										unsigned int numberOfBCnodes, 
-										real om1,
-										real Phi,
-										real angularVelocity,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										real* coordX,
-										real* coordY,
-										real* coordZ,
-										unsigned int size_Mat, 
-										bool isEvenTimestep)
+__global__ void QVelDev1h27(
+    int inx,
+    int iny,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1,
+    real Phi,
+    real angularVelocity,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 	} 
 	else
 	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4079,24 +3846,24 @@ __global__ void QVelDev1h27( int inx,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
 		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
 		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
 		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -4167,32 +3934,32 @@ __global__ void QVelDev1h27( int inx,
 		//real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
 		//	f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		//f_W    = (D.f[DIR_P00   ])[ke   ];
-		//f_E    = (D.f[DIR_M00   ])[kw   ];
-		//f_S    = (D.f[DIR_0P0   ])[kn   ];
-		//f_N    = (D.f[DIR_0M0   ])[ks   ];
-		//f_B    = (D.f[DIR_00P   ])[kt   ];
-		//f_T    = (D.f[DIR_00M   ])[kb   ];
-		//f_SW   = (D.f[DIR_PP0  ])[kne  ];
-		//f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-		//f_NW   = (D.f[DIR_PM0  ])[kse  ];
-		//f_SE   = (D.f[DIR_MP0  ])[knw  ];
-		//f_BW   = (D.f[DIR_P0P  ])[kte  ];
-		//f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-		//f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-		//f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-		//f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-		//f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-		//f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-		//f_BN   = (D.f[DIR_0MP  ])[kts  ];
-		//f_BSW  = (D.f[DIR_PPP ])[ktne ];
-		//f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-		//f_BNW  = (D.f[DIR_PMP ])[ktse ];
-		//f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-		//f_TSW  = (D.f[DIR_PPM ])[kbne ];
-		//f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-		//f_TNW  = (D.f[DIR_PMM ])[kbse ];
-		//f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+		//f_W    = (D.f[DIR_P00])[ke   ];
+		//f_E    = (D.f[DIR_M00])[kw   ];
+		//f_S    = (D.f[DIR_0P0])[kn   ];
+		//f_N    = (D.f[DIR_0M0])[ks   ];
+		//f_B    = (D.f[DIR_00P])[kt   ];
+		//f_T    = (D.f[DIR_00M])[kb   ];
+		//f_SW   = (D.f[DIR_PP0])[kne  ];
+		//f_NE   = (D.f[DIR_MM0])[ksw  ];
+		//f_NW   = (D.f[DIR_PM0])[kse  ];
+		//f_SE   = (D.f[DIR_MP0])[knw  ];
+		//f_BW   = (D.f[DIR_P0P])[kte  ];
+		//f_TE   = (D.f[DIR_M0M])[kbw  ];
+		//f_TW   = (D.f[DIR_P0M])[kbe  ];
+		//f_BE   = (D.f[DIR_M0P])[ktw  ];
+		//f_BS   = (D.f[DIR_0PP])[ktn  ];
+		//f_TN   = (D.f[DIR_0MM])[kbs  ];
+		//f_TS   = (D.f[DIR_0PM])[kbn  ];
+		//f_BN   = (D.f[DIR_0MP])[kts  ];
+		//f_BSW  = (D.f[DIR_PPP])[ktne ];
+		//f_BNE  = (D.f[DIR_MMP])[ktsw ];
+		//f_BNW  = (D.f[DIR_PMP])[ktse ];
+		//f_BSE  = (D.f[DIR_MPP])[ktnw ];
+		//f_TSW  = (D.f[DIR_PPM])[kbne ];
+		//f_TNE  = (D.f[DIR_MMM])[kbsw ];
+		//f_TNW  = (D.f[DIR_PMM])[kbse ];
+		//f_TSE  = (D.f[DIR_MPM])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////
 		real /*vx1, vx2,*/ vx3, drho, feq, q, cu_sq;
 		//drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -4217,63 +3984,63 @@ __global__ void QVelDev1h27( int inx,
 		//////////////////////////////////////////////////////////////////////////
 		if (isEvenTimestep==false)
 		{
-			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
 		} 
 		else
 		{
-			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+			D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
 		}
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//Test
@@ -4748,39 +4515,32 @@ __global__ void QVelDev1h27( int inx,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QVelDeviceComp27(
-											real* velocityX,
-											real* velocityY,
-											real* velocityZ,
-											real* distributions,
-											int* subgridDistanceIndices,
-											real* subgridDistances,
-											unsigned int numberOfBCnodes,
-											real omega,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int numberOfLBnodes,
-											bool isEvenTimestep)
+    real* velocityX,
+    real* velocityY,
+    real* velocityZ,
+    real* distributions,
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes,
+    bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
    //! The velocity boundary condition is executed in the following steps
    //!
-   ////////////////////////////////////////////////////////////////////////////////
-   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-   //!
-   const unsigned  x = threadIdx.x;  // global x-index 
-   const unsigned  y = blockIdx.x;   // global y-index 
-   const unsigned  z = blockIdx.y;   // global z-index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
    //////////////////////////////////////////////////////////////////////////
    //! - Run for all indices in size of boundary condition (numberOfBCnodes)
    //!
-   if(k < numberOfBCnodes)
+   if(nodeIndex < numberOfBCnodes)
    {
       //////////////////////////////////////////////////////////////////////////
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
@@ -4792,9 +4552,9 @@ __global__ void QVelDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local velocities
       //!
-      real VeloX = velocityX[k];
-      real VeloY = velocityY[k];
-      real VeloZ = velocityZ[k];
+      real VeloX = velocityX[nodeIndex];
+      real VeloY = velocityY[nodeIndex];
+      real VeloZ = velocityZ[nodeIndex];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local subgrid distances (q's)
@@ -4805,7 +4565,7 @@ __global__ void QVelDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[nodeIndex];
       unsigned int kzero= indexOfBCnode;
       unsigned int ke   = indexOfBCnode;
       unsigned int kw   = neighborX[indexOfBCnode];
@@ -4837,32 +4597,32 @@ __global__ void QVelDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[DIR_P00   ])[ke   ];
-      real f_E    = (dist.f[DIR_M00   ])[kw   ];
-      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
-      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
-      real f_B    = (dist.f[DIR_00P   ])[kt   ];
-      real f_T    = (dist.f[DIR_00M   ])[kb   ];
-      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
-      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
-      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
-      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
-      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
-      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
-      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
-      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
-      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
-      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
-      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
-      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
-      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
-      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
-      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
-      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
-      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
-      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
-      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
-      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
@@ -4894,7 +4654,7 @@ __global__ void QVelDeviceComp27(
       //! - Update distributions with subgrid distance (q) between zero and one
       //!
       real feq, q, velocityLB, velocityBC;
-      q = (subgridD.q[DIR_P00])[k];
+      q = (subgridD.q[DIR_P00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
@@ -4903,7 +4663,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_M00])[k];
+      q = (subgridD.q[DIR_M00])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
@@ -4912,7 +4672,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0P0])[k];
+      q = (subgridD.q[DIR_0P0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
@@ -4921,7 +4681,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_0M0])[k];
+      q = (subgridD.q[DIR_0M0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
@@ -4930,7 +4690,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00P])[k];
+      q = (subgridD.q[DIR_00P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
@@ -4939,7 +4699,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_00M])[k];
+      q = (subgridD.q[DIR_00M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
@@ -4948,7 +4708,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[DIR_PP0])[k];
+      q = (subgridD.q[DIR_PP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
@@ -4957,7 +4717,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MM0])[k];
+      q = (subgridD.q[DIR_MM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
@@ -4966,7 +4726,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PM0])[k];
+      q = (subgridD.q[DIR_PM0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
@@ -4975,7 +4735,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_MP0])[k];
+      q = (subgridD.q[DIR_MP0])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
@@ -4984,7 +4744,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0P])[k];
+      q = (subgridD.q[DIR_P0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
@@ -4993,7 +4753,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0M])[k];
+      q = (subgridD.q[DIR_M0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
@@ -5002,7 +4762,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_P0M])[k];
+      q = (subgridD.q[DIR_P0M])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
@@ -5011,7 +4771,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_M0P])[k];
+      q = (subgridD.q[DIR_M0P])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
@@ -5020,7 +4780,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PP])[k];
+      q = (subgridD.q[DIR_0PP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
@@ -5029,7 +4789,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MM])[k];
+      q = (subgridD.q[DIR_0MM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
@@ -5038,7 +4798,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloBC(q, f_BS, f_TN, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0PM])[k];
+      q = (subgridD.q[DIR_0PM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
@@ -5047,7 +4807,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_0MP])[k];
+      q = (subgridD.q[DIR_0MP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
@@ -5056,7 +4816,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[DIR_PPP])[k];
+      q = (subgridD.q[DIR_PPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
@@ -5065,7 +4825,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMM])[k];
+      q = (subgridD.q[DIR_MMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
@@ -5074,7 +4834,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloBC(q, f_BSW, f_TNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PPM])[k];
+      q = (subgridD.q[DIR_PPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
@@ -5083,7 +4843,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MMP])[k];
+      q = (subgridD.q[DIR_MMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
@@ -5092,7 +4852,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMP])[k];
+      q = (subgridD.q[DIR_PMP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
@@ -5101,7 +4861,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPM])[k];
+      q = (subgridD.q[DIR_MPM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
@@ -5110,7 +4870,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_PMM])[k];
+      q = (subgridD.q[DIR_PMM])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
@@ -5119,7 +4879,7 @@ __global__ void QVelDeviceComp27(
          (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[DIR_MPP])[k];
+      q = (subgridD.q[DIR_MPP])[nodeIndex];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
@@ -5170,82 +4930,83 @@ __global__ void QVelDeviceComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-__global__ void QVelDevice27(int inx,
-                                        int iny,
-                                        real* vx,
-                                        real* vy,
-                                        real* vz,
-                                        real* DD, 
-                                        int* k_Q, 
-                                        real* QQ,
-                                        unsigned int numberOfBCnodes, 
-                                        real om1, 
-                                        unsigned int* neighborX,
-                                        unsigned int* neighborY,
-                                        unsigned int* neighborZ,
-                                        unsigned int size_Mat, 
-                                        bool isEvenTimestep)
+__global__ void QVelDevice27(
+    int inx,
+    int iny,
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD, 
+    int* k_Q, 
+    real* QQ,
+    unsigned int numberOfBCnodes, 
+    real om1, 
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned long long numberOfLBnodes, 
+    bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -5270,24 +5031,24 @@ __global__ void QVelDevice27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -5358,32 +5119,32 @@ __global__ void QVelDevice27(int inx,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -5408,63 +5169,63 @@ __global__ void QVelDevice27(int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
       } 
       else
       {
-         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-         D.f[DIR_000] = &DD[DIR_000*size_Mat];
-         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
@@ -5723,19 +5484,20 @@ __global__ void QVelDevice27(int inx,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void PropellerBC(unsigned int* neighborX,
-                                       unsigned int* neighborY,
-                                       unsigned int* neighborZ,
-                                       real* rho,
-                                       real* ux,
-                                       real* uy,
-                                       real* uz,
-                                       int* k_Q, 
-									   unsigned int size_Prop,
-                                       unsigned int size_Mat,
-                                       unsigned int* bcMatD,
-                                       real* DD,
-                                       bool EvenOrOdd)
+__global__ void PropellerBC(
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rho,
+    real* ux,
+    real* uy,
+    real* uz,
+    int* k_Q, 
+    unsigned int size_Prop,
+    unsigned long long numberOfLBnodes,
+    unsigned int* bcMatD,
+    real* DD,
+    bool EvenOrOdd)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -5754,63 +5516,63 @@ __global__ void PropellerBC(unsigned int* neighborX,
         Distributions27 D;
         if (EvenOrOdd==true)
         {
-			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
         }
         else
         {
-			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+			D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+			D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+			D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+			D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+			D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+			D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+			D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+			D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+			D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+			D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+			D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+			D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+			D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+			D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+			D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+			D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+			D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+			D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+			D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+			D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+			D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
+			D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+			D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+			D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+			D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+			D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
         }
         //////////////////////////////////////////////////////////////////////////
 		unsigned int KQK = k_Q[k];
@@ -5859,58 +5621,58 @@ __global__ void PropellerBC(unsigned int* neighborX,
 		f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW, f_ZERO;
 
 		f_ZERO= (D.f[DIR_000])[kzero];
-		f_E   = (D.f[DIR_P00   ])[ke   ];
-		f_W   = (D.f[DIR_M00   ])[kw   ];
-		f_N   = (D.f[DIR_0P0   ])[kn   ];
-		f_S   = (D.f[DIR_0M0   ])[ks   ];
-		f_T   = (D.f[DIR_00P   ])[kt   ];
-		f_B   = (D.f[DIR_00M   ])[kb   ];
-		f_NE  = (D.f[DIR_PP0  ])[kne  ];
-		f_SW  = (D.f[DIR_MM0  ])[ksw  ];
-		f_SE  = (D.f[DIR_PM0  ])[kse  ];
-		f_NW  = (D.f[DIR_MP0  ])[knw  ];
-		f_TE  = (D.f[DIR_P0P  ])[kte  ];
-		f_BW  = (D.f[DIR_M0M  ])[kbw  ];
-		f_BE  = (D.f[DIR_P0M  ])[kbe  ];
-		f_TW  = (D.f[DIR_M0P  ])[ktw  ];
-		f_TN  = (D.f[DIR_0PP  ])[ktn  ];
-		f_BS  = (D.f[DIR_0MM  ])[kbs  ];
-		f_BN  = (D.f[DIR_0PM  ])[kbn  ];
-		f_TS  = (D.f[DIR_0MP  ])[kts  ];
-		f_TNE = (D.f[DIR_PPP ])[ktne ];
-		f_BSW = (D.f[DIR_MMM ])[kbsw ];
-		f_BNE = (D.f[DIR_PPM ])[kbne ];
-		f_TSW = (D.f[DIR_MMP ])[ktsw ];
-		f_TSE = (D.f[DIR_PMP ])[ktse ];
-		f_BNW = (D.f[DIR_MPM ])[kbnw ];
-		f_BSE = (D.f[DIR_PMM ])[kbse ];
-		f_TNW = (D.f[DIR_MPP ])[ktnw ];
-		//f_W    = (D.f[DIR_P00   ])[ke   ];
-		//f_E    = (D.f[DIR_M00   ])[kw   ];
-		//f_S    = (D.f[DIR_0P0   ])[kn   ];
-		//f_N    = (D.f[DIR_0M0   ])[ks   ];
-		//f_B    = (D.f[DIR_00P   ])[kt   ];
-		//f_T    = (D.f[DIR_00M   ])[kb   ];
-		//f_SW   = (D.f[DIR_PP0  ])[kne  ];
-		//f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-		//f_NW   = (D.f[DIR_PM0  ])[kse  ];
-		//f_SE   = (D.f[DIR_MP0  ])[knw  ];
-		//f_BW   = (D.f[DIR_P0P  ])[kte  ];
-		//f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-		//f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-		//f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-		//f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-		//f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-		//f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-		//f_BN   = (D.f[DIR_0MP  ])[kts  ];
-		//f_BSW  = (D.f[DIR_PPP ])[ktne ];
-		//f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-		//f_TSW  = (D.f[DIR_PPM ])[kbne ];
-		//f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-		//f_BNW  = (D.f[DIR_PMP ])[ktse ];
-		//f_TSE  = (D.f[DIR_MPM ])[kbnw ];
-		//f_TNW  = (D.f[DIR_PMM ])[kbse ];
-		//f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+		f_E   = (D.f[DIR_P00])[ke   ];
+		f_W   = (D.f[DIR_M00])[kw   ];
+		f_N   = (D.f[DIR_0P0])[kn   ];
+		f_S   = (D.f[DIR_0M0])[ks   ];
+		f_T   = (D.f[DIR_00P])[kt   ];
+		f_B   = (D.f[DIR_00M])[kb   ];
+		f_NE  = (D.f[DIR_PP0])[kne  ];
+		f_SW  = (D.f[DIR_MM0])[ksw  ];
+		f_SE  = (D.f[DIR_PM0])[kse  ];
+		f_NW  = (D.f[DIR_MP0])[knw  ];
+		f_TE  = (D.f[DIR_P0P])[kte  ];
+		f_BW  = (D.f[DIR_M0M])[kbw  ];
+		f_BE  = (D.f[DIR_P0M])[kbe  ];
+		f_TW  = (D.f[DIR_M0P])[ktw  ];
+		f_TN  = (D.f[DIR_0PP])[ktn  ];
+		f_BS  = (D.f[DIR_0MM])[kbs  ];
+		f_BN  = (D.f[DIR_0PM])[kbn  ];
+		f_TS  = (D.f[DIR_0MP])[kts  ];
+		f_TNE = (D.f[DIR_PPP])[ktne ];
+		f_BSW = (D.f[DIR_MMM])[kbsw ];
+		f_BNE = (D.f[DIR_PPM])[kbne ];
+		f_TSW = (D.f[DIR_MMP])[ktsw ];
+		f_TSE = (D.f[DIR_PMP])[ktse ];
+		f_BNW = (D.f[DIR_MPM])[kbnw ];
+		f_BSE = (D.f[DIR_PMM])[kbse ];
+		f_TNW = (D.f[DIR_MPP])[ktnw ];
+		//f_W    = (D.f[DIR_P00])[ke   ];
+		//f_E    = (D.f[DIR_M00])[kw   ];
+		//f_S    = (D.f[DIR_0P0])[kn   ];
+		//f_N    = (D.f[DIR_0M0])[ks   ];
+		//f_B    = (D.f[DIR_00P])[kt   ];
+		//f_T    = (D.f[DIR_00M])[kb   ];
+		//f_SW   = (D.f[DIR_PP0])[kne  ];
+		//f_NE   = (D.f[DIR_MM0])[ksw  ];
+		//f_NW   = (D.f[DIR_PM0])[kse  ];
+		//f_SE   = (D.f[DIR_MP0])[knw  ];
+		//f_BW   = (D.f[DIR_P0P])[kte  ];
+		//f_TE   = (D.f[DIR_M0M])[kbw  ];
+		//f_TW   = (D.f[DIR_P0M])[kbe  ];
+		//f_BE   = (D.f[DIR_M0P])[ktw  ];
+		//f_BS   = (D.f[DIR_0PP])[ktn  ];
+		//f_TN   = (D.f[DIR_0MM])[kbs  ];
+		//f_TS   = (D.f[DIR_0PM])[kbn  ];
+		//f_BN   = (D.f[DIR_0MP])[kts  ];
+		//f_BSW  = (D.f[DIR_PPP])[ktne ];
+		//f_TNE  = (D.f[DIR_MMM])[kbsw ];
+		//f_TSW  = (D.f[DIR_PPM])[kbne ];
+		//f_BNE  = (D.f[DIR_MMP])[ktsw ];
+		//f_BNW  = (D.f[DIR_PMP])[ktse ];
+		//f_TSE  = (D.f[DIR_MPM])[kbnw ];
+		//f_TNW  = (D.f[DIR_PMM])[kbse ];
+		//f_BSE  = (D.f[DIR_MPP])[ktnw ];
 		//////////////////////////////////////////////////////////////////////////////////
 		real vxo1, vxo2, vxo3, drho;
 		drho   =  /*zero;*/f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -5992,88 +5754,88 @@ __global__ void PropellerBC(unsigned int* neighborX,
          f_TNW  = f_TNW  + ((c1o1+drho) * (-  c1o216*(c3o1*(-vxo1+vxo2+vxo3)+c9o2*(-vxo1+vxo2+vxo3)*(-vxo1+vxo2+vxo3)-cusq) +   c1o216*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq2)));
 
 		(D.f[DIR_000])[kzero] =  f_ZERO;
-        (D.f[DIR_P00   ])[ke   ] =  f_E   ;	// f_W   ;//    	
-        (D.f[DIR_M00   ])[kw   ] =  f_W   ;	// f_E   ;//    	
-        (D.f[DIR_0P0   ])[kn   ] =  f_N   ;	// f_S   ;//    	
-        (D.f[DIR_0M0   ])[ks   ] =  f_S   ;	// f_N   ;//    	
-        (D.f[DIR_00P   ])[kt   ] =  f_T   ;	// f_B   ;//    	
-        (D.f[DIR_00M   ])[kb   ] =  f_B   ;	// f_T   ;//    	
-        (D.f[DIR_PP0  ])[kne  ] =  f_NE  ;	// f_SW  ;//    	
-        (D.f[DIR_MM0  ])[ksw  ] =  f_SW  ;	// f_NE  ;//    	
-        (D.f[DIR_PM0  ])[kse  ] =  f_SE  ;	// f_NW  ;//    	
-        (D.f[DIR_MP0  ])[knw  ] =  f_NW  ;	// f_SE  ;//    	
-        (D.f[DIR_P0P  ])[kte  ] =  f_TE  ;	// f_BW  ;//    	
-        (D.f[DIR_M0M  ])[kbw  ] =  f_BW  ;	// f_TE  ;//    	
-        (D.f[DIR_P0M  ])[kbe  ] =  f_BE  ;	// f_TW  ;//    	
-        (D.f[DIR_M0P  ])[ktw  ] =  f_TW  ;	// f_BE  ;//    	
-        (D.f[DIR_0PP  ])[ktn  ] =  f_TN  ;	// f_BS  ;//    	
-        (D.f[DIR_0MM  ])[kbs  ] =  f_BS  ;	// f_TN  ;//    	
-        (D.f[DIR_0PM  ])[kbn  ] =  f_BN  ;	// f_TS  ;//    	
-        (D.f[DIR_0MP  ])[kts  ] =  f_TS  ;	// f_BN  ;//    	
-        (D.f[DIR_PPP ])[ktne ] =  f_TNE ;	// f_BSW ;//    	
-        (D.f[DIR_MMM ])[kbsw ] =  f_BSW ;	// f_BNE ;//    	
-        (D.f[DIR_PPM ])[kbne ] =  f_BNE ;	// f_BNW ;//    	
-        (D.f[DIR_MMP ])[ktsw ] =  f_TSW ;	// f_BSE ;//    	
-        (D.f[DIR_PMP ])[ktse ] =  f_TSE ;	// f_TSW ;//    	
-        (D.f[DIR_MPM ])[kbnw ] =  f_BNW ;	// f_TNE ;//    	
-        (D.f[DIR_PMM ])[kbse ] =  f_BSE ;	// f_TNW ;//    	
-        (D.f[DIR_MPP ])[ktnw ] =  f_TNW ;	// f_TSE ;//    	
+        (D.f[DIR_P00])[ke   ] =  f_E   ;	// f_W   ;//    	
+        (D.f[DIR_M00])[kw   ] =  f_W   ;	// f_E   ;//    	
+        (D.f[DIR_0P0])[kn   ] =  f_N   ;	// f_S   ;//    	
+        (D.f[DIR_0M0])[ks   ] =  f_S   ;	// f_N   ;//    	
+        (D.f[DIR_00P])[kt   ] =  f_T   ;	// f_B   ;//    	
+        (D.f[DIR_00M])[kb   ] =  f_B   ;	// f_T   ;//    	
+        (D.f[DIR_PP0])[kne  ] =  f_NE  ;	// f_SW  ;//    	
+        (D.f[DIR_MM0])[ksw  ] =  f_SW  ;	// f_NE  ;//    	
+        (D.f[DIR_PM0])[kse  ] =  f_SE  ;	// f_NW  ;//    	
+        (D.f[DIR_MP0])[knw  ] =  f_NW  ;	// f_SE  ;//    	
+        (D.f[DIR_P0P])[kte  ] =  f_TE  ;	// f_BW  ;//    	
+        (D.f[DIR_M0M])[kbw  ] =  f_BW  ;	// f_TE  ;//    	
+        (D.f[DIR_P0M])[kbe  ] =  f_BE  ;	// f_TW  ;//    	
+        (D.f[DIR_M0P])[ktw  ] =  f_TW  ;	// f_BE  ;//    	
+        (D.f[DIR_0PP])[ktn  ] =  f_TN  ;	// f_BS  ;//    	
+        (D.f[DIR_0MM])[kbs  ] =  f_BS  ;	// f_TN  ;//    	
+        (D.f[DIR_0PM])[kbn  ] =  f_BN  ;	// f_TS  ;//    	
+        (D.f[DIR_0MP])[kts  ] =  f_TS  ;	// f_BN  ;//    	
+        (D.f[DIR_PPP])[ktne ] =  f_TNE ;	// f_BSW ;//    	
+        (D.f[DIR_MMM])[kbsw ] =  f_BSW ;	// f_BNE ;//    	
+        (D.f[DIR_PPM])[kbne ] =  f_BNE ;	// f_BNW ;//    	
+        (D.f[DIR_MMP])[ktsw ] =  f_TSW ;	// f_BSE ;//    	
+        (D.f[DIR_PMP])[ktse ] =  f_TSE ;	// f_TSW ;//    	
+        (D.f[DIR_MPM])[kbnw ] =  f_BNW ;	// f_TNE ;//    	
+        (D.f[DIR_PMM])[kbse ] =  f_BSE ;	// f_TNW ;//    	
+        (D.f[DIR_MPP])[ktnw ] =  f_TNW ;	// f_TSE ;//    	
 
 		//////////////////////////////////////////////////////////////////////////
         ////(D.f[DIR_000])[kzero] =   c8over27* (drho-cu_sq);
-        //(D.f[DIR_P00   ])[ke   ] =   three*c2over27* ( vx1        );		//six
-        //(D.f[DIR_M00   ])[kw   ] =   three*c2over27* (-vx1        );		//six
-        //(D.f[DIR_0P0   ])[kn   ] =   three*c2over27* (     vx2    );		//six
-        //(D.f[DIR_0M0   ])[ks   ] =   three*c2over27* (    -vx2    );		//six
-        //(D.f[DIR_00P   ])[kt   ] =   three*c2over27* (         vx3);		//six
-        //(D.f[DIR_00M   ])[kb   ] =   three*c2over27* (        -vx3);		//six
-        //(D.f[DIR_PP0  ])[kne  ] =   three*c1over54* ( vx1+vx2    );		//six
-        //(D.f[DIR_MM0  ])[ksw  ] =   three*c1over54* (-vx1-vx2    );		//six
-        //(D.f[DIR_PM0  ])[kse  ] =   three*c1over54* ( vx1-vx2    );		//six
-        //(D.f[DIR_MP0  ])[knw  ] =   three*c1over54* (-vx1+vx2    );		//six
-        //(D.f[DIR_P0P  ])[kte  ] =   three*c1over54* ( vx1    +vx3);		//six
-        //(D.f[DIR_M0M  ])[kbw  ] =   three*c1over54* (-vx1    -vx3);		//six
-        //(D.f[DIR_P0M  ])[kbe  ] =   three*c1over54* ( vx1    -vx3);		//six
-        //(D.f[DIR_M0P  ])[ktw  ] =   three*c1over54* (-vx1    +vx3);		//six
-        //(D.f[DIR_0PP  ])[ktn  ] =   three*c1over54* (     vx2+vx3);		//six
-        //(D.f[DIR_0MM  ])[kbs  ] =   three*c1over54* (    -vx2-vx3);		//six
-        //(D.f[DIR_0PM  ])[kbn  ] =   three*c1over54* (     vx2-vx3);		//six
-        //(D.f[DIR_0MP  ])[kts  ] =   three*c1over54* (    -vx2+vx3);		//six
-        //(D.f[DIR_PPP ])[ktne ] =   three*c1over216*( vx1+vx2+vx3);		//six
-        //(D.f[DIR_MMM ])[kbsw ] =   three*c1over216*(-vx1-vx2-vx3);		//six
-        //(D.f[DIR_PPM ])[kbne ] =   three*c1over216*( vx1+vx2-vx3);		//six
-        //(D.f[DIR_MMP ])[ktsw ] =   three*c1over216*(-vx1-vx2+vx3);		//six
-        //(D.f[DIR_PMP ])[ktse ] =   three*c1over216*( vx1-vx2+vx3);		//six
-        //(D.f[DIR_MPM ])[kbnw ] =   three*c1over216*(-vx1+vx2-vx3);		//six
-        //(D.f[DIR_PMM ])[kbse ] =   three*c1over216*( vx1-vx2-vx3);		//six
-        //(D.f[DIR_MPP ])[ktnw ] =   three*c1over216*(-vx1+vx2+vx3);		//six
+        //(D.f[DIR_P00])[ke   ] =   three*c2over27* ( vx1        );		//six
+        //(D.f[DIR_M00])[kw   ] =   three*c2over27* (-vx1        );		//six
+        //(D.f[DIR_0P0])[kn   ] =   three*c2over27* (     vx2    );		//six
+        //(D.f[DIR_0M0])[ks   ] =   three*c2over27* (    -vx2    );		//six
+        //(D.f[DIR_00P])[kt   ] =   three*c2over27* (         vx3);		//six
+        //(D.f[DIR_00M])[kb   ] =   three*c2over27* (        -vx3);		//six
+        //(D.f[DIR_PP0])[kne  ] =   three*c1over54* ( vx1+vx2    );		//six
+        //(D.f[DIR_MM0])[ksw  ] =   three*c1over54* (-vx1-vx2    );		//six
+        //(D.f[DIR_PM0])[kse  ] =   three*c1over54* ( vx1-vx2    );		//six
+        //(D.f[DIR_MP0])[knw  ] =   three*c1over54* (-vx1+vx2    );		//six
+        //(D.f[DIR_P0P])[kte  ] =   three*c1over54* ( vx1    +vx3);		//six
+        //(D.f[DIR_M0M])[kbw  ] =   three*c1over54* (-vx1    -vx3);		//six
+        //(D.f[DIR_P0M])[kbe  ] =   three*c1over54* ( vx1    -vx3);		//six
+        //(D.f[DIR_M0P])[ktw  ] =   three*c1over54* (-vx1    +vx3);		//six
+        //(D.f[DIR_0PP])[ktn  ] =   three*c1over54* (     vx2+vx3);		//six
+        //(D.f[DIR_0MM])[kbs  ] =   three*c1over54* (    -vx2-vx3);		//six
+        //(D.f[DIR_0PM])[kbn  ] =   three*c1over54* (     vx2-vx3);		//six
+        //(D.f[DIR_0MP])[kts  ] =   three*c1over54* (    -vx2+vx3);		//six
+        //(D.f[DIR_PPP])[ktne ] =   three*c1over216*( vx1+vx2+vx3);		//six
+        //(D.f[DIR_MMM])[kbsw ] =   three*c1over216*(-vx1-vx2-vx3);		//six
+        //(D.f[DIR_PPM])[kbne ] =   three*c1over216*( vx1+vx2-vx3);		//six
+        //(D.f[DIR_MMP])[ktsw ] =   three*c1over216*(-vx1-vx2+vx3);		//six
+        //(D.f[DIR_PMP])[ktse ] =   three*c1over216*( vx1-vx2+vx3);		//six
+        //(D.f[DIR_MPM])[kbnw ] =   three*c1over216*(-vx1+vx2-vx3);		//six
+        //(D.f[DIR_PMM])[kbse ] =   three*c1over216*( vx1-vx2-vx3);		//six
+        //(D.f[DIR_MPP])[ktnw ] =   three*c1over216*(-vx1+vx2+vx3);		//six
         //(D.f[DIR_000])[kzero] =   c8over27* (drho-cu_sq);
-        //(D.f[DIR_P00   ])[ke   ] =   c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq);
-        //(D.f[DIR_M00   ])[kw   ] =   c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq);
-        //(D.f[DIR_0P0   ])[kn   ] =   c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq);
-        //(D.f[DIR_0M0   ])[ks   ] =   c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq);
-        //(D.f[DIR_00P   ])[kt   ] =   c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq);
-        //(D.f[DIR_00M   ])[kb   ] =   c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq);
-        //(D.f[DIR_PP0  ])[kne  ] =   c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-        //(D.f[DIR_MM0  ])[ksw  ] =   c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-        //(D.f[DIR_PM0  ])[kse  ] =   c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-        //(D.f[DIR_MP0  ])[knw  ] =   c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-        //(D.f[DIR_P0P  ])[kte  ] =   c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-        //(D.f[DIR_M0M  ])[kbw  ] =   c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-        //(D.f[DIR_P0M  ])[kbe  ] =   c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-        //(D.f[DIR_M0P  ])[ktw  ] =   c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-        //(D.f[DIR_0PP  ])[ktn  ] =   c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-        //(D.f[DIR_0MM  ])[kbs  ] =   c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-        //(D.f[DIR_0PM  ])[kbn  ] =   c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-        //(D.f[DIR_0MP  ])[kts  ] =   c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-        //(D.f[DIR_PPP ])[ktne ] =   c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-        //(D.f[DIR_MMM ])[kbsw ] =   c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-        //(D.f[DIR_PPM ])[kbne ] =   c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-        //(D.f[DIR_MMP ])[ktsw ] =   c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-        //(D.f[DIR_PMP ])[ktse ] =   c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-        //(D.f[DIR_MPM ])[kbnw ] =   c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-        //(D.f[DIR_PMM ])[kbse ] =   c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-        //(D.f[DIR_MPP ])[ktnw ] =   c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+        //(D.f[DIR_P00])[ke   ] =   c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq);
+        //(D.f[DIR_M00])[kw   ] =   c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq);
+        //(D.f[DIR_0P0])[kn   ] =   c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq);
+        //(D.f[DIR_0M0])[ks   ] =   c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq);
+        //(D.f[DIR_00P])[kt   ] =   c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq);
+        //(D.f[DIR_00M])[kb   ] =   c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq);
+        //(D.f[DIR_PP0])[kne  ] =   c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+        //(D.f[DIR_MM0])[ksw  ] =   c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+        //(D.f[DIR_PM0])[kse  ] =   c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+        //(D.f[DIR_MP0])[knw  ] =   c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+        //(D.f[DIR_P0P])[kte  ] =   c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+        //(D.f[DIR_M0M])[kbw  ] =   c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+        //(D.f[DIR_P0M])[kbe  ] =   c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+        //(D.f[DIR_M0P])[ktw  ] =   c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+        //(D.f[DIR_0PP])[ktn  ] =   c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+        //(D.f[DIR_0MM])[kbs  ] =   c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+        //(D.f[DIR_0PM])[kbn  ] =   c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+        //(D.f[DIR_0MP])[kts  ] =   c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+        //(D.f[DIR_PPP])[ktne ] =   c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+        //(D.f[DIR_MMM])[kbsw ] =   c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+        //(D.f[DIR_PPM])[kbne ] =   c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+        //(D.f[DIR_MMP])[ktsw ] =   c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+        //(D.f[DIR_PMP])[ktse ] =   c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+        //(D.f[DIR_MPM])[kbnw ] =   c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+        //(D.f[DIR_PMM])[kbse ] =   c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+        //(D.f[DIR_MPP])[ktnw ] =   c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 		}
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
index 16028e2f9f87716f43ed60f82ed513289e381b7c..6bac690a881494520a33e901c35c3f51e4d2bfc7 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 #include "math.h"
@@ -23,7 +23,7 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	real* veloZ,
 	real* DDStart,
 	real* turbulentViscosity,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	bool EvenOrOdd)
@@ -39,7 +39,7 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k<size_Mat)
+	if (k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -50,63 +50,63 @@ __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
index d48fa80fd14ce15f4a380ed46403654b43c805e8..6de196e1788494b20c24a73cb9ec02a360f868ea 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
@@ -1,9 +1,9 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 
@@ -20,69 +20,69 @@ __global__ void WallFunction27(
 										  unsigned int* neighborX,
 										  unsigned int* neighborY,
 										  unsigned int* neighborZ,
-										  unsigned int size_Mat, 
+										  unsigned long long numberOfLBnodes, 
 										  bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    } 
    else
    {
-      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -107,24 +107,24 @@ __global__ void WallFunction27(
       //      *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
       //      *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //      *q_dirBSE, *q_dirBNW; 
-      //q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-      //q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-      //q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-      //q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-      //q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-      //q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      //q_dirE   = &QQ[DIR_P00 * numberOfBCnodes];
+      //q_dirW   = &QQ[DIR_M00 * numberOfBCnodes];
+      //q_dirN   = &QQ[DIR_0P0 * numberOfBCnodes];
+      //q_dirS   = &QQ[DIR_0M0 * numberOfBCnodes];
+      //q_dirT   = &QQ[DIR_00P * numberOfBCnodes];
+      //q_dirB   = &QQ[DIR_00M * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0 * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0 * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0 * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0 * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP * numberOfBCnodes];
       //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
       //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
       //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
@@ -167,32 +167,32 @@ __global__ void WallFunction27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[DIR_P00   ])[ke   ];
-      f_E    = (D.f[DIR_M00   ])[kw   ];
-      f_S    = (D.f[DIR_0P0   ])[kn   ];
-      f_N    = (D.f[DIR_0M0   ])[ks   ];
-      f_B    = (D.f[DIR_00P   ])[kt   ];
-      f_T    = (D.f[DIR_00M   ])[kb   ];
-      f_SW   = (D.f[DIR_PP0  ])[kne  ];
-      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-      f_NW   = (D.f[DIR_PM0  ])[kse  ];
-      f_SE   = (D.f[DIR_MP0  ])[knw  ];
-      f_BW   = (D.f[DIR_P0P  ])[kte  ];
-      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-      f_BN   = (D.f[DIR_0MP  ])[kts  ];
-      f_BSW  = (D.f[DIR_PPP ])[ktne ];
-      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-      f_BNW  = (D.f[DIR_PMP ])[ktse ];
-      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-      f_TSW  = (D.f[DIR_PPM ])[kbne ];
-      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-      f_TNW  = (D.f[DIR_PMM ])[kbse ];
-      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_W    = (D.f[DIR_P00])[ke   ];
+      f_E    = (D.f[DIR_M00])[kw   ];
+      f_S    = (D.f[DIR_0P0])[kn   ];
+      f_N    = (D.f[DIR_0M0])[ks   ];
+      f_B    = (D.f[DIR_00P])[kt   ];
+      f_T    = (D.f[DIR_00M])[kb   ];
+      f_SW   = (D.f[DIR_PP0])[kne  ];
+      f_NE   = (D.f[DIR_MM0])[ksw  ];
+      f_NW   = (D.f[DIR_PM0])[kse  ];
+      f_SE   = (D.f[DIR_MP0])[knw  ];
+      f_BW   = (D.f[DIR_P0P])[kte  ];
+      f_TE   = (D.f[DIR_M0M])[kbw  ];
+      f_TW   = (D.f[DIR_P0M])[kbe  ];
+      f_BE   = (D.f[DIR_M0P])[ktw  ];
+      f_BS   = (D.f[DIR_0PP])[ktn  ];
+      f_TN   = (D.f[DIR_0MM])[kbs  ];
+      f_TS   = (D.f[DIR_0PM])[kbn  ];
+      f_BN   = (D.f[DIR_0MP])[kts  ];
+      f_BSW  = (D.f[DIR_PPP])[ktne ];
+      f_BNE  = (D.f[DIR_MMP])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP])[ktse ];
+      f_BSE  = (D.f[DIR_MPP])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM])[kbne ];
+      f_TNE  = (D.f[DIR_MMM])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM])[kbse ];
+      f_TSE  = (D.f[DIR_MPM])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       // real vx2, vx3, feq, q;
       real vx1, drho;
@@ -234,63 +234,63 @@ __global__ void WallFunction27(
    //   //////////////////////////////////////////////////////////////////////////
    //   if (isEvenTimestep==false)
    //   {
-   //      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-   //      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-   //      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-   //      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-   //      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-   //      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-   //      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-   //      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-   //      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-   //      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-   //      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-   //      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-   //      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-   //      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-   //      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-   //      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-   //      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-   //      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-   //      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-   //      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-   //      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-   //      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-   //      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-   //      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-   //      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   //      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
+   //      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
+   //      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
+   //      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
+   //      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
+   //      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
+   //      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
+   //      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
+   //      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
+   //      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
+   //      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
+   //      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
+   //      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
+   //      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
+   //      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
+   //      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
+   //      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
+   //      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
+   //      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
+   //      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
    //   } 
    //   else
    //   {
-   //      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-   //      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-   //      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-   //      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-   //      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-   //      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-   //      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-   //      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-   //      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-   //      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-   //      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-   //      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-   //      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-   //      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-   //      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-   //      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-   //      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-   //      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
-   //      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-   //      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-   //      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-   //      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-   //      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-   //      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-   //      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-   //      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   //      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
+   //      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
+   //      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
+   //      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
+   //      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
+   //      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
+   //      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
+   //      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
+   //      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
+   //      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
+   //      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
+   //      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
+   //      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
+   //      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
+   //      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
+   //      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
+   //      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
+   //      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
+   //      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
+   //      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
+   //      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
+   //      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
+   //      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
+   //      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
+   //      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
+   //      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
+   //      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
    //   }
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   //Test
diff --git a/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp b/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp
index 2c85de9e3ec57d50a66fde2c49d3e703676fbf04..ba4a5cae5259ff43a1d85e6ee97dd125e3da912f 100644
--- a/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp
+++ b/src/gpu/VirtualFluids_GPU/Init/InitLattice.cpp
@@ -45,26 +45,44 @@ void initLattice(SPtr<Parameter> para, SPtr<PreProcessor> preProcessor, SPtr<Cud
         preProcessor->init(para, lev);
 
         CalcMacCompSP27(
-            para->getParD(lev)->velocityX, para->getParD(lev)->velocityY, para->getParD(lev)->velocityZ, para->getParD(lev)->rho,
-            para->getParD(lev)->pressure, para->getParD(lev)->typeOfGridNode, para->getParD(lev)->neighborX,
-            para->getParD(lev)->neighborY, para->getParD(lev)->neighborZ, para->getParD(lev)->numberOfNodes,
-            para->getParD(lev)->numberofthreads, para->getParD(lev)->distributions.f[0], para->getParD(lev)->isEvenTimestep);
+            para->getParD(lev)->velocityX, 
+            para->getParD(lev)->velocityY, 
+            para->getParD(lev)->velocityZ, 
+            para->getParD(lev)->rho,
+            para->getParD(lev)->pressure, 
+            para->getParD(lev)->typeOfGridNode, 
+            para->getParD(lev)->neighborX,
+            para->getParD(lev)->neighborY, 
+            para->getParD(lev)->neighborZ, 
+            para->getParD(lev)->numberOfNodes,
+            para->getParD(lev)->numberofthreads, 
+            para->getParD(lev)->distributions.f[0], 
+            para->getParD(lev)->isEvenTimestep);
 
         if (para->getCalcMedian()) {
             constexpr uint tdiff = 1;
-            CalcMacMedSP27(para->getParD(lev)->vx_SP_Med, para->getParD(lev)->vy_SP_Med, para->getParD(lev)->vz_SP_Med,
-                           para->getParD(lev)->rho_SP_Med, para->getParD(lev)->press_SP_Med, para->getParD(lev)->typeOfGridNode,
-                           para->getParD(lev)->neighborX, para->getParD(lev)->neighborY,
-                           para->getParD(lev)->neighborZ, tdiff, para->getParD(lev)->numberOfNodes,
-                           para->getParD(lev)->numberofthreads, para->getParD(lev)->isEvenTimestep);
+            CalcMacMedSP27(
+                para->getParD(lev)->vx_SP_Med, 
+                para->getParD(lev)->vy_SP_Med, 
+                para->getParD(lev)->vz_SP_Med,
+                para->getParD(lev)->rho_SP_Med, 
+                para->getParD(lev)->press_SP_Med, 
+                para->getParD(lev)->typeOfGridNode,
+                para->getParD(lev)->neighborX, 
+                para->getParD(lev)->neighborY,
+                para->getParD(lev)->neighborZ, 
+                tdiff, 
+                para->getParD(lev)->numberOfNodes,
+                para->getParD(lev)->numberofthreads, 
+                para->getParD(lev)->isEvenTimestep);
         }
         // advection - diffusion
         if (para->getDiffOn()) {
 
             cudaMemoryManager->cudaAllocConcentration(lev);
 
-            for (unsigned int i = 0; i < para->getParH(lev)->numberOfNodes; i++) {
-                para->getParH(lev)->Conc[i] = para->getTemperatureInit();
+            for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++) {
+                para->getParH(lev)->concentration[index] = para->getTemperatureInit();
             }
             initTemperatur(para.get(), cudaMemoryManager.get(), lev);
         }
diff --git a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
index 04052b38cbfb59bfe620354b62da70402d74bdbd..6eaa0b17653aaf5257c00e674c87e2844c26cf5d 100644
--- a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
@@ -170,7 +170,7 @@ void PositionReader::definePropellerQs(Parameter* para)
 	//////////////////////////////////////////////////////////////////
 	for(uint u=0; u<para->getParH(para->getFine())->propellerBC.numberOfBCnodes; u++)
 	{
-		for (int dir = DIR_P00; dir<=DIR_MMM; dir++)
+		for (size_t dir = DIR_P00; dir<=DIR_MMM; dir++)
 		{
 			if ((dir==DIR_P00)  || 
 				(dir==DIR_PP0) || (dir==DIR_PM0) || (dir==DIR_P0P) || (dir==DIR_P0M) ||
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
index 50b4460d774010ea7d7b98cfa6fa505cdfeb88c2..d83901a0f7d6a7df8120673a4b14371a6e935aef 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
@@ -5,7 +5,6 @@
 
 #include "LBM/LB.h" 
 
-#include "Kernel/Utilities/KernelGroup.h"
 #include "PreProcessor/PreProcessorType.h"
 #include "Parameter/CudaStreamManager.h"
 
@@ -20,6 +19,5 @@ public:
 
     virtual bool checkParameter()                                = 0;
     virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0;
-    virtual KernelGroup getKernelGroup()                         = 0;
 };
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
index 9bd3945aa81147d03be2b1eac3ddec7c24d71532..328cf8db260bc0092cb2081998961d1e9fb17233 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
@@ -19,11 +19,6 @@ std::vector<PreProcessorType> KernelImp::getPreProcessorTypes()
     return myPreProcessorTypes;
 }
 
-KernelGroup KernelImp::getKernelGroup() 
-{ 
-    return myKernelGroup; 
-}
-
 void KernelImp::setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy)
 {
     this->checkStrategy = strategy;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index a96c2c123472ca33f635273e06a5bf36a745654d..84e5f3f6ac08b92ccd92fbf142cceb3245de51d5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -20,7 +20,6 @@ public:
 
     bool checkParameter();
     std::vector<PreProcessorType> getPreProcessorTypes();
-    KernelGroup getKernelGroup();
 
     void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy);
     bool getKernelUsesFluidNodeIndices();
@@ -33,8 +32,6 @@ protected:
     std::shared_ptr<CheckParameterStrategy> checkStrategy;
     int level;
     std::vector<PreProcessorType> myPreProcessorTypes;
-    KernelGroup myKernelGroup;
-
     vf::cuda::CudaGrid cudaGrid;
 
     bool kernelUsesFluidNodeIndices = false;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
index 51b9e4537fa0857e9302aa638ae7729fa9adcdbe..4d4467acb20232ecb364451f61265b71f1692517 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
@@ -2,6 +2,7 @@
 
 #include "ADComp27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<ADComp27> ADComp27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,34 +11,19 @@ std::shared_ptr<ADComp27> ADComp27::getNewInstance(std::shared_ptr<Parameter> pa
 
 void ADComp27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_KERNEL_AD_COMP_27 << < grid, threads >> >(	para->getParD(level)->diffusivity,
-												para->getParD(level)->typeOfGridNode,
-												para->getParD(level)->neighborX,
-												para->getParD(level)->neighborY,
-												para->getParD(level)->neighborZ,
-												para->getParD(level)->distributions.f[0],
-												para->getParD(level)->distributionsAD27.f[0],
-												para->getParD(level)->numberOfNodes,
-												para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_ThS27 execution failed");
+    // Fetch the device-side parameter set of this level once and reuse it below.
+    const auto& parD = para->getParD(level);
+    // Launch configuration built from the level's thread count and node count.
+    const vf::cuda::CudaGrid launchGrid(parD->numberofthreads, parD->numberOfNodes);
+
+    // Both distribution sets are handed over via their first entry (f[0]).
+    LB_KERNEL_AD_COMP_27<<< launchGrid.grid, launchGrid.threads >>>(
+        parD->diffusivity, parD->typeOfGridNode,
+        parD->neighborX, parD->neighborY, parD->neighborZ,
+        parD->distributions.f[0], parD->distributionsAD.f[0],
+        parD->numberOfNodes, parD->isEvenTimestep);
+    // Error check for the launch above; the message names this kernel.
+    getLastCudaError("LB_KERNEL_AD_COMP_27 execution failed");
 }
 
 ADComp27::ADComp27(std::shared_ptr<Parameter> para, int level)
@@ -47,7 +33,6 @@ ADComp27::ADComp27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompAD27);
 
-	myKernelGroup = ADKernel27;
 }
 
 ADComp27::ADComp27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
index b4c1236300bbb49fe2df1b3f458f506e989e142b..eb6c9814efdedb822ddc3052d6b577750be83b12 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -38,125 +38,125 @@ __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			Distributions27 D27;
 			if (EvenOrOdd == true)
 			{
-				D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
-				D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
-				D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
-				D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
-				D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
-				D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
-				D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
-				D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
-				D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
-				D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
-				D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
-				D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
-				D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
-				D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
-				D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
-				D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
-				D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
-				D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
-				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-				D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
-				D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
-				D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
-				D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
-				D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
-				D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
-				D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
-				D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_P00 * size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_M00 * size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0P0 * size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0M0 * size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00P * size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00M * size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_PP0 * size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_MM0 * size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_PM0 * size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_MP0 * size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_P0P * size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_M0M * size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_P0M * size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_M0P * size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0PP * size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0MM * size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0PM * size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0MP * size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_PPP * size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_MMP * size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_PMP * size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_MPP * size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_PPM * size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_MMM * size_Mat];
+				D27.f[DIR_PMM] = &DD27[DIR_PMM * size_Mat];
+				D27.f[DIR_MPM] = &DD27[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
-				D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
-				D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
-				D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
-				D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
-				D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
-				D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
-				D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
-				D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
-				D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
-				D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
-				D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
-				D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
-				D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
-				D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
-				D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
-				D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
-				D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
-				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-				D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
-				D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
-				D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
-				D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
-				D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
-				D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
-				D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
-				D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_P00 * size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_M00 * size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0P0 * size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0M0 * size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00P * size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00M * size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_PP0 * size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_MM0 * size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_PM0 * size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_MP0 * size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_P0P * size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_M0M * size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_P0M * size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_M0P * size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0PP * size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0MM * size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0PM * size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0MP * size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000 * size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_PPP * size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_MMP * size_Mat];
+				D27.f[DIR_MPM] = &DD27[DIR_PMP * size_Mat];
+				D27.f[DIR_PMM] = &DD27[DIR_MPP * size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_PPM * size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_MMM * size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_PMM * size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
index ab9b0c444513455e0498d79614575e87c2afb6a0..d218489c754edc89f99277670f09536962ce62b2 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
@@ -2,6 +2,7 @@
 
 #include "ADComp7_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<ADComp7> ADComp7::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,34 +11,19 @@ std::shared_ptr<ADComp7> ADComp7::getNewInstance(std::shared_ptr<Parameter> para
 
 void ADComp7::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_AD_Comp_7 << < grid, threads >> >(	para->getParD(level)->diffusivity,
-											para->getParD(level)->typeOfGridNode,
-											para->getParD(level)->neighborX,
-											para->getParD(level)->neighborY,
-											para->getParD(level)->neighborZ,
-											para->getParD(level)->distributions.f[0], 
-											para->getParD(level)->distributionsAD7.f[0], 
-											para->getParD(level)->numberOfNodes,
-											para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_ThS7 execution failed");
+    // Launch configuration built from this level's thread count and node count.
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+    LB_Kernel_AD_Comp_7<<< grid.grid, grid.threads >>>(
+        para->getParD(level)->diffusivity,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->distributionsAD7.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_AD_Comp_7 execution failed");
 }
 
 ADComp7::ADComp7(std::shared_ptr<Parameter> para, int level)
@@ -47,7 +33,6 @@ ADComp7::ADComp7(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompAD7);
 
-	myKernelGroup = ADKernel7;
 }
 
 ADComp7::ADComp7()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
index 52ab9ba6e968ec2293f0a1c4959323c43f328206..075063bc6c3d260376256bd46f5669fae658a7a0 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 #include "math.h"
@@ -39,63 +39,63 @@ __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			Distributions7 D7;
@@ -157,33 +157,33 @@ __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 			real fTNE = (D.f[DIR_MMM])[kbsw];
 			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
 			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
-										   //real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-										   //real fW    =  (D.f[DIR_M00   ])[kw ];
-										   //real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-										   //real fS    =  (D.f[DIR_0M0   ])[ks ];
-										   //real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-										   //real fB    =  (D.f[DIR_00M   ])[kb ];
-										   //real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-										   //real fSW   =  (D.f[DIR_MM0  ])[ksw];
-										   //real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-										   //real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-										   //real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-										   //real fBW   =  (D.f[DIR_M0M  ])[kbw];
-										   //real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-										   //real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-										   //real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-										   //real fBS   =  (D.f[DIR_0MM  ])[kbs];
-										   //real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-										   //real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+										   //real fE    =  (D.f[DIR_P00])[k  ];//ke
+										   //real fW    =  (D.f[DIR_M00])[kw ];
+										   //real fN    =  (D.f[DIR_0P0])[k  ];//kn
+										   //real fS    =  (D.f[DIR_0M0])[ks ];
+										   //real fT    =  (D.f[DIR_00P])[k  ];//kt
+										   //real fB    =  (D.f[DIR_00M])[kb ];
+										   //real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+										   //real fSW   =  (D.f[DIR_MM0])[ksw];
+										   //real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+										   //real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+										   //real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+										   //real fBW   =  (D.f[DIR_M0M])[kbw];
+										   //real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+										   //real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+										   //real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+										   //real fBS   =  (D.f[DIR_0MM])[kbs];
+										   //real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+										   //real fTS   =  (D.f[DIR_0MP])[ks ];//kts
 										   //real fZERO =  (D.f[DIR_000])[k  ];//kzero
-										   //real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-										   //real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-										   //real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-										   //real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-										   //real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-										   //real fBSW   = (D.f[DIR_MMM ])[kbsw];
-										   //real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-										   //real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+										   //real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+										   //real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+										   //real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+										   //real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+										   //real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+										   //real fBSW   = (D.f[DIR_MMM])[kbsw];
+										   //real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+										   //real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 			real f7ZERO = (D7.f[0])[k];
 			real f7E = (D7.f[1])[k];
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
index 4ad8a4678ae2e4025a90f639ae366311a247e4b3..3c10e7a6996a9a26668a18390ddee4e2cbbec853 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
@@ -2,6 +2,7 @@
 
 #include "ADIncomp27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<ADIncomp27> ADIncomp27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,34 +11,19 @@ std::shared_ptr<ADIncomp27> ADIncomp27::getNewInstance(std::shared_ptr<Parameter
 
 void ADIncomp27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_AD_Incomp_27 << < grid, threads >> >(	para->getParD(level)->diffusivity, 
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX, 
-													para->getParD(level)->neighborY, 
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0], 
-													para->getParD(level)->distributionsAD27.f[0], 
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
+    // Launch configuration built from this level's thread count and node count.
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+    LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>(
+        para->getParD(level)->diffusivity,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->distributionsAD.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
 }
 
 ADIncomp27::ADIncomp27(std::shared_ptr<Parameter> para, int level)
@@ -47,7 +33,6 @@ ADIncomp27::ADIncomp27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitIncompAD27);
 
-	myKernelGroup = ADKernel27;
 }
 
 ADIncomp27::ADIncomp27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
index e686825ed100417110b02360876dec076553d7de..4a5cbb1168940bd6bfc9d9a48568b5964b736ae4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -13,7 +13,7 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	unsigned int* neighborZ,
 	real* DDStart,
 	real* DD27,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -27,7 +27,7 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k<size_Mat)
+	if (k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -38,125 +38,125 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			Distributions27 D27;
 			if (EvenOrOdd == true)
 			{
-				D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
-				D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
-				D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
-				D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
-				D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
-				D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
-				D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
-				D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
-				D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
-				D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
-				D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
-				D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
-				D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
-				D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
-				D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
-				D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
-				D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
-				D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
-				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-				D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
-				D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
-				D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
-				D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
-				D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
-				D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
-				D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
-				D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_P00 * numberOfLBnodes];
+				D27.f[DIR_M00] = &DD27[DIR_M00 * numberOfLBnodes];
+				D27.f[DIR_0P0] = &DD27[DIR_0P0 * numberOfLBnodes];
+				D27.f[DIR_0M0] = &DD27[DIR_0M0 * numberOfLBnodes];
+				D27.f[DIR_00P] = &DD27[DIR_00P * numberOfLBnodes];
+				D27.f[DIR_00M] = &DD27[DIR_00M * numberOfLBnodes];
+				D27.f[DIR_PP0] = &DD27[DIR_PP0 * numberOfLBnodes];
+				D27.f[DIR_MM0] = &DD27[DIR_MM0 * numberOfLBnodes];
+				D27.f[DIR_PM0] = &DD27[DIR_PM0 * numberOfLBnodes];
+				D27.f[DIR_MP0] = &DD27[DIR_MP0 * numberOfLBnodes];
+				D27.f[DIR_P0P] = &DD27[DIR_P0P * numberOfLBnodes];
+				D27.f[DIR_M0M] = &DD27[DIR_M0M * numberOfLBnodes];
+				D27.f[DIR_P0M] = &DD27[DIR_P0M * numberOfLBnodes];
+				D27.f[DIR_M0P] = &DD27[DIR_M0P * numberOfLBnodes];
+				D27.f[DIR_0PP] = &DD27[DIR_0PP * numberOfLBnodes];
+				D27.f[DIR_0MM] = &DD27[DIR_0MM * numberOfLBnodes];
+				D27.f[DIR_0PM] = &DD27[DIR_0PM * numberOfLBnodes];
+				D27.f[DIR_0MP] = &DD27[DIR_0MP * numberOfLBnodes];
+				D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+				D27.f[DIR_PPP] = &DD27[DIR_PPP * numberOfLBnodes];
+				D27.f[DIR_MMP] = &DD27[DIR_MMP * numberOfLBnodes];
+				D27.f[DIR_PMP] = &DD27[DIR_PMP * numberOfLBnodes];
+				D27.f[DIR_MPP] = &DD27[DIR_MPP * numberOfLBnodes];
+				D27.f[DIR_PPM] = &DD27[DIR_PPM * numberOfLBnodes];
+				D27.f[DIR_MMM] = &DD27[DIR_MMM * numberOfLBnodes];
+				D27.f[DIR_PMM] = &DD27[DIR_PMM * numberOfLBnodes];
+				D27.f[DIR_MPM] = &DD27[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
-				D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
-				D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
-				D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
-				D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
-				D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
-				D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
-				D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
-				D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
-				D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
-				D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
-				D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
-				D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
-				D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
-				D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
-				D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
-				D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
-				D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
-				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
-				D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
-				D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
-				D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
-				D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
-				D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
-				D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
-				D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
-				D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_P00 * numberOfLBnodes];
+				D27.f[DIR_P00] = &DD27[DIR_M00 * numberOfLBnodes];
+				D27.f[DIR_0M0] = &DD27[DIR_0P0 * numberOfLBnodes];
+				D27.f[DIR_0P0] = &DD27[DIR_0M0 * numberOfLBnodes];
+				D27.f[DIR_00M] = &DD27[DIR_00P * numberOfLBnodes];
+				D27.f[DIR_00P] = &DD27[DIR_00M * numberOfLBnodes];
+				D27.f[DIR_MM0] = &DD27[DIR_PP0 * numberOfLBnodes];
+				D27.f[DIR_PP0] = &DD27[DIR_MM0 * numberOfLBnodes];
+				D27.f[DIR_MP0] = &DD27[DIR_PM0 * numberOfLBnodes];
+				D27.f[DIR_PM0] = &DD27[DIR_MP0 * numberOfLBnodes];
+				D27.f[DIR_M0M] = &DD27[DIR_P0P * numberOfLBnodes];
+				D27.f[DIR_P0P] = &DD27[DIR_M0M * numberOfLBnodes];
+				D27.f[DIR_M0P] = &DD27[DIR_P0M * numberOfLBnodes];
+				D27.f[DIR_P0M] = &DD27[DIR_M0P * numberOfLBnodes];
+				D27.f[DIR_0MM] = &DD27[DIR_0PP * numberOfLBnodes];
+				D27.f[DIR_0PP] = &DD27[DIR_0MM * numberOfLBnodes];
+				D27.f[DIR_0MP] = &DD27[DIR_0PM * numberOfLBnodes];
+				D27.f[DIR_0PM] = &DD27[DIR_0MP * numberOfLBnodes];
+				D27.f[DIR_000] = &DD27[DIR_000 * numberOfLBnodes];
+				D27.f[DIR_MMM] = &DD27[DIR_PPP * numberOfLBnodes];
+				D27.f[DIR_PPM] = &DD27[DIR_MMP * numberOfLBnodes];
+				D27.f[DIR_MPM] = &DD27[DIR_PMP * numberOfLBnodes];
+				D27.f[DIR_PMM] = &DD27[DIR_MPP * numberOfLBnodes];
+				D27.f[DIR_MMP] = &DD27[DIR_PPM * numberOfLBnodes];
+				D27.f[DIR_PPP] = &DD27[DIR_MMM * numberOfLBnodes];
+				D27.f[DIR_MPP] = &DD27[DIR_PMM * numberOfLBnodes];
+				D27.f[DIR_PMP] = &DD27[DIR_MPM * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -197,33 +197,33 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
 			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
-										   //real f27E    =  (D27.f[DIR_P00   ])[k  ];//ke
-										   //real f27W    =  (D27.f[DIR_M00   ])[kw ];
-										   //real f27N    =  (D27.f[DIR_0P0   ])[k  ];//kn
-										   //real f27S    =  (D27.f[DIR_0M0   ])[ks ];
-										   //real f27T    =  (D27.f[DIR_00P   ])[k  ];//kt
-										   //real f27B    =  (D27.f[DIR_00M   ])[kb ];
-										   //real f27NE   =  (D27.f[DIR_PP0  ])[k  ];//kne
-										   //real f27SW   =  (D27.f[DIR_MM0  ])[ksw];
-										   //real f27SE   =  (D27.f[DIR_PM0  ])[ks ];//kse
-										   //real f27NW   =  (D27.f[DIR_MP0  ])[kw ];//knw
-										   //real f27TE   =  (D27.f[DIR_P0P  ])[k  ];//kte
-										   //real f27BW   =  (D27.f[DIR_M0M  ])[kbw];
-										   //real f27BE   =  (D27.f[DIR_P0M  ])[kb ];//kbe
-										   //real f27TW   =  (D27.f[DIR_M0P  ])[kw ];//ktw
-										   //real f27TN   =  (D27.f[DIR_0PP  ])[k  ];//ktn
-										   //real f27BS   =  (D27.f[DIR_0MM  ])[kbs];
-										   //real f27BN   =  (D27.f[DIR_0PM  ])[kb ];//kbn
-										   //real f27TS   =  (D27.f[DIR_0MP  ])[ks ];//kts
+										   //real f27E    =  (D27.f[DIR_P00])[k  ];//ke
+										   //real f27W    =  (D27.f[DIR_M00])[kw ];
+										   //real f27N    =  (D27.f[DIR_0P0])[k  ];//kn
+										   //real f27S    =  (D27.f[DIR_0M0])[ks ];
+										   //real f27T    =  (D27.f[DIR_00P])[k  ];//kt
+										   //real f27B    =  (D27.f[DIR_00M])[kb ];
+										   //real f27NE   =  (D27.f[DIR_PP0])[k  ];//kne
+										   //real f27SW   =  (D27.f[DIR_MM0])[ksw];
+										   //real f27SE   =  (D27.f[DIR_PM0])[ks ];//kse
+										   //real f27NW   =  (D27.f[DIR_MP0])[kw ];//knw
+										   //real f27TE   =  (D27.f[DIR_P0P])[k  ];//kte
+										   //real f27BW   =  (D27.f[DIR_M0M])[kbw];
+										   //real f27BE   =  (D27.f[DIR_P0M])[kb ];//kbe
+										   //real f27TW   =  (D27.f[DIR_M0P])[kw ];//ktw
+										   //real f27TN   =  (D27.f[DIR_0PP])[k  ];//ktn
+										   //real f27BS   =  (D27.f[DIR_0MM])[kbs];
+										   //real f27BN   =  (D27.f[DIR_0PM])[kb ];//kbn
+										   //real f27TS   =  (D27.f[DIR_0MP])[ks ];//kts
 										   //real f27ZERO =  (D27.f[DIR_000])[k  ];//kzero
-										   //real f27TNE  =  (D27.f[DIR_PPP ])[k  ];//ktne
-										   //real f27TSW  =  (D27.f[DIR_MMP ])[ksw];//ktsw
-										   //real f27TSE  =  (D27.f[DIR_PMP ])[ks ];//ktse
-										   //real f27TNW  =  (D27.f[DIR_MPP ])[kw ];//ktnw
-										   //real f27BNE  =  (D27.f[DIR_PPM ])[kb ];//kbne
-										   //real f27BSW  =  (D27.f[DIR_MMM ])[kbsw];
-										   //real f27BSE  =  (D27.f[DIR_PMM ])[kbs];//kbse
-										   //real f27BNW  =  (D27.f[DIR_MPM ])[kbw];//kbnw
+										   //real f27TNE  =  (D27.f[DIR_PPP])[k  ];//ktne
+										   //real f27TSW  =  (D27.f[DIR_MMP])[ksw];//ktsw
+										   //real f27TSE  =  (D27.f[DIR_PMP])[ks ];//ktse
+										   //real f27TNW  =  (D27.f[DIR_MPP])[kw ];//ktnw
+										   //real f27BNE  =  (D27.f[DIR_PPM])[kb ];//kbne
+										   //real f27BSW  =  (D27.f[DIR_MMM])[kbsw];
+										   //real f27BSE  =  (D27.f[DIR_PMM])[kbs];//kbse
+										   //real f27BNW  =  (D27.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 										   //real vx1     =  ((fTNE-fBSW)+(fBNE-fTSW)+(fTSE-fBNW)+(fBSE-fTNW) +(fNE-fSW)+(fSE-fNW)+(fTE-fBW)+(fBE-fTW)+(fE-fW));
 										   //real vx2     =  ((fTNE-fBSW)+(fBNE-fTSW)+(fBNW-fTSE)+(fTNW-fBSE) +(fNE-fSW)+(fNW-fSE)+(fTN-fBS)+(fBN-fTS)+(fN-fS));
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
index a6d94de4fadb9a93a9e5fed63d87731b12ec2a07..3abee563f676910f422bba0930060c2a0b0c0e21 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
@@ -11,7 +11,7 @@ __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	unsigned int* neighborZ,
 	real* DDStart,
 	real* DD27,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool EvenOrOdd);
 
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
index 27da776eb7612307fa4f9af2886594fc0c75d90b..71adc96eef733084e01fa963f6d0fad66a2e1062 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
@@ -2,6 +2,7 @@
 
 #include "ADIncomp7_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<ADIncomp7> ADIncomp7::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,34 +11,19 @@ std::shared_ptr<ADIncomp7> ADIncomp7::getNewInstance(std::shared_ptr<Parameter>
 
 void ADIncomp7::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_AD_Incomp_7 << < grid, threads >> >(	para->getParD(level)->diffusivity, 
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX, 
-													para->getParD(level)->neighborY, 
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->distributionsAD7.f[0], 
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>(
+        para->getParD(level)->diffusivity, 
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX, 
+        para->getParD(level)->neighborY, 
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->distributionsAD7.f[0], 
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed");
 }
 
 ADIncomp7::ADIncomp7(std::shared_ptr<Parameter> para, int level)
@@ -47,7 +33,6 @@ ADIncomp7::ADIncomp7(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitIncompAD7);
 
-	myKernelGroup = ADKernel7;
 }
 
 ADIncomp7::ADIncomp7()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
index d49b0b48d20d976076a52f804d485b68da55348e..1d393e0c4a5f80fb331c109311876673a86a9d8d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -13,7 +13,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	unsigned int* neighborZ,
 	real* DDStart,
 	real* DD7,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool EvenOrOdd)
 {
 	////////////////////////////////////////////////////////////////////////////////
@@ -27,7 +27,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if (k<size_Mat)
+	if (k<numberOfLBnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		unsigned int BC;
@@ -38,85 +38,85 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+				D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+				D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+				D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 			}
 
 			Distributions7 D7;
 			if (EvenOrOdd == true)
 			{
-				D7.f[0] = &DD7[0 * size_Mat];
-				D7.f[1] = &DD7[1 * size_Mat];
-				D7.f[2] = &DD7[2 * size_Mat];
-				D7.f[3] = &DD7[3 * size_Mat];
-				D7.f[4] = &DD7[4 * size_Mat];
-				D7.f[5] = &DD7[5 * size_Mat];
-				D7.f[6] = &DD7[6 * size_Mat];
+				D7.f[0] = &DD7[0 * numberOfLBnodes];
+				D7.f[1] = &DD7[1 * numberOfLBnodes];
+				D7.f[2] = &DD7[2 * numberOfLBnodes];
+				D7.f[3] = &DD7[3 * numberOfLBnodes];
+				D7.f[4] = &DD7[4 * numberOfLBnodes];
+				D7.f[5] = &DD7[5 * numberOfLBnodes];
+				D7.f[6] = &DD7[6 * numberOfLBnodes];
 			}
 			else
 			{
-				D7.f[0] = &DD7[0 * size_Mat];
-				D7.f[2] = &DD7[1 * size_Mat];
-				D7.f[1] = &DD7[2 * size_Mat];
-				D7.f[4] = &DD7[3 * size_Mat];
-				D7.f[3] = &DD7[4 * size_Mat];
-				D7.f[6] = &DD7[5 * size_Mat];
-				D7.f[5] = &DD7[6 * size_Mat];
+				D7.f[0] = &DD7[0 * numberOfLBnodes];
+				D7.f[2] = &DD7[1 * numberOfLBnodes];
+				D7.f[1] = &DD7[2 * numberOfLBnodes];
+				D7.f[4] = &DD7[3 * numberOfLBnodes];
+				D7.f[3] = &DD7[4 * numberOfLBnodes];
+				D7.f[6] = &DD7[5 * numberOfLBnodes];
+				D7.f[5] = &DD7[6 * numberOfLBnodes];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -156,33 +156,33 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 			real fTNE = (D.f[DIR_MMM])[kbsw];
 			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
 			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
-										   //real fE    =  (D.f[DIR_P00   ])[k  ];//ke
-										   //real fW    =  (D.f[DIR_M00   ])[kw ];
-										   //real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
-										   //real fS    =  (D.f[DIR_0M0   ])[ks ];
-										   //real fT    =  (D.f[DIR_00P   ])[k  ];//kt
-										   //real fB    =  (D.f[DIR_00M   ])[kb ];
-										   //real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
-										   //real fSW   =  (D.f[DIR_MM0  ])[ksw];
-										   //real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
-										   //real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
-										   //real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
-										   //real fBW   =  (D.f[DIR_M0M  ])[kbw];
-										   //real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
-										   //real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
-										   //real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
-										   //real fBS   =  (D.f[DIR_0MM  ])[kbs];
-										   //real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
-										   //real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+										   //real fE    =  (D.f[DIR_P00])[k  ];//ke
+										   //real fW    =  (D.f[DIR_M00])[kw ];
+										   //real fN    =  (D.f[DIR_0P0])[k  ];//kn
+										   //real fS    =  (D.f[DIR_0M0])[ks ];
+										   //real fT    =  (D.f[DIR_00P])[k  ];//kt
+										   //real fB    =  (D.f[DIR_00M])[kb ];
+										   //real fNE   =  (D.f[DIR_PP0])[k  ];//kne
+										   //real fSW   =  (D.f[DIR_MM0])[ksw];
+										   //real fSE   =  (D.f[DIR_PM0])[ks ];//kse
+										   //real fNW   =  (D.f[DIR_MP0])[kw ];//knw
+										   //real fTE   =  (D.f[DIR_P0P])[k  ];//kte
+										   //real fBW   =  (D.f[DIR_M0M])[kbw];
+										   //real fBE   =  (D.f[DIR_P0M])[kb ];//kbe
+										   //real fTW   =  (D.f[DIR_M0P])[kw ];//ktw
+										   //real fTN   =  (D.f[DIR_0PP])[k  ];//ktn
+										   //real fBS   =  (D.f[DIR_0MM])[kbs];
+										   //real fBN   =  (D.f[DIR_0PM])[kb ];//kbn
+										   //real fTS   =  (D.f[DIR_0MP])[ks ];//kts
 										   //real fZERO =  (D.f[DIR_000])[k  ];//kzero
-										   //real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
-										   //real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
-										   //real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
-										   //real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
-										   //real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
-										   //real fBSW   = (D.f[DIR_MMM ])[kbsw];
-										   //real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
-										   //real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
+										   //real fTNE   = (D.f[DIR_PPP])[k  ];//ktne
+										   //real fTSW   = (D.f[DIR_MMP])[ksw];//ktsw
+										   //real fTSE   = (D.f[DIR_PMP])[ks ];//ktse
+										   //real fTNW   = (D.f[DIR_MPP])[kw ];//ktnw
+										   //real fBNE   = (D.f[DIR_PPM])[kb ];//kbne
+										   //real fBSW   = (D.f[DIR_MMM])[kbsw];
+										   //real fBSE   = (D.f[DIR_PMM])[kbs];//kbse
+										   //real fBNW   = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 			real f7ZERO = (D7.f[0])[k];
 			real f7E = (D7.f[1])[k];
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
index 25a17ddbd7038635a2beb2c39212822cbf762034..845ecda946a4e45678082b72b5c74dc96e5810c5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
@@ -11,7 +11,7 @@ __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	unsigned int* neighborZ,
 	real* DDStart,
 	real* DD7,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	bool EvenOrOdd);
 
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
index d2f9f60890379d07ecc3d04f4a54d59a0754907a..4aef26b7dd31435b2dadceb78ac1e0b7ebedf029 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "BGKCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<BGKCompSP27> BGKCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<BGKCompSP27> BGKCompSP27::getNewInstance(std::shared_ptr<Paramet
 
 void BGKCompSP27::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_BGK_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_BGK_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration for the BGK kernel, built from this level's thread count and node count
+
+    LB_Kernel_BGK_Comp_SP_27<<< grid.grid, grid.threads >>>( // launch with the grid/threads members of the CudaGrid above
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // first entry of the level's distributions.f array
+        para->getParD(level)->numberOfNodes, // same node count that was used to size the launch grid
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_BGK_Comp_SP_27 execution failed"); // NOTE(review): presumably reports a CUDA error from the launch; helper is defined elsewhere - confirm
 }
 
 BGKCompSP27::BGKCompSP27(std::shared_ptr<Parameter> para, int level)
@@ -45,7 +31,7 @@ BGKCompSP27::BGKCompSP27(std::shared_ptr<Parameter> para, int level)
 	this->level = level;
 
 	myPreProcessorTypes.push_back(InitCompSP27);
-	myKernelGroup = BasicKernel;
+	
 }
 
 BGKCompSP27::BGKCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
index 09196d13e94a2404ba280e8a8e9394f0a79e8211..44add98d9607642531a6068021d0a4e831cb3d4e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -38,63 +38,63 @@ __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
index beebda2437ca4e7385ab812b9106edabe213227e..00aaf3c27f16a5d53e7aee225214f05bd62a541a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "BGKPlusCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<BGKPlusCompSP27> BGKPlusCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<BGKPlusCompSP27> BGKPlusCompSP27::getNewInstance(std::shared_ptr
 
 void BGKPlusCompSP27::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_BGK_Plus_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-															para->getParD(level)->typeOfGridNode,
-															para->getParD(level)->neighborX,
-															para->getParD(level)->neighborY,
-															para->getParD(level)->neighborZ,
-															para->getParD(level)->distributions.f[0],
-															size_Mat,
-															para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_BGK_Plus_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration for the BGK-Plus kernel, built from this level's thread count and node count
+
+    LB_Kernel_BGK_Plus_Comp_SP_27 <<< grid.grid, grid.threads >>>( // launch with the grid/threads members of the CudaGrid above
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // first entry of the level's distributions.f array
+        para->getParD(level)->numberOfNodes, // same node count that was used to size the launch grid
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_BGK_Plus_Comp_SP_27 execution failed"); // NOTE(review): presumably reports a CUDA error from the launch; helper is defined elsewhere - confirm
 }
 
 BGKPlusCompSP27::BGKPlusCompSP27(std::shared_ptr<Parameter> para, int level)
@@ -46,7 +32,7 @@ BGKPlusCompSP27::BGKPlusCompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 BGKPlusCompSP27::BGKPlusCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
index 325f65ece9baddf88adc91baa753bdfc4bd0eced..638210bd2da8ebf30bda603a3f6d70c19468193e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -38,63 +38,63 @@ __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -127,33 +127,33 @@ __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
index 3d7f6fb9a8980454ebc83c51c7dd8865688fa166..0a5ac6cf7a1b6564a61d0150b187b10b584222b8 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
@@ -6,6 +6,7 @@
 #include "../RunLBMKernel.cuh"
 
 #include <lbm/BGK.h>
+#include <lbm/KernelParameter.h>
 
 
 namespace vf
@@ -23,7 +24,7 @@ BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level)
 
     myPreProcessorTypes.push_back(InitCompSP27);
 
-    myKernelGroup = BasicKernel;
+    
 
     this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 }
@@ -31,15 +32,16 @@ BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level)
 
 void BGKUnified::run()
 {
-    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
-                                                 para->getParD(level)->typeOfGridNode,
-                                                 para->getParD(level)->neighborX,
-                                                 para->getParD(level)->neighborY,
-                                                 para->getParD(level)->neighborZ,
-                                                 para->getParD(level)->distributions.f[0],
-                                                 (int)para->getParD(level)->numberOfNodes,
-                                                 nullptr, /* forces not used in bgk kernel */
-                                                 para->getParD(level)->isEvenTimestep };
+    GPUKernelParameter kernelParameter{
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        (int)para->getParD(level)->numberOfNodes,
+        nullptr, /* forces not used in bgk kernel */
+        para->getParD(level)->isEvenTimestep };
 
     auto lambda = [] __device__(lbm::KernelParameter parameter) {
         return lbm::bgk(parameter);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
index eca3a9953024e44fd91e7f9f98956e4329574d09..664b46fcebd277b0c93300d86b2171edf4f91b2a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "CascadeCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CascadeCompSP27> CascadeCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<CascadeCompSP27> CascadeCompSP27::getNewInstance(std::shared_ptr
 
 void CascadeCompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cascade_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-															para->getParD(level)->typeOfGridNode,
-															para->getParD(level)->neighborX,
-															para->getParD(level)->neighborY,
-															para->getParD(level)->neighborZ,
-															para->getParD(level)->distributions.f[0],
-															para->getParD(level)->numberOfNodes,
-															para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Cascade_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration for the cascade kernel, built from this level's thread count and node count
+
+    LB_Kernel_Cascade_Comp_SP_27 <<< grid.grid, grid.threads >>>( // launch with the grid/threads members of the CudaGrid above
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // first entry of the level's distributions.f array
+        para->getParD(level)->numberOfNodes, // same node count that was used to size the launch grid
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cascade_Comp_SP_27 execution failed"); // NOTE(review): presumably reports a CUDA error from the launch; helper is defined elsewhere - confirm
 }
 
 CascadeCompSP27::CascadeCompSP27(std::shared_ptr<Parameter> para, int level)
@@ -46,7 +32,7 @@ CascadeCompSP27::CascadeCompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CascadeCompSP27::CascadeCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
index 3f69fa47288343fbdd91e77dbb7f154501349098..6bd4415c7edcb5c9954874c074801b865cce3efe 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];
+			real mfcbb = (D.f[DIR_P00])[k  ];//[ke   ];
+			real mfabb = (D.f[DIR_M00])[kw ];//[kw   ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];//[kn   ];
+			real mfbab = (D.f[DIR_0M0])[ks ];//[ks   ];
+			real mfbbc = (D.f[DIR_00P])[k  ];//[kt   ];
+			real mfbba = (D.f[DIR_00M])[kb ];//[kb   ];
+			real mfccb = (D.f[DIR_PP0])[k  ];//[kne  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];
+			real mfcab = (D.f[DIR_PM0])[ks ];//[kse  ];
+			real mfacb = (D.f[DIR_MP0])[kw ];//[knw  ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];//[kte  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];
+			real mfcba = (D.f[DIR_P0M])[kb ];//[kbe  ];
+			real mfabc = (D.f[DIR_M0P])[kw ];//[ktw  ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];//[ktn  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];
+			real mfbca = (D.f[DIR_0PM])[kb ];//[kbn  ];
+			real mfbac = (D.f[DIR_0MP])[ks ];//[kts  ];
 			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];
-			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];
-			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];
-			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ]
-			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];
-			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];
+			real mfccc = (D.f[DIR_PPP])[k  ];//[ktne ];
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];
+			real mfcac = (D.f[DIR_PMP])[ks ];//[ktse ];
+			real mfacc = (D.f[DIR_MPP])[kw ];//[ktnw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];//[kbne ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ]
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real rho = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 						   mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
index 3f45c7ea71c385f948eac2e052a8d970010c413d..218623b7c51099717f6aaa6f375a82516e0c0dae 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantCompSP27> CumulantCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<CumulantCompSP27> CumulantCompSP27::getNewInstance(std::shared_p
 
 void CumulantCompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cum_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_Cum_Comp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cum_Comp_SP_27 execution failed");
 }
 
 
@@ -47,7 +33,7 @@ CumulantCompSP27::CumulantCompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantCompSP27::CumulantCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
index ad2ffdf4170d98125e6758c0e2f548122093cea6..6ab3385b86611614eceeb0018f6beef73031711c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
index 9a84df86e41b3fdff75c2ebf580813afc5ee3feb..c8aad41b87ef39514f6cf5abc8b8bff42a869346 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
@@ -1,8 +1,8 @@
 #include "CumulantAll4CompSP27.h"
 
 #include "CumulantAll4CompSP27_Device.cuh"
-
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantAll4CompSP27> CumulantAll4CompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -11,36 +11,21 @@ std::shared_ptr<CumulantAll4CompSP27> CumulantAll4CompSP27::getNewInstance(std::
 
 void CumulantAll4CompSP27::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cumulant_D3Q27All4 << < grid, threads >> >(	para->getParD(level)->omega,
-															para->getParD(level)->typeOfGridNode,
-															para->getParD(level)->neighborX,
-															para->getParD(level)->neighborY,
-															para->getParD(level)->neighborZ,
-															para->getParD(level)->distributions.f[0],
-															size_Mat,
-															level,
-															para->getForcesDev(),
-                                                            para->getQuadricLimitersDev(),
-															para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Cumulant_D3Q27All4 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_Cumulant_D3Q27All4 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        level,
+        para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cumulant_D3Q27All4 execution failed");
 }
 
 CumulantAll4CompSP27::CumulantAll4CompSP27(std::shared_ptr<Parameter> para, int level)
@@ -50,5 +35,5 @@ CumulantAll4CompSP27::CumulantAll4CompSP27(std::shared_ptr<Parameter> para, int
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
index 681dbff2ba37a1e0de56341b39cc2dec791f656b..7a5e39d6f1f95f5b34bb38f1514ab728f477c34b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
@@ -2,9 +2,9 @@
 
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -42,63 +42,63 @@ __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -160,33 +160,33 @@ __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
index 1b6ba1a2278b68f085a4b7df699b7ca230811f39..09a3aa1cdb1a3cf9c01002a9d335c5a907f94917 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
@@ -12,7 +12,7 @@ std::shared_ptr<CumulantK15Comp> CumulantK15Comp::getNewInstance(std::shared_ptr
 void CumulantK15Comp::run()
 {
 	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 
 	int Grid = (size_Mat / numberOfThreads) + 1;
 	int Grid1, Grid2;
@@ -29,16 +29,17 @@ void CumulantK15Comp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_CumulantK15Comp <<< grid, threads >>>(para->getParD(level)->omega,
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													size_Mat,
-													level,
-													para->getForcesDev(),
-													para->getParD(level)->isEvenTimestep);
+	LB_Kernel_CumulantK15Comp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getForcesDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_CumulantK15Comp execution failed");
 }
 
@@ -49,5 +50,5 @@ CumulantK15Comp::CumulantK15Comp(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
index 93d57d6c9871d66537f25b9188467d46e3b3d05c..9c5d484ee00c1dfab92e1d5eaf0cdffb61fd122b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -39,63 +39,63 @@ __global__ void LB_Kernel_CumulantK15Comp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -156,33 +156,33 @@ __global__ void LB_Kernel_CumulantK15Comp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
index 188984d001f89d72c967dd6390ca10ae5d2eab32..f0e29a9740438bc78d39574e1046d937cd7b86ce 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantK15BulkComp_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK15BulkComp> CumulantK15BulkComp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,35 +11,20 @@ std::shared_ptr<CumulantK15BulkComp> CumulantK15BulkComp::getNewInstance(std::sh
 
 void CumulantK15BulkComp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK15BulkComp <<< grid, threads >>>(para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														level,
-														para->getForcesDev(),
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK15BulkComp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_CumulantK15BulkComp <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        level,
+        para->getForcesDev(),
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_CumulantK15BulkComp execution failed");
 }
 
 CumulantK15BulkComp::CumulantK15BulkComp(std::shared_ptr<Parameter> para, int level)
@@ -48,7 +34,7 @@ CumulantK15BulkComp::CumulantK15BulkComp(std::shared_ptr<Parameter> para, int le
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantK15BulkComp::CumulantK15BulkComp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
index d2a2f61df902cfd7c5ef52b09f8e7738a108615e..c0e48a9d5754f79d62b11129cef754adf91dbe03 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -39,63 +39,63 @@ __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -156,33 +156,33 @@ __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
index d28c077031ff9125d1cbc1187def1d1d8fe4d6e8..69f84b0671c11fad8ae15676230c491ee815153d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantK15SpongeComp_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK15SpongeComp> CumulantK15SpongeComp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<CumulantK15SpongeComp> CumulantK15SpongeComp::getNewInstance(std
 
 void CumulantK15SpongeComp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK15SpongeComp <<< grid, threads >>>(	para->getParD(level)->omega,
-															para->getParD(level)->typeOfGridNode,
-															para->getParD(level)->neighborX,
-															para->getParD(level)->neighborY,
-															para->getParD(level)->neighborZ,
-															para->getParD(level)->coordinateX,
-															para->getParD(level)->coordinateY,
-															para->getParD(level)->coordinateZ,
-															para->getParD(level)->distributions.f[0],
-															para->getParD(level)->numberOfNodes,
-															para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK15SpongeComp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_CumulantK15SpongeComp <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->coordinateX,
+        para->getParD(level)->coordinateY,
+        para->getParD(level)->coordinateZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_CumulantK15SpongeComp execution failed");
 }
 
 CumulantK15SpongeComp::CumulantK15SpongeComp(std::shared_ptr<Parameter> para, int level)
@@ -49,7 +35,7 @@ CumulantK15SpongeComp::CumulantK15SpongeComp(std::shared_ptr<Parameter> para, in
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantK15SpongeComp::CumulantK15SpongeComp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
index c2144d324aa3378e8fc9fc5b511bbed385b48a84..20f3f913589f26978dcf713cd0175fc2ad425545 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -40,63 +40,63 @@ __global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
index 0b72b46cf25f331172be4abb8dded6d8e5e2b9c5..c95289f15fe13decbbe173e17f5d4255b8ef80b5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
@@ -23,22 +23,23 @@ CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int leve
 
     myPreProcessorTypes.push_back(InitCompSP27);
 
-    myKernelGroup = BasicKernel;
+    
 
     this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 }
 
 void CumulantK15Unified::run()
 {
-    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
-                                                 para->getParD(level)->typeOfGridNode,
-                                                 para->getParD(level)->neighborX,
-                                                 para->getParD(level)->neighborY,
-                                                 para->getParD(level)->neighborZ,
-                                                 para->getParD(level)->distributions.f[0],
-                                                 (int)para->getParD(level)->numberOfNodes,
-                                                 para->getParD(level)->forcing,
-                                                 para->getParD(level)->isEvenTimestep };
+    GPUKernelParameter kernelParameter{
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        (int)para->getParD(level)->numberOfNodes,
+        para->getParD(level)->forcing,
+        para->getParD(level)->isEvenTimestep };
 
     auto lambda = [] __device__(lbm::KernelParameter parameter) {
         return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK15);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
index 477539348706de7410319045fa075a6cdf31d01c..b31e4964b609bcee1c3015dcf950b540977f8333 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
@@ -22,7 +22,7 @@ void CumulantK17<turbulenceModel>::run()
                                                                                                         para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
                                                                                                         para->getParD(level)->turbViscosity,
                                                                                                         para->getSGSConstant(),
-                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                        para->getParD(level)->numberOfNodes,
                                                                                                         level,
                                                                                                         para->getForcesDev(),
                                                                                                         para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
@@ -49,7 +49,7 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
                                                                                                                         para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
                                                                                                                         para->getParD(level)->turbViscosity,
                                                                                                                         para->getSGSConstant(),
-                                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                                        para->getParD(level)->numberOfNodes,
                                                                                                                         level,
                                                                                                                         para->getForcesDev(),
                                                                                                                         para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
@@ -67,7 +67,7 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
                                                                                                                         para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
                                                                                                                         para->getParD(level)->turbViscosity,
                                                                                                                         para->getSGSConstant(),
-                                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                                        para->getParD(level)->numberOfNodes,
                                                                                                                         level,
                                                                                                                         para->getForcesDev(),
                                                                                                                         para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
@@ -86,7 +86,7 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
                                                                                                                         para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
                                                                                                                         para->getParD(level)->turbViscosity,
                                                                                                                         para->getSGSConstant(),
-                                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                                        para->getParD(level)->numberOfNodes,
                                                                                                                         level,
                                                                                                                         para->getForcesDev(),
                                                                                                                         para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
@@ -94,7 +94,9 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
                                                                                                                         para->getParD(level)->isEvenTimestep,
                                                                                                                         indices,
                                                                                                                         size_indices);
-            break;	case CollisionTemplate::ApplyBodyForce:
+            break;
+
+        case CollisionTemplate::ApplyBodyForce:
             LB_Kernel_CumulantK17 < turbulenceModel, false, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,
                                                                                                                         para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
                                                                                                                         para->getParD(level)->distributions.f[0],
@@ -102,7 +104,7 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
                                                                                                                         para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
                                                                                                                         para->getParD(level)->turbViscosity,
                                                                                                                         para->getSGSConstant(),
-                                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                                        para->getParD(level)->numberOfNodes,
                                                                                                                         level,
                                                                                                                         para->getForcesDev(),
                                                                                                                         para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
@@ -110,7 +112,8 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
                                                                                                                         para->getParD(level)->isEvenTimestep,
                                                                                                                         indices,
                                                                                                                         size_indices);
-            break;	default:
+            break;
+        default:
             throw std::runtime_error("Invalid CollisionTemplate in CumulantK17::runOnIndices()");
             break;
     }
@@ -126,7 +129,7 @@ CumulantK17<turbulenceModel>::CumulantK17(std::shared_ptr<Parameter> para, int l
 
     myPreProcessorTypes.push_back(InitCompSP27);
 
-    myKernelGroup = BasicKernel;
+    
 
     this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
     this->kernelUsesFluidNodeIndices = true;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
index 77e5172dae7b0ff6b51ed79a0a4356c7461801c5..c206381d4b54130a7a5489536729465f398e5ee4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
@@ -38,21 +38,20 @@
 //! required options are switched on ( \param writeMacroscopicVariables and/or \param applyBodyForce) in order to minimize memory accesses. The default
 //! refers to the plain cumlant kernel (CollisionTemplate::Default).
 //! Nodes are added to subsets (taggedFluidNodes) in Simulation::init using a corresponding tag with different values of CollisionTemplate. These subsets
-//! are provided by the utilized PostCollisionInteractiors depending on they specifc requirements (e.g. writeMacroscopicVariables for probes).
+//! are provided by the utilized PostCollisionInteractors depending on their specific requirements (e.g. writeMacroscopicVariables for probes).
 
 //=======================================================================================
-/* Device code */
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
-#include "Kernel/Utilities/DistributionHelper.cuh"
+#include "basics/constants/NumericConstants.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
+#include "LBM/GPUHelperFunctions/ChimeraTransformation.h"
 
 #include "GPU/TurbulentViscosityInlines.cuh"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
-#include "Kernel/Utilities/ChimeraTransformation.h"
-
+using namespace vf::gpu;
 
 ////////////////////////////////////////////////////////////////////////////////
 template<TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce>
@@ -68,7 +67,7 @@ __global__ void LB_Kernel_CumulantK17(
     real* vz,
     real* turbulentViscosity,
     real SGSconstant,
-    unsigned long numberOfLBnodes,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     real* bodyForceX,
@@ -90,16 +89,16 @@ __global__ void LB_Kernel_CumulantK17(
     ////////////////////////////////////////////////////////////////////////////////
     //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
     //!
-    const unsigned kThread = vf::gpu::getNodeIndex();
+    const unsigned nodeIndex = getNodeIndex();
 
     //////////////////////////////////////////////////////////////////////////
     // run for all indices in size_Mat and fluid nodes
-    if (kThread >= numberOfFluidNodes)
+    if (nodeIndex >= numberOfFluidNodes)
         return;
     ////////////////////////////////////////////////////////////////////////////////
     //! - Get the node index from the array containing all indices of fluid nodes
     //!
-    const unsigned k_000 = fluidNodeIndices[kThread];
+    const unsigned k_000 = fluidNodeIndices[nodeIndex];
 
     //////////////////////////////////////////////////////////////////////////
     //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
@@ -107,7 +106,8 @@ __global__ void LB_Kernel_CumulantK17(
     //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep);
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
 
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set neighbor indices (necessary for indirect addressing)
@@ -607,7 +607,7 @@ __global__ void LB_Kernel_CumulantK17(
     m_001 = -m_001;
 
     //Write to array here to distribute read/write
-    if(writeMacroscopicVariables)
+    if(writeMacroscopicVariables || turbulenceModel==TurbulenceModel::AMD)
     {
         rho[k_000] = drho;
         vx[k_000] = vvx;
@@ -664,63 +664,63 @@ __global__ void LB_Kernel_CumulantK17(
     //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    (dist.f[DIR_P00])[k_000]    = f_M00;
-    (dist.f[DIR_M00])[k_M00]    = f_P00;
-    (dist.f[DIR_0P0])[k_000]    = f_0M0;
-    (dist.f[DIR_0M0])[k_0M0]    = f_0P0;
-    (dist.f[DIR_00P])[k_000]    = f_00M;
-    (dist.f[DIR_00M])[k_00M]    = f_00P;
-    (dist.f[DIR_PP0])[k_000]   = f_MM0;
-    (dist.f[DIR_MM0])[k_MM0]   = f_PP0;
-    (dist.f[DIR_PM0])[k_0M0]   = f_MP0;
-    (dist.f[DIR_MP0])[k_M00]   = f_PM0;
-    (dist.f[DIR_P0P])[k_000]   = f_M0M;
-    (dist.f[DIR_M0M])[k_M0M]   = f_P0P;
-    (dist.f[DIR_P0M])[k_00M]   = f_M0P;
-    (dist.f[DIR_M0P])[k_M00]   = f_P0M;
-    (dist.f[DIR_0PP])[k_000]   = f_0MM;
-    (dist.f[DIR_0MM])[k_0MM]   = f_0PP;
-    (dist.f[DIR_0PM])[k_00M]   = f_0MP;
-    (dist.f[DIR_0MP])[k_0M0]   = f_0PM;
+    (dist.f[DIR_P00])[k_000] = f_M00;
+    (dist.f[DIR_M00])[k_M00] = f_P00;
+    (dist.f[DIR_0P0])[k_000] = f_0M0;
+    (dist.f[DIR_0M0])[k_0M0] = f_0P0;
+    (dist.f[DIR_00P])[k_000] = f_00M;
+    (dist.f[DIR_00M])[k_00M] = f_00P;
+    (dist.f[DIR_PP0])[k_000] = f_MM0;
+    (dist.f[DIR_MM0])[k_MM0] = f_PP0;
+    (dist.f[DIR_PM0])[k_0M0] = f_MP0;
+    (dist.f[DIR_MP0])[k_M00] = f_PM0;
+    (dist.f[DIR_P0P])[k_000] = f_M0M;
+    (dist.f[DIR_M0M])[k_M0M] = f_P0P;
+    (dist.f[DIR_P0M])[k_00M] = f_M0P;
+    (dist.f[DIR_M0P])[k_M00] = f_P0M;
+    (dist.f[DIR_0PP])[k_000] = f_0MM;
+    (dist.f[DIR_0MM])[k_0MM] = f_0PP;
+    (dist.f[DIR_0PM])[k_00M] = f_0MP;
+    (dist.f[DIR_0MP])[k_0M0] = f_0PM;
     (dist.f[DIR_000])[k_000] = f_000;
-    (dist.f[DIR_PPP])[k_000]  = f_MMM;
-    (dist.f[DIR_PMP])[k_0M0]  = f_MPM;
-    (dist.f[DIR_PPM])[k_00M]  = f_MMP;
-    (dist.f[DIR_PMM])[k_0MM]  = f_MPP;
-    (dist.f[DIR_MPP])[k_M00]  = f_PMM;
-    (dist.f[DIR_MMP])[k_MM0]  = f_PPM;
-    (dist.f[DIR_MPM])[k_M0M]  = f_PMP;
-    (dist.f[DIR_MMM])[k_MMM]  = f_PPP;
+    (dist.f[DIR_PPP])[k_000] = f_MMM;
+    (dist.f[DIR_PMP])[k_0M0] = f_MPM;
+    (dist.f[DIR_PPM])[k_00M] = f_MMP;
+    (dist.f[DIR_PMM])[k_0MM] = f_MPP;
+    (dist.f[DIR_MPP])[k_M00] = f_PMM;
+    (dist.f[DIR_MMP])[k_MM0] = f_PPM;
+    (dist.f[DIR_MPM])[k_M0M] = f_PMP;
+    (dist.f[DIR_MMM])[k_MMM] = f_PPP;
 }
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
index 55c22def9c43ab2678fc808043859f43021270a5..da576618d1b08b55629c3c65fc115ceb822c8f7e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
@@ -16,7 +16,7 @@ template< TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool
     real* vz,
     real* turbulentViscosity,
     real SGSconstant,
-    unsigned long numberOfLBnodes,
+    unsigned long long numberOfLBnodes,
     int level,
     real* forces,
     real* bodyForceX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
index 72d13282fc604dddcfa84682425a7a1829855ea0..13b54723780fa16374b332c731fc35c5664d75b6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
@@ -17,17 +17,18 @@ void CumulantK17BulkComp::run()
 	dim3 grid(Grid, 1, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_CumulantK17BulkComp << < grid, threads >> >(	para->getParD(level)->omega,
-																	para->getParD(level)->typeOfGridNode,
-																	para->getParD(level)->neighborX,
-																	para->getParD(level)->neighborY,
-																	para->getParD(level)->neighborZ,
-																	para->getParD(level)->distributions.f[0],
-																	para->getParD(level)->numberOfNodes,
-																	level,
-																	para->getForcesDev(),
-                                                                    para->getQuadricLimitersDev(),
-																	para->getParD(level)->isEvenTimestep);
+	LB_Kernel_CumulantK17BulkComp << < grid, threads >> >(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getForcesDev(),
+		para->getQuadricLimitersDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_CumulantK17BulkComp execution failed");
 }
 
@@ -38,7 +39,7 @@ CumulantK17BulkComp::CumulantK17BulkComp(std::shared_ptr<Parameter> para, int le
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantK17BulkComp::CumulantK17BulkComp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
index cec04116ae4b411b1b3816ff4a8cab606c92491e..5e98ebe6db990f5d16fd9d7a839c0e5f0927ba87 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -40,63 +40,63 @@ __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
index 6ef6b40d3b7079579f54ca68734deb274d0c1c3a..59c405ae6e3bb46f608454ddb3a11bb0baac134f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
@@ -22,7 +22,7 @@ CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int leve
 
     myPreProcessorTypes.push_back(InitCompSP27);
 
-    myKernelGroup = BasicKernel;
+    
 
     this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 }
@@ -31,15 +31,16 @@ CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int leve
 
 void CumulantK17Unified::run()
 {
-    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
-                                                 para->getParD(level)->typeOfGridNode,
-                                                 para->getParD(level)->neighborX,
-                                                 para->getParD(level)->neighborY,
-                                                 para->getParD(level)->neighborZ,
-                                                 para->getParD(level)->distributions.f[0],
-                                                 (int)para->getParD(level)->numberOfNodes,
-                                                 para->getParD(level)->forcing,
-                                                 para->getParD(level)->isEvenTimestep };
+    GPUKernelParameter kernelParameter{
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        (int)para->getParD(level)->numberOfNodes, // NOTE(review): explicit narrowing to int - confirm the node count of a level always fits in int
+        para->getParD(level)->forcing,
+        para->getParD(level)->isEvenTimestep };
 
     auto lambda = [] __device__(lbm::KernelParameter parameter) {
         return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK17);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
index bc058881e2a013effa417a149cf7a17bce646c6f..466b9f85999257196e860e84919ca6ccce6946b7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
@@ -33,6 +33,6 @@ void CumulantK17CompChim::run()
 CumulantK17CompChim::CumulantK17CompChim(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
 	myPreProcessorTypes.push_back(InitCompSP27);
-	myKernelGroup = BasicKernel;
+	// CUDA launch grid for this level, constructed from its thread-count setting and its node count
 	this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
index 3eea267e55fee45111fb11cf1258559e2c3c63f2..1da801654adb3682ea11ca87c7c7a2fd10b065d3 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
@@ -33,11 +33,12 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include "basics/constants/NumericConstants.h"
+#include "LBM/GPUHelperFunctions/ChimeraTransformation.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
-#include "Kernel/Utilities/ChimeraTransformation.h"
+using namespace vf::gpu;
 
 ////////////////////////////////////////////////////////////////////////////////
 __global__ void LB_Kernel_CumulantK17CompChim(
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
index 54af306039585f3beb39b05f2f2e0a96ae784e12..15d3509e735faa08b97d0876600c30876829c35f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
@@ -1,8 +1,8 @@
 #include "CumulantK18Comp.h"
 
 #include "CumulantK18Comp_Device.cuh"
-
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK18Comp> CumulantK18Comp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -11,37 +11,22 @@ std::shared_ptr<CumulantK18Comp> CumulantK18Comp::getNewInstance(std::shared_ptr
 
 void CumulantK18Comp::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK18Comp << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->g6.g[0],
-														para->getParD(level)->numberOfNodes,
-														level,
-														para->getForcesDev(),
-                                                        para->getQuadricLimitersDev(),
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK18Comp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch dimensions built from this level's thread-count setting and node count
+
+    LB_Kernel_CumulantK18Comp <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->g6.g[0],
+        para->getParD(level)->numberOfNodes,
+        level,
+        para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_CumulantK18Comp execution failed"); // presumably reports a pending CUDA error with this message - confirm against the helper's definition
 }
 
 CumulantK18Comp::CumulantK18Comp(std::shared_ptr<Parameter> para, int level)
@@ -52,5 +37,5 @@ CumulantK18Comp::CumulantK18Comp(std::shared_ptr<Parameter> para, int level)
 	myPreProcessorTypes.push_back(InitCompSP27);
 	myPreProcessorTypes.push_back(InitF3);
 
-	myKernelGroup = F3Kernel;
+	
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
index bb42d113e47ce28f153ac295f2d9a934dd1b213a..c585c19aaca870e97fec63b0cd1742f7aad32556 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -42,83 +42,83 @@ __global__ void LB_Kernel_CumulantK18Comp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00 * size_Mat];
+				G.g[DIR_M00] = &G6[DIR_M00 * size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00P * size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00M * size_Mat];
 			}
 			else
 			{
-				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00 * size_Mat];
+				G.g[DIR_P00] = &G6[DIR_M00 * size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00P * size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00M * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
index 0c1778dc39496c6564dedcbe1f6e818bee147191..8181cdb690b5813c368eddadb9cda58a7d749302 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
@@ -1,8 +1,8 @@
 #include "CumulantK20Comp.h"
 
 #include "CumulantK20Comp_Device.cuh"
-
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK20Comp> CumulantK20Comp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -11,37 +11,22 @@ std::shared_ptr<CumulantK20Comp> CumulantK20Comp::getNewInstance(std::shared_ptr
 
 void CumulantK20Comp::run()
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK20Comp << < grid, threads >> >(	para->getParD(level)->omega,
-																para->getParD(level)->typeOfGridNode,
-																para->getParD(level)->neighborX,
-																para->getParD(level)->neighborY,
-																para->getParD(level)->neighborZ,
-																para->getParD(level)->distributions.f[0],
-																para->getParD(level)->g6.g[0],
-																para->getParD(level)->numberOfNodes,
-																level,
-																para->getForcesDev(),
-                                                                para->getQuadricLimitersDev(),
-																para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK20Comp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration built from this level's thread count and node count
+
+    LB_Kernel_CumulantK20Comp <<< grid.grid, grid.threads >>>( // grid.grid / grid.threads are passed as the launch dimensions
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // presumably the base pointer the device kernel offsets per direction - TODO confirm
+        para->getParD(level)->g6.g[0],
+        para->getParD(level)->numberOfNodes, // same node count that was used to build the grid above
+        level,
+        para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+        para->getParD(level)->isEvenTimestep); // presumably selects the even/odd pointer mapping in the device kernel - TODO confirm
+    getLastCudaError("LB_Kernel_CumulantK20Comp execution failed"); // presumably reports a CUDA error from the launch above with this message - confirm helper semantics
 }
 
 CumulantK20Comp::CumulantK20Comp(std::shared_ptr<Parameter> para, int level)
@@ -52,5 +37,5 @@ CumulantK20Comp::CumulantK20Comp(std::shared_ptr<Parameter> para, int level)
 	myPreProcessorTypes.push_back(InitCompSP27);
 	myPreProcessorTypes.push_back(InitF3);
 
-	myKernelGroup = F3Kernel;
+	
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
index c805fc293aeb8b182bb0e01df82b584da69d0175..0a26eff29c3624bd78ef7dd4a8f675b8cb1a99d3 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -42,83 +42,83 @@ __global__ void LB_Kernel_CumulantK20Comp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00 * size_Mat];
+				G.g[DIR_M00] = &G6[DIR_M00 * size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0P0 * size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0M0 * size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00P * size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00M * size_Mat];
 			}
 			else
 			{
-				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
-				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
-				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
-				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
-				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
-				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00 * size_Mat];
+				G.g[DIR_P00] = &G6[DIR_M00 * size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0P0 * size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0M0 * size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00P * size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00M * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
index be94791572f739fb2eef7c049702caeedb6641fc..6e11bd97a2e76cca3983a83f785a2435d40f594b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "MRTCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<MRTCompSP27> MRTCompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<MRTCompSP27> MRTCompSP27::getNewInstance(std::shared_ptr<Paramet
 
 void MRTCompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_MRT_Comp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_MRT_Comp_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration built from this level's thread count and node count
+
+    LB_Kernel_MRT_Comp_SP_27 <<< grid.grid, grid.threads >>>( // grid.grid / grid.threads are passed as the launch dimensions
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // presumably the base pointer the device kernel offsets per direction - TODO confirm
+        para->getParD(level)->numberOfNodes, // same node count that was used to build the grid above
+        para->getParD(level)->isEvenTimestep); // presumably selects the even/odd pointer mapping in the device kernel - TODO confirm
+    getLastCudaError("LB_Kernel_MRT_Comp_SP_27 execution failed"); // presumably reports a CUDA error from the launch above with this message - confirm helper semantics
 }
 
 MRTCompSP27::MRTCompSP27(std::shared_ptr<Parameter> para, int level)
@@ -46,7 +32,7 @@ MRTCompSP27::MRTCompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 MRTCompSP27::MRTCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
index a9aefa2d62a962766470c93a62adeefa4f19570e..41b24a349da75586be4fb818c9eb3f194a2447fd 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real rho = (mfccc + mfaaa + mfaca + mfcac + mfacc + mfcaa + mfaac + mfcca +
 				mfbac + mfbca + mfbaa + mfbcc + mfabc + mfcba + mfaba + mfcbc + mfacb + mfcab + mfaab + mfccb +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
index 558b4f333e7c92b372a5097aa4917dd6d1230a34..3be594e3e39a57cd71741cd060e9dddda15d6035 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
@@ -5,7 +5,7 @@
 #include <DataTypes.h>
 #include <cuda_runtime.h>
 
-#include <lbm/KernelParameter.h>
+#include "lbm/KernelParameter.h"
 
 #include "Kernel/Utilities/DistributionHelper.cuh"
 
@@ -23,7 +23,7 @@ struct GPUKernelParameter
     unsigned int* neighborY;
     unsigned int* neighborZ;
     real* distributions;
-    int size_Mat;
+    int numberOfLBnodes;
     real* forces;
     bool isEvenTimestep;
 };
@@ -31,19 +31,22 @@ struct GPUKernelParameter
 template<typename KernelFunctor>
 __global__ void runKernel(KernelFunctor kernel, GPUKernelParameter kernelParameter)
 {
-    const uint k = getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = getNodeIndex();
 
-    if(k >= kernelParameter.size_Mat)
+    if(nodeIndex >= kernelParameter.numberOfLBnodes)
         return;
 
-    if (!isValidFluidNode(kernelParameter.typeOfGridNode[k]))
+    if (!isValidFluidNode(kernelParameter.typeOfGridNode[nodeIndex]))
         return;
 
     DistributionWrapper distributionWrapper {
         kernelParameter.distributions,
-        (unsigned int)kernelParameter.size_Mat,
+        (unsigned int)kernelParameter.numberOfLBnodes,
         kernelParameter.isEvenTimestep,
-        k,
+        nodeIndex,
         kernelParameter.neighborX,
         kernelParameter.neighborY,
         kernelParameter.neighborZ
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
index 81655fac9cfd0b562ba60a5ee289fb64da5c1fba..39bd1f3491d0d70e4734d04ef8a2d6e38cdc6448 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "BGKIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<BGKIncompSP27> BGKIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<BGKIncompSP27> BGKIncompSP27::getNewInstance(std::shared_ptr<Par
 
 void BGKIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_BGK_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_BGK_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch setup for this level, built from its thread count and node count
+
+    LB_Kernel_BGK_Incomp_SP_27 <<< grid.grid, grid.threads >>>( // grid.grid / grid.threads form the execution configuration of the launch
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // NOTE(review): presumably the base pointer the kernel offsets by direction * size_Mat - confirm against the kernel signature
+        para->getParD(level)->numberOfNodes, // same node count that was used to build the CudaGrid above
+        para->getParD(level)->isEvenTimestep); // NOTE(review): presumably drives the kernel's EvenOrOdd pointer layout - confirm
+    getLastCudaError("LB_Kernel_BGK_Incomp_SP_27 execution failed"); // message names the kernel launched above
 }
 
 BGKIncompSP27::BGKIncompSP27(std::shared_ptr<Parameter> para, int level)
@@ -46,7 +32,7 @@ BGKIncompSP27::BGKIncompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 BGKIncompSP27::BGKIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
index 9a94006b8a1be745fc2bcfdd80e454152347139d..6cfde7648bdfe8808cb39dd1b80d6537bb3c3280 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
index 86b513f1252f2787abee637819e64606d111c4fa..84a55b89d68f9a1e18c5114f8088a7dee24a4cd1 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "BGKPlusIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<BGKPlusIncompSP27> BGKPlusIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<BGKPlusIncompSP27> BGKPlusIncompSP27::getNewInstance(std::shared
 
 void BGKPlusIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_BGK_Plus_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_BGK_Plus_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch setup for this level, built from its thread count and node count
+
+    LB_Kernel_BGK_Plus_Incomp_SP_27 <<< grid.grid, grid.threads >>>( // grid.grid / grid.threads form the execution configuration of the launch
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // NOTE(review): presumably the base pointer the kernel offsets by direction * size_Mat - confirm against the kernel signature
+        para->getParD(level)->numberOfNodes, // same node count that was used to build the CudaGrid above
+        para->getParD(level)->isEvenTimestep); // NOTE(review): presumably drives the kernel's EvenOrOdd pointer layout - confirm
+    getLastCudaError("LB_Kernel_BGK_Plus_Incomp_SP_27 execution failed"); // message names the kernel launched above
 }
 
 BGKPlusIncompSP27::BGKPlusIncompSP27(std::shared_ptr<Parameter> para, int level)
@@ -46,7 +32,7 @@ BGKPlusIncompSP27::BGKPlusIncompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 BGKPlusIncompSP27::BGKPlusIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
index 9355e42aa5b05190f063f5247d8d6c0dea787a02..1fee181619070e3bc30370a1bc32b949a3af9a2a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
index 05f374096c9c5da2460b32cf5ae8cb59cfa78382..b060137f2d505886ee02a4b72e372ce8b4d48a78 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "CascadeIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CascadeIncompSP27> CascadeIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<CascadeIncompSP27> CascadeIncompSP27::getNewInstance(std::shared
 
 void CascadeIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cascade_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch setup for this level, built from its thread count and node count
+
+    LB_Kernel_Cascade_Incomp_SP_27 <<< grid.grid, grid.threads >>>( // grid.grid / grid.threads form the execution configuration of the launch
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // NOTE(review): presumably the base pointer the kernel offsets by direction * size_Mat - confirm against the kernel signature
+        para->getParD(level)->numberOfNodes, // same node count that was used to build the CudaGrid above
+        para->getParD(level)->isEvenTimestep); // NOTE(review): presumably drives the kernel's EvenOrOdd pointer layout - confirm
+    getLastCudaError("LB_Kernel_Cascade_Incomp_SP_27 execution failed"); // message names the kernel launched above
 }
 
 CascadeIncompSP27::CascadeIncompSP27(std::shared_ptr<Parameter> para, int level)
@@ -46,7 +32,7 @@ CascadeIncompSP27::CascadeIncompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CascadeIncompSP27::CascadeIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
index 92cc749b135739d5f38c9916c4ee0da7497e5f2d..0346f12cf609c355aedd4743dd7f971f444d5fe9 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
index 62768ef9948b6c259c5ad4005237081f4d255e73..2cade430786b17567c47264f0638dba259b3192d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "Cumulant1hIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<Cumulant1hIncompSP27> Cumulant1hIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,38 +11,23 @@ std::shared_ptr<Cumulant1hIncompSP27> Cumulant1hIncompSP27::getNewInstance(std::
 
 void Cumulant1hIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cum_1h_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-													para->getParD(level)->deltaPhi,
-													para->getAngularVelocity(),
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->coordinateX,
-													para->getParD(level)->coordinateY,
-													para->getParD(level)->coordinateZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Cum_1h_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration built from this level's thread count and node count
+
+    LB_Kernel_Cum_1h_Incomp_SP_27 <<< grid.grid, grid.threads >>>( // kernel launch for this level; all arguments are read from the level's device parameters
+        para->getParD(level)->omega,
+        para->getParD(level)->deltaPhi,
+        para->getAngularVelocity(),
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->coordinateX,
+        para->getParD(level)->coordinateY,
+        para->getParD(level)->coordinateZ,
+        para->getParD(level)->distributions.f[0], // NOTE(review): presumably the kernel's DDStart base pointer - confirm against the kernel signature
+        para->getParD(level)->numberOfNodes, // same node count that was passed to CudaGrid above
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cum_1h_Incomp_SP_27 execution failed"); // presumably reports a pending CUDA error from the launch; the message names the launched kernel
 }
 
 Cumulant1hIncompSP27::Cumulant1hIncompSP27(std::shared_ptr<Parameter> para, int level)
@@ -51,7 +37,7 @@ Cumulant1hIncompSP27::Cumulant1hIncompSP27(std::shared_ptr<Parameter> para, int
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 Cumulant1hIncompSP27::Cumulant1hIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
index 0243046082ce1853011c6632d5a2f80364ebe0db..ac88396b42483e3178869be585124fb031d099ec 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -42,63 +42,63 @@ __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -159,33 +159,33 @@ __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//Ship
 			real coord0X = 281.125f;//7.5f;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
index 6551e1bde300e3a4d2a4f50cefdfff258edfacee..840067da7f34a4415b1b14458ae0fc8d316e366d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantIsoIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantIsoIncompSP27> CumulantIsoIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<CumulantIsoIncompSP27> CumulantIsoIncompSP27::getNewInstance(std
 
 void CumulantIsoIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_Cum_IsoTest_Incomp_SP_27 << < grid, threads >> >(para->getParD(level)->omega,
-		para->getParD(level)->typeOfGridNode,
-		para->getParD(level)->neighborX,
-		para->getParD(level)->neighborY,
-		para->getParD(level)->neighborZ,
-		para->getParD(level)->distributions.f[0],
-		para->getParD(level)->dxxUx,
-		para->getParD(level)->dyyUy,
-		para->getParD(level)->dzzUz,
-		para->getParD(level)->numberOfNodes,
-		para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration built from this level's thread count and node count
+
+    LB_Kernel_Cum_IsoTest_Incomp_SP_27 <<< grid.grid, grid.threads >>>( // kernel launch for this level; all arguments are read from the level's device parameters
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0], // NOTE(review): presumably the kernel's DDStart base pointer - confirm against the kernel signature
+        para->getParD(level)->dxxUx,
+        para->getParD(level)->dyyUy,
+        para->getParD(level)->dzzUz,
+        para->getParD(level)->numberOfNodes, // same node count that was passed to CudaGrid above
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_Cum_IsoTest_Incomp_SP_27 execution failed"); // presumably reports a pending CUDA error from the launch; the message names the launched kernel
 }
 
 CumulantIsoIncompSP27::CumulantIsoIncompSP27(std::shared_ptr<Parameter> para, int level)
@@ -49,7 +35,7 @@ CumulantIsoIncompSP27::CumulantIsoIncompSP27(std::shared_ptr<Parameter> para, in
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantIsoIncompSP27::CumulantIsoIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
index 64d697f2b0953cee75f4397e399a0e6128e486a2..623d3c2c26b2c4d8fdfdaa718ca92662dfe5b548 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -40,63 +40,63 @@ __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
index 40cde56b007f70f98db13d5962f3e746b97637ef..c597924193d859a35dddaa7b37a56e21d265ceba 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
@@ -2,6 +2,7 @@
 
 #include "CumulantK15Incomp_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK15Incomp> CumulantK15Incomp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<CumulantK15Incomp> CumulantK15Incomp::getNewInstance(std::shared
 
 void CumulantK15Incomp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_CumulantK15Incomp <<< grid, threads >>>(	para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->numberOfNodes,
-														para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_CumulantK15Incomp execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_CumulantK15Incomp <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_CumulantK15Incomp execution failed");
 }
 
 CumulantK15Incomp::CumulantK15Incomp(std::shared_ptr<Parameter> para, int level)
@@ -46,7 +32,7 @@ CumulantK15Incomp::CumulantK15Incomp(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantK15Incomp::CumulantK15Incomp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
index fc108ef1ef109a40735e250bd9a0f21491e4f977..9fcfeaa97d97b9bfcf2ad0227464cbcabbc28f44 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_CumulantK15Incomp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -154,33 +154,33 @@ __global__ void LB_Kernel_CumulantK15Incomp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
index c4311309e4653f2862e303dacb3e2d07646a5061..daa90091fe092a98741d0764e2327f3ce4c9d2bc 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "MRTIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<MRTIncompSP27> MRTIncompSP27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,33 +11,18 @@ std::shared_ptr<MRTIncompSP27> MRTIncompSP27::getNewInstance(std::shared_ptr<Par
 
 void MRTIncompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Kernel_MRT_Incomp_SP_27 << < grid, threads >> >(	para->getParD(level)->omega,
-													para->getParD(level)->typeOfGridNode,
-													para->getParD(level)->neighborX,
-													para->getParD(level)->neighborY,
-													para->getParD(level)->neighborZ,
-													para->getParD(level)->distributions.f[0],
-													para->getParD(level)->numberOfNodes,
-													para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LB_Kernel_MRT_SP_27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Kernel_MRT_Incomp_SP_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Kernel_MRT_Incomp_SP_27 execution failed");
 }
 
 MRTIncompSP27::MRTIncompSP27(std::shared_ptr<Parameter> para, int level)
@@ -46,7 +32,7 @@ MRTIncompSP27::MRTIncompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 MRTIncompSP27::MRTIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
index f6a283c2f9ba3c15729061ebeabcf34edd0abe97..2fbc7d64d5f10a2c6313647e508fbb2192dd435b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -37,63 +37,63 @@ __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
-			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
-			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
-			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
-			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
-			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
-			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
-			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
-			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
-			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
-			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
-			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP])[ks ];//kts
 			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
-			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
-			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
-			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
-			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
-			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
-			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
-			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
-			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
index 77527d5bedab08fdcacb3a103727ae25274b2aa4..a8c1af64ebd4641a755bf9fed7e9fafa18e9cad7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
@@ -11,7 +11,7 @@ std::shared_ptr<PMCumulantOneCompSP27> PMCumulantOneCompSP27::getNewInstance(std
 
 void PMCumulantOneCompSP27::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	int Grid = (size_Mat / numberOfThreads) + 1;
@@ -30,7 +30,8 @@ void PMCumulantOneCompSP27::run()
 	dim3 threads(numberOfThreads, 1, 1);
 
 	for (int i = 0; i < pm.size(); i++) {
-		LB_Kernel_PM_Cum_One_Comp_SP_27 << < grid, threads >> >(para->getParD(level)->omega,
+		LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid, threads >>>(
+			para->getParD(level)->omega,
 			para->getParD(level)->neighborX,
 			para->getParD(level)->neighborY,
 			para->getParD(level)->neighborZ,
@@ -56,7 +57,7 @@ PMCumulantOneCompSP27::PMCumulantOneCompSP27(std::shared_ptr<Parameter> para, st
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 PMCumulantOneCompSP27::PMCumulantOneCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
index 89975d1663fb236295c22b81af4b0544ffc489bb..1ed7cf3af37251550d38affe512f841a3779b918 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -11,7 +11,7 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DDStart,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	real porosity,
@@ -24,63 +24,63 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	Distributions27 D;
 	if (EvenOrOdd == true)
 	{
-		D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-		D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-		D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-		D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-		D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-		D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-		D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-		D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-		D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-		D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-		D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-		D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-		D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
-		D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
+		D.f[DIR_P00] = &DDStart[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_M00] = &DDStart[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00P] = &DDStart[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00M] = &DDStart[DIR_00M * numberOfLBnodes];
+		D.f[DIR_PP0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_MM0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_P0P] = &DDStart[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_M0M] = &DDStart[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DDStart[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DDStart[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0PP] = &DDStart[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0MM] = &DDStart[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DDStart[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DDStart[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+		D.f[DIR_PPP] = &DDStart[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DDStart[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_PMP] = &DDStart[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_MPP] = &DDStart[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DDStart[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_MMM] = &DDStart[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_PMM] = &DDStart[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_MPM] = &DDStart[DIR_MPM * numberOfLBnodes];
 	}
 	else
 	{
-		D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-		D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-		D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-		D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-		D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-		D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-		D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-		D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-		D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
-		D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
-		D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-		D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-		D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-		D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+		D.f[DIR_M00] = &DDStart[DIR_P00 * numberOfLBnodes];
+		D.f[DIR_P00] = &DDStart[DIR_M00 * numberOfLBnodes];
+		D.f[DIR_0M0] = &DDStart[DIR_0P0 * numberOfLBnodes];
+		D.f[DIR_0P0] = &DDStart[DIR_0M0 * numberOfLBnodes];
+		D.f[DIR_00M] = &DDStart[DIR_00P * numberOfLBnodes];
+		D.f[DIR_00P] = &DDStart[DIR_00M * numberOfLBnodes];
+		D.f[DIR_MM0] = &DDStart[DIR_PP0 * numberOfLBnodes];
+		D.f[DIR_PP0] = &DDStart[DIR_MM0 * numberOfLBnodes];
+		D.f[DIR_MP0] = &DDStart[DIR_PM0 * numberOfLBnodes];
+		D.f[DIR_PM0] = &DDStart[DIR_MP0 * numberOfLBnodes];
+		D.f[DIR_M0M] = &DDStart[DIR_P0P * numberOfLBnodes];
+		D.f[DIR_P0P] = &DDStart[DIR_M0M * numberOfLBnodes];
+		D.f[DIR_M0P] = &DDStart[DIR_P0M * numberOfLBnodes];
+		D.f[DIR_P0M] = &DDStart[DIR_M0P * numberOfLBnodes];
+		D.f[DIR_0MM] = &DDStart[DIR_0PP * numberOfLBnodes];
+		D.f[DIR_0PP] = &DDStart[DIR_0MM * numberOfLBnodes];
+		D.f[DIR_0MP] = &DDStart[DIR_0PM * numberOfLBnodes];
+		D.f[DIR_0PM] = &DDStart[DIR_0MP * numberOfLBnodes];
+		D.f[DIR_000] = &DDStart[DIR_000 * numberOfLBnodes];
+		D.f[DIR_MMM] = &DDStart[DIR_PPP * numberOfLBnodes];
+		D.f[DIR_PPM] = &DDStart[DIR_MMP * numberOfLBnodes];
+		D.f[DIR_MPM] = &DDStart[DIR_PMP * numberOfLBnodes];
+		D.f[DIR_PMM] = &DDStart[DIR_MPP * numberOfLBnodes];
+		D.f[DIR_MMP] = &DDStart[DIR_PPM * numberOfLBnodes];
+		D.f[DIR_PPP] = &DDStart[DIR_MMM * numberOfLBnodes];
+		D.f[DIR_MPP] = &DDStart[DIR_PMM * numberOfLBnodes];
+		D.f[DIR_PMP] = &DDStart[DIR_MPM * numberOfLBnodes];
 	}
 
 	////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
index 6533c604f32a478cdc6a097e4dd7d0b56e48150d..f2cf530b5d331c71d4a13bd5882a3657a3bbddea 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
@@ -9,7 +9,7 @@ __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	real* DDStart,
-	int size_Mat,
+	unsigned long long numberOfLBnodes,
 	int level,
 	real* forces,
 	real porosity,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
index cfcb70cd2bd6f3cc8ec4349650c44b7d3b0619fc..cfcc544aac2172cef2f4d58600931db8ccfa0189 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
@@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK15Comp> WaleCumulantK15Comp::getNewInstance(std::sh
 
 void WaleCumulantK15Comp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	int Grid = (size_Mat / numberOfThreads) + 1;
@@ -28,22 +28,23 @@ void WaleCumulantK15Comp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_WaleCumulantK15Comp << < grid, threads >> >(	para->getParD(level)->omega,
-																para->getParD(level)->typeOfGridNode,
-																para->getParD(level)->neighborX,
-																para->getParD(level)->neighborY,
-																para->getParD(level)->neighborZ,
-																para->getParD(level)->neighborInverse,
-																para->getParD(level)->velocityX,
-																para->getParD(level)->velocityY,
-																para->getParD(level)->velocityZ,
-																para->getParD(level)->distributions.f[0],
-																para->getParD(level)->turbViscosity,
-																para->getParD(level)->numberOfNodes,
-																level,
-																para->getTimestepOfCoarseLevel(),
-																para->getForcesDev(),
-																para->getParD(level)->isEvenTimestep);
+	LB_Kernel_WaleCumulantK15Comp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->neighborInverse,
+		para->getParD(level)->velocityX,
+		para->getParD(level)->velocityY,
+		para->getParD(level)->velocityZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->turbViscosity,
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getTimestepOfCoarseLevel(),
+		para->getForcesDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_WaleCumulantK15Comp execution failed");
 }
 
@@ -54,7 +55,7 @@ WaleCumulantK15Comp::WaleCumulantK15Comp(std::shared_ptr<Parameter> para, int le
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicWaleKernel;
+	
 }
 
 WaleCumulantK15Comp::WaleCumulantK15Comp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
index 3da25060e6c82ea685a1659fecc8cf66eeaf44c4..62658ccbdcead27f77d3b72d2daa311ade5baa59 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -46,63 +46,63 @@ __global__ void LB_Kernel_WaleCumulantK15Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -136,33 +136,33 @@ __global__ void LB_Kernel_WaleCumulantK15Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
 			real mfaaa = (D.f[DIR_MMM])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
index 15b808279a4c9dc771531f118cb369b7c5380a84..05e257a52b38e2c31badcb1fb739de3ab0239f6e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
@@ -10,7 +10,7 @@ std::shared_ptr<WaleBySoniMalavCumulantK15Comp> WaleBySoniMalavCumulantK15Comp::
 
 void WaleBySoniMalavCumulantK15Comp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	//int Grid = size_Array / numberOfThreads;
@@ -32,21 +32,22 @@ void WaleBySoniMalavCumulantK15Comp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_WaleBySoniMalavCumulantK15Comp << < grid, threads >> >(	para->getParD(level)->omega,
-																			para->getParD(level)->typeOfGridNode,
-																			para->getParD(level)->neighborX,
-																			para->getParD(level)->neighborY,
-																			para->getParD(level)->neighborZ,
-																			para->getParD(level)->neighborInverse,
-																			para->getParD(level)->velocityX,
-																			para->getParD(level)->velocityY,
-																			para->getParD(level)->velocityZ,
-																			para->getParD(level)->distributions.f[0],
-																			para->getParD(level)->turbViscosity,
-																			para->getParD(level)->numberOfNodes,
-																			level,
-																			para->getForcesDev(),
-																			para->getParD(level)->isEvenTimestep);
+	LB_Kernel_WaleBySoniMalavCumulantK15Comp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->neighborInverse,
+		para->getParD(level)->velocityX,
+		para->getParD(level)->velocityY,
+		para->getParD(level)->velocityZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->turbViscosity,
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getForcesDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_WaleBySoniMalavCumulantK15Comp execution failed");
 }
 
@@ -57,7 +58,7 @@ WaleBySoniMalavCumulantK15Comp::WaleBySoniMalavCumulantK15Comp(std::shared_ptr<P
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicWaleKernel;
+	
 }
 
 WaleBySoniMalavCumulantK15Comp::WaleBySoniMalavCumulantK15Comp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
index 511219c352c4d156428565f718191a70b9cc6c32..d266aac648c6163fb24764879f391304f32aba87 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -45,63 +45,63 @@ __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -115,33 +115,33 @@ __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
 			real mfaaa = (D.f[DIR_MMM])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
index 5eeea51301c666cf17546c85a444413111bebf2c..b7f4038c6b67cc4d1cf521bc7a904801650d1e8d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
@@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK17Comp> WaleCumulantK17Comp::getNewInstance(std::sh
 
 void WaleCumulantK17Comp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	//int Grid = size_Array / numberOfThreads;
@@ -32,23 +32,24 @@ void WaleCumulantK17Comp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_WaleCumulantK17Comp <<< grid, threads >>>(para->getParD(level)->omega,
-														para->getParD(level)->typeOfGridNode,
-														para->getParD(level)->neighborX,
-														para->getParD(level)->neighborY,
-														para->getParD(level)->neighborZ,
-														para->getParD(level)->neighborInverse,
-														para->getParD(level)->velocityX,
-														para->getParD(level)->velocityY,
-														para->getParD(level)->velocityZ,
-														para->getParD(level)->distributions.f[0],
-														para->getParD(level)->turbViscosity,
-														para->getParD(level)->numberOfNodes,
-														level,
-														para->getTimestepOfCoarseLevel(),
-														para->getForcesDev(),
-                                                        para->getQuadricLimitersDev(),
-														para->getParD(level)->isEvenTimestep);
+	LB_Kernel_WaleCumulantK17Comp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->neighborInverse,
+		para->getParD(level)->velocityX,
+		para->getParD(level)->velocityY,
+		para->getParD(level)->velocityZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->turbViscosity,
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getTimestepOfCoarseLevel(),
+		para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_WaleCumulantK17Comp execution failed");
 }
 
@@ -59,7 +60,7 @@ WaleCumulantK17Comp::WaleCumulantK17Comp(std::shared_ptr<Parameter> para, int le
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicWaleKernel;
+	
 }
 
 WaleCumulantK17Comp::WaleCumulantK17Comp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
index 8aaa13ab1d868e15ea5707d1566ba653b44c645d..71d3ed0604feb43422e3e738bb2ca9bca147ab17 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -47,63 +47,63 @@ __global__ void LB_Kernel_WaleCumulantK17Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -137,33 +137,33 @@ __global__ void LB_Kernel_WaleCumulantK17Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[DIR_P00   ])[k  ];
-			real mfabb = (D.f[DIR_M00   ])[kw ];
-			real mfbcb = (D.f[DIR_0P0   ])[k  ];
-			real mfbab = (D.f[DIR_0M0   ])[ks ];
-			real mfbbc = (D.f[DIR_00P   ])[k  ];
-			real mfbba = (D.f[DIR_00M   ])[kb ];
-			real mfccb = (D.f[DIR_PP0  ])[k  ];
-			real mfaab = (D.f[DIR_MM0  ])[ksw];
-			real mfcab = (D.f[DIR_PM0  ])[ks ];
-			real mfacb = (D.f[DIR_MP0  ])[kw ];
-			real mfcbc = (D.f[DIR_P0P  ])[k  ];
-			real mfaba = (D.f[DIR_M0M  ])[kbw];
-			real mfcba = (D.f[DIR_P0M  ])[kb ];
-			real mfabc = (D.f[DIR_M0P  ])[kw ];
-			real mfbcc = (D.f[DIR_0PP  ])[k  ];
-			real mfbaa = (D.f[DIR_0MM  ])[kbs];
-			real mfbca = (D.f[DIR_0PM  ])[kb ];
-			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfcbb = (D.f[DIR_P00])[k  ];
+			real mfabb = (D.f[DIR_M00])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k  ];
+			real mfbab = (D.f[DIR_0M0])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k  ];
+			real mfbba = (D.f[DIR_00M])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k  ];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks ];
+			real mfacb = (D.f[DIR_MP0])[kw ];
+			real mfcbc = (D.f[DIR_P0P])[k  ];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb ];
+			real mfabc = (D.f[DIR_M0P])[kw ];
+			real mfbcc = (D.f[DIR_0PP])[k  ];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb ];
+			real mfbac = (D.f[DIR_0MP])[ks ];
 			real mfbbb = (D.f[DIR_000])[k  ];
-			real mfccc = (D.f[DIR_PPP ])[k  ];
-			real mfaac = (D.f[DIR_MMP ])[ksw];
-			real mfcac = (D.f[DIR_PMP ])[ks ];
-			real mfacc = (D.f[DIR_MPP ])[kw ];
-			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfccc = (D.f[DIR_PPP])[k  ];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks ];
+			real mfacc = (D.f[DIR_MPP])[kw ];
+			real mfcca = (D.f[DIR_PPM])[kb ];
 			real mfaaa = (D.f[DIR_MMM])[kbsw];
-			real mfcaa = (D.f[DIR_PMM ])[kbs];
-			real mfaca = (D.f[DIR_MPM ])[kbw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
index 98dca58f522bf02ce66328819e42c717f0ceef28..5fe0284e675785691e51a58e7e0869ba4164ad5f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
@@ -10,7 +10,7 @@ std::shared_ptr<WaleCumulantK17DebugComp> WaleCumulantK17DebugComp::getNewInstan
 
 void WaleCumulantK17DebugComp::run()
 {
-	int size_Mat = para->getParD(level)->numberOfNodes;
+	int size_Mat = (int)para->getParD(level)->numberOfNodes;
 	int numberOfThreads = para->getParD(level)->numberofthreads;
 
 	//int Grid = size_Array / numberOfThreads;
@@ -32,34 +32,34 @@ void WaleCumulantK17DebugComp::run()
 	dim3 grid(Grid1, Grid2, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
-	LB_Kernel_WaleCumulantK17DebugComp << < grid, threads >> >(
-																		para->getParD(level)->omega,
-																		para->getParD(level)->typeOfGridNode,
-																		para->getParD(level)->neighborX,
-																		para->getParD(level)->neighborY,
-																		para->getParD(level)->neighborZ,
-																		para->getParD(level)->neighborInverse,
-																		para->getParD(level)->velocityX,
-																		para->getParD(level)->velocityY,
-																		para->getParD(level)->velocityZ,
-																		para->getParD(level)->distributions.f[0],
-																		para->getParD(level)->turbViscosity,
-																		para->getParD(level)->gSij,
-																		para->getParD(level)->gSDij,
-																		para->getParD(level)->gDxvx,
-																		para->getParD(level)->gDyvx,
-																		para->getParD(level)->gDzvx,
-																		para->getParD(level)->gDxvy,
-																		para->getParD(level)->gDyvy,
-																		para->getParD(level)->gDzvy,
-																		para->getParD(level)->gDxvz,
-																		para->getParD(level)->gDyvz,
-																		para->getParD(level)->gDzvz,
-																		para->getParD(level)->numberOfNodes,
-																		level,
-																		para->getForcesDev(),
-                                                                        para->getQuadricLimitersDev(),
-																		para->getParD(level)->isEvenTimestep);
+	LB_Kernel_WaleCumulantK17DebugComp <<< grid, threads >>>(
+		para->getParD(level)->omega,
+		para->getParD(level)->typeOfGridNode,
+		para->getParD(level)->neighborX,
+		para->getParD(level)->neighborY,
+		para->getParD(level)->neighborZ,
+		para->getParD(level)->neighborInverse,
+		para->getParD(level)->velocityX,
+		para->getParD(level)->velocityY,
+		para->getParD(level)->velocityZ,
+		para->getParD(level)->distributions.f[0],
+		para->getParD(level)->turbViscosity,
+		para->getParD(level)->gSij,
+		para->getParD(level)->gSDij,
+		para->getParD(level)->gDxvx,
+		para->getParD(level)->gDyvx,
+		para->getParD(level)->gDzvx,
+		para->getParD(level)->gDxvy,
+		para->getParD(level)->gDyvy,
+		para->getParD(level)->gDzvy,
+		para->getParD(level)->gDxvz,
+		para->getParD(level)->gDyvz,
+		para->getParD(level)->gDzvz,
+		para->getParD(level)->numberOfNodes,
+		level,
+		para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+		para->getParD(level)->isEvenTimestep);
 	getLastCudaError("LB_Kernel_WaleCumulantK17DebugComp execution failed");
 }
 
@@ -70,7 +70,7 @@ WaleCumulantK17DebugComp::WaleCumulantK17DebugComp(std::shared_ptr<Parameter> pa
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicWaleKernel;
+	
 }
 
 WaleCumulantK17DebugComp::WaleCumulantK17DebugComp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
index a1feba477a6555ea728311a6e99d5302652813ff..0a48c68059d794ddd7aed85c266604d51809d978 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -57,63 +57,63 @@ __global__ void LB_Kernel_WaleCumulantK17DebugComp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_PMM]= &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_MPM]= &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_PMM]= &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_MPM]= &DDStart[DIR_MPM * size_Mat];
 			}
 			else
 			{
-				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
-				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
-				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
-				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
-				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
-				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
-				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
-				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
-				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
-				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
-				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
-				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
-				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
-				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
-				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
-				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
-				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
-				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
-				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
-				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
-				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
-				D.f[DIR_MPM]= &DDStart[DIR_PMP *size_Mat];
-				D.f[DIR_PMM]= &DDStart[DIR_MPP *size_Mat];
-				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
-				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
-				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
-				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00 * size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00 * size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0 * size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0 * size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P * size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M * size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0 * size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0 * size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0 * size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0 * size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P * size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M * size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M * size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P * size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP * size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM * size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM * size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP * size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000 * size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP * size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP * size_Mat];
+				D.f[DIR_MPM]= &DDStart[DIR_PMP * size_Mat];
+				D.f[DIR_PMM]= &DDStart[DIR_MPP * size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM * size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM * size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM * size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM * size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h
deleted file mode 100644
index f7822d63fa0efd34b27773dffdeebddf521a8792..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ChimeraTransformation.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef CHIMERA_TRANSFORMATION_H
-#define CHIMERA_TRANSFORMATION_H
-
-#include <lbm/constants/NumericConstants.h>
-
-using namespace vf::lbm::constant;
-
-////////////////////////////////////////////////////////////////////////////////
-//! \brief forward chimera transformation \ref forwardInverseChimeraWithK
-//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref
-//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//! ]</b></a> Modified for lower round-off errors.
-inline __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K)
-{
-    real m2 = mfa + mfc;
-    real m1 = mfc - mfa;
-    real m0 = m2 + mfb;
-    mfa     = m0;
-    m0 *= Kinverse;
-    m0 += c1o1;
-    mfb = (m1 * Kinverse - m0 * vv) * K;
-    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! \brief backward chimera transformation \ref backwardInverseChimeraWithK
-//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref
-//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//! ]</b></a> Modified for lower round-off errors.
-inline __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K)
-{
-    real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K;
-    real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K;
-    mfc     = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K;
-    mfa     = m0;
-    mfb     = m1;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! \brief forward chimera transformation \ref forwardChimera
-//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref
-//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off
-//! errors.
-inline __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
-{
-    real m1 = (mfa + mfc) + mfb;
-    real m2 = mfc - mfa;
-    mfc     = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
-    mfb     = m2 - vv * m1;
-    mfa     = m1;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! \brief backward chimera transformation \ref backwardChimera
-//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref
-//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnessary floating point operations. Modified for lower round-off
-//! errors.
-inline __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
-{
-    real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
-    real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
-    mfc     = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
-    mfb     = mb;
-    mfa     = ma;
-}
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
index 7c477c539dc3526389dc22563b50501e778a63f3..a1d9ba6665576c90406eee13084e5133acdb448c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
@@ -2,8 +2,7 @@
 
 #include <cuda_runtime.h>
 
-
-#include <lbm/constants/NumericConstants.h>
+#include "basics/constants/NumericConstants.h"
 #include "lbm/constants/D3Q27.h"
 using namespace vf::lbm::dir;
 
@@ -80,10 +79,4 @@ __device__ void DistributionWrapper::write()
     (distribution_references.f[DIR_000])[k]   = distribution.f[vf::lbm::dir::ZZZ];
 }
 
-__device__ bool isValidFluidNode(uint nodeType)
-{
-    return (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2);
-}
-
-
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
index 1009ecfa92f31e821d825ad72ba681bc3ae96d1b..599f3f46668c07da49725770177d77239f8ef9df 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
@@ -37,76 +37,13 @@
 
 #include "lbm/KernelParameter.h"
 #include "lbm/constants/D3Q27.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 using namespace vf::lbm::dir;
 
 namespace vf::gpu
 {
 
-__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const uint numberOfLBnodes, const bool isEvenTimestep)
-{
-    if (isEvenTimestep)
-    {
-        dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
-        dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes];
-        dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes];
-        dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
-        dist.f[DIR_0M0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
-        dist.f[DIR_00P] = &distributionArray[DIR_00P * numberOfLBnodes];
-        dist.f[DIR_00M] = &distributionArray[DIR_00M * numberOfLBnodes];
-        dist.f[DIR_PP0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
-        dist.f[DIR_MM0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
-        dist.f[DIR_PM0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
-        dist.f[DIR_MP0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
-        dist.f[DIR_P0P] = &distributionArray[DIR_P0P * numberOfLBnodes];
-        dist.f[DIR_M0M] = &distributionArray[DIR_M0M * numberOfLBnodes];
-        dist.f[DIR_P0M] = &distributionArray[DIR_P0M * numberOfLBnodes];
-        dist.f[DIR_M0P] = &distributionArray[DIR_M0P * numberOfLBnodes];
-        dist.f[DIR_0PP] = &distributionArray[DIR_0PP * numberOfLBnodes];
-        dist.f[DIR_0MM] = &distributionArray[DIR_0MM * numberOfLBnodes];
-        dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes];
-        dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes];
-        dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes];
-        dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes];
-        dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes];
-        dist.f[DIR_MPP] = &distributionArray[DIR_MPP * numberOfLBnodes];
-        dist.f[DIR_PPM] = &distributionArray[DIR_PPM * numberOfLBnodes];
-        dist.f[DIR_MMM] = &distributionArray[DIR_MMM * numberOfLBnodes];
-        dist.f[DIR_PMM] = &distributionArray[DIR_PMM * numberOfLBnodes];
-        dist.f[DIR_MPM] = &distributionArray[DIR_MPM * numberOfLBnodes];
-    }
-    else
-    {
-         dist.f[DIR_M00] = &distributionArray[DIR_P00 * numberOfLBnodes];
-         dist.f[DIR_P00] = &distributionArray[DIR_M00 * numberOfLBnodes];
-         dist.f[DIR_0M0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
-         dist.f[DIR_0P0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
-         dist.f[DIR_00M] = &distributionArray[DIR_00P * numberOfLBnodes];
-         dist.f[DIR_00P] = &distributionArray[DIR_00M * numberOfLBnodes];
-         dist.f[DIR_MM0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
-         dist.f[DIR_PP0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
-         dist.f[DIR_MP0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
-         dist.f[DIR_PM0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
-         dist.f[DIR_M0M] = &distributionArray[DIR_P0P * numberOfLBnodes];
-         dist.f[DIR_P0P] = &distributionArray[DIR_M0M * numberOfLBnodes];
-         dist.f[DIR_M0P] = &distributionArray[DIR_P0M * numberOfLBnodes];
-         dist.f[DIR_P0M] = &distributionArray[DIR_M0P * numberOfLBnodes];
-         dist.f[DIR_0MM] = &distributionArray[DIR_0PP * numberOfLBnodes];
-         dist.f[DIR_0PP] = &distributionArray[DIR_0MM * numberOfLBnodes];
-         dist.f[DIR_0MP] = &distributionArray[DIR_0PM * numberOfLBnodes];
-         dist.f[DIR_0PM] = &distributionArray[DIR_0MP * numberOfLBnodes];
-         dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
-         dist.f[DIR_PPP] = &distributionArray[DIR_MMM * numberOfLBnodes];
-         dist.f[DIR_MMP] = &distributionArray[DIR_PPM * numberOfLBnodes];
-         dist.f[DIR_PMP] = &distributionArray[DIR_MPM * numberOfLBnodes];
-         dist.f[DIR_MPP] = &distributionArray[DIR_PMM * numberOfLBnodes];
-         dist.f[DIR_PPM] = &distributionArray[DIR_MMP * numberOfLBnodes];
-         dist.f[DIR_MMM] = &distributionArray[DIR_PPP * numberOfLBnodes];
-         dist.f[DIR_PMM] = &distributionArray[DIR_MPP * numberOfLBnodes];
-         dist.f[DIR_MPM] = &distributionArray[DIR_PMP * numberOfLBnodes];
-    }
-}
-
 /**
 *  Getting references to the 27 directions.
 *  @params distributions 1D real* array containing all data (number of elements = 27 * matrix_size)
@@ -114,7 +51,7 @@ __inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &
 *  @params isEvenTimestep: stored data dependent on timestep is based on the esoteric twist algorithm
 *  @return a data struct containing the addresses to the 27 directions within the 1D distribution array
 */
-__inline__ __device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, unsigned int numberOfLBnodes, bool isEvenTimestep){
+__inline__ __device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, const unsigned long long numberOfLBnodes, const bool isEvenTimestep){
     DistributionReferences27 distribution_references;
     getPointersToDistributions(distribution_references, distributions, numberOfLBnodes, isEvenTimestep);
     return distribution_references;
@@ -157,20 +94,6 @@ struct DistributionWrapper
     const uint kbsw;
 };
 
-__inline__ __device__ unsigned int getNodeIndex()
-{
-    const unsigned x = threadIdx.x;
-    const unsigned y = blockIdx.x;
-    const unsigned z = blockIdx.y;
-
-    const unsigned nx = blockDim.x;
-    const unsigned ny = gridDim.x;
-
-    return nx * (ny * z + y) + x;
-}
-
-__device__ bool isValidFluidNode(uint nodeType);
-
 }
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
index 5a2d8c9a426e5cb23ca75f91aaf6fbff75cba72b..7b7b857d5c3ac157445bb154aecfb4dfa9c4b0bc 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
@@ -1,7 +1,11 @@
 #include "KernelFactoryImp.h"
 
+#include <logger/Logger.h>
+
 #include "Parameter/Parameter.h"
 
+#include "Kernel/Utilities/KernelTypes.h"
+
 //LBM kernel (compressible)
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h"
@@ -57,151 +61,153 @@
 #include "Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/PMFluidFlowCompStrategy.h"
 #include "Kernel/Kernels/WaleKernels/FluidFlow/Compressible/WaleFluidFlowCompStrategy.h"
 
+using namespace vf;
+
 std::vector<std::shared_ptr<Kernel>> KernelFactoryImp::makeKernels(std::shared_ptr<Parameter> para)
 {
-	std::vector< std::shared_ptr< Kernel>> kernels;
-	for (int level = 0; level <= para->getMaxLevel(); level++)
-		kernels.push_back(makeKernel(para, para->getMainKernel(), level));
-
-	if (para->getMaxLevel() > 0)
-		if (para->getMultiKernelOn())
-			for (std::size_t i = 0; i < para->getMultiKernelLevel().size(); i++)
-				setKernelAtLevel(kernels, para, para->getMultiKernel().at(i), para->getMultiKernelLevel().at(i));
-	return kernels;
+    std::vector< std::shared_ptr< Kernel>> kernels;
+    for (int level = 0; level <= para->getMaxLevel(); level++)
+        kernels.push_back(makeKernel(para, para->getMainKernel(), level));
+
+    if (para->getMaxLevel() > 0)
+        if (para->getMultiKernelOn())
+            for (std::size_t i = 0; i < para->getMultiKernelLevel().size(); i++)
+                setKernelAtLevel(kernels, para, para->getMultiKernel().at(i), para->getMultiKernelLevel().at(i));
+    return kernels;
 }
 
 std::vector<std::shared_ptr<ADKernel>> KernelFactoryImp::makeAdvDifKernels(std::shared_ptr<Parameter> para)
 {
-	std::vector< std::shared_ptr< ADKernel>> aDKernels;
-	for (int level = 0; level <= para->getMaxLevel(); level++)
-		aDKernels.push_back(makeAdvDifKernel(para, para->getADKernel(), level));
-	return aDKernels;
+    std::vector< std::shared_ptr< ADKernel>> aDKernels;
+    for (int level = 0; level <= para->getMaxLevel(); level++)
+        aDKernels.push_back(makeAdvDifKernel(para, para->getADKernel(), level));
+    return aDKernels;
 }
 
 void KernelFactoryImp::setPorousMedia(std::vector<std::shared_ptr<PorousMedia>> pm)
 {
-	this->pm = pm;
+    this->pm = pm;
 }
 
 void KernelFactoryImp::setKernelAtLevel(std::vector<std::shared_ptr<Kernel>> kernels, std::shared_ptr<Parameter> para, std::string kernel, int level)
 {
-	kernels.at(level) = makeKernel(para, kernel, level);
+    kernels.at(level) = makeKernel(para, kernel, level);
 }
 
 std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> para, std::string kernel, int level)
 {
-    printf("Instantiating Kernel: %s\n", kernel.c_str());
-	std::shared_ptr<KernelImp> newKernel;
-	std::shared_ptr<CheckParameterStrategy> checkStrategy;
-
-    if (kernel == "BGKCompSP27") {
-        newKernel     = BGKCompSP27::getNewInstance(para, level);   // compressible
-        checkStrategy = FluidFlowCompStrategy::getInstance();       //      ||
-    } else if (kernel == "BGKUnified") {                            //      \/
+    VF_LOG_INFO("Instantiating Kernel: {}", kernel);
+    std::shared_ptr<KernelImp> newKernel;
+    std::shared_ptr<CheckParameterStrategy> checkStrategy;
+
+    if (kernel == CollisionKernel::Compressible::BGK) {
+        newKernel     = BGKCompSP27::getNewInstance(para, level);               // compressible
+        checkStrategy = FluidFlowCompStrategy::getInstance();                   //      ||
+    } else if (kernel == CollisionKernel::Compressible::BGKUnified) {           //      \/
         newKernel     = std::make_shared<vf::gpu::BGKUnified>(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "BGKPlusCompSP27") {
+    } else if (kernel == CollisionKernel::Compressible::BGKPlus) {
         newKernel     = BGKPlusCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "MRTCompSP27") {
+    } else if (kernel == CollisionKernel::Compressible::MRT) {
         newKernel     = MRTCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CascadeCompSP27") {
+    } else if (kernel == CollisionKernel::Compressible::Cascade) {
         newKernel     = CascadeCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantCompSP27") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantClassic) {
         newKernel     = CumulantCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK15Unified") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK15Unified) {
         newKernel     = std::make_shared<vf::gpu::CumulantK15Unified>(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17Unified") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK17Unified) {
         newKernel     = std::make_shared<vf::gpu::CumulantK17Unified>(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17BulkComp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK17Bulk) {
         newKernel     = CumulantK17BulkComp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17CompChim") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK17Chim) {
         newKernel     = CumulantK17CompChim::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17"){               
-        switch(para->getTurbulenceModel())                                          
-        {   
+    } else if (kernel == CollisionKernel::Compressible::CumulantK17){
+        switch(para->getTurbulenceModel())
+        {
             case TurbulenceModel::AMD:
-                newKernel = CumulantK17<TurbulenceModel::AMD>::getNewInstance(para, level);   
+                newKernel = CumulantK17<TurbulenceModel::AMD>::getNewInstance(para, level);
                 break;
             case TurbulenceModel::Smagorinsky:
-                newKernel = CumulantK17<TurbulenceModel::Smagorinsky>::getNewInstance(para, level);  
+                newKernel = CumulantK17<TurbulenceModel::Smagorinsky>::getNewInstance(para, level);
                 break;
             case TurbulenceModel::QR:
-                newKernel = CumulantK17<TurbulenceModel::QR>::getNewInstance(para, level);  
+                newKernel = CumulantK17<TurbulenceModel::QR>::getNewInstance(para, level);
                 break;
             case TurbulenceModel::None:
-                newKernel = CumulantK17<TurbulenceModel::None>::getNewInstance(para, level); 
+                newKernel = CumulantK17<TurbulenceModel::None>::getNewInstance(para, level);
                 break;
             default:
                 throw std::runtime_error("Unknown turbulence model!");
-            break;                                                              
-        }                                                                       
-        checkStrategy = FluidFlowCompStrategy::getInstance();       
-    } else if (kernel == "CumulantAll4CompSP27") {
+            break;
+        }
+        checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == CollisionKernel::Compressible::CumulantAll4SP27) {
         newKernel     = CumulantAll4CompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK18Comp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK18) {
         newKernel     = CumulantK18Comp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK20Comp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK20) {
         newKernel     = CumulantK20Comp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK15Comp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK15) {
         newKernel     = CumulantK15Comp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK15BulkComp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK15Bulk) {
         newKernel     = CumulantK15BulkComp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK15SpongeComp") {                             //     /\      //
-        newKernel     = CumulantK15SpongeComp::getNewInstance(para, level);     //	   ||
+    } else if (kernel == CollisionKernel::Compressible::CumulantK15Sponge) {    //     /\      //
+        newKernel     = CumulantK15SpongeComp::getNewInstance(para, level);     //     ||
         checkStrategy = FluidFlowCompStrategy::getInstance();                   // compressible
-    }																			//===============
-	else if (  kernel == "BGKIncompSP27") {										// incompressible
-        newKernel     = BGKIncompSP27::getNewInstance(para, level);				//	   ||
+    }                                                                           //===============
+    else if (  kernel == CollisionKernel::Incompressible::BGK) {                // incompressible
+        newKernel     = BGKIncompSP27::getNewInstance(para, level);             //     ||
         checkStrategy = FluidFlowIncompStrategy::getInstance();                 //     \/
-    } else if (kernel == "BGKPlusIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::BGKPlus) {
         newKernel     = BGKPlusIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "MRTIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::MRT) {
         newKernel     = MRTIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "CascadeIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::Cascade) {
         newKernel     = CascadeIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "Cumulant1hIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::Cumulant1h) {
         newKernel     = Cumulant1hIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "CumulantIsoIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::CumulantIsometric) {
         newKernel     = CumulantIsoIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "CumulantK15Incomp") {									//     /\      //
-        newKernel     = CumulantK15Incomp::getNewInstance(para, level);			//	   ||
-        checkStrategy = FluidFlowIncompStrategy::getInstance();                 // incompressible
-    }																			//===============
-	else if (kernel == "PMCumulantOneCompSP27") {								// porous media
-        newKernel     = PMCumulantOneCompSP27::getNewInstance(para, pm, level);	//	   ||
-        checkStrategy = PMFluidFlowCompStrategy::getInstance();                 // porous media
-    }                                                                           //===============
-    else if (kernel == "WaleCumulantK17Comp") {                                 // wale model
-        newKernel     = WaleCumulantK17Comp::getNewInstance(para, level);       //	   ||
-        checkStrategy = WaleFluidFlowCompStrategy::getInstance();               //     \/
-    } else if (kernel == "WaleCumulantK17DebugComp") {
+    } else if (kernel == CollisionKernel::Incompressible::CumulantK15) {          //     /\      //
+        newKernel     = CumulantK15Incomp::getNewInstance(para, level);           //     ||
+        checkStrategy = FluidFlowIncompStrategy::getInstance();                   // incompressible
+    }                                                                             //===============
+    else if (kernel == CollisionKernel::PorousMedia::CumulantOne) {               // porous media
+        newKernel     = PMCumulantOneCompSP27::getNewInstance(para, pm, level);   //     ||
+        checkStrategy = PMFluidFlowCompStrategy::getInstance();                   // porous media
+    }                                                                             //===============
+    else if (kernel == CollisionKernel::Wale::CumulantK17) {                      // wale model
+        newKernel     = WaleCumulantK17Comp::getNewInstance(para, level);         //     ||
+        checkStrategy = WaleFluidFlowCompStrategy::getInstance();                 //     \/
+    } else if (kernel == CollisionKernel::Wale::CumulantK17Debug) {
         newKernel     = WaleCumulantK17DebugComp::getNewInstance(para, level);
         checkStrategy = WaleFluidFlowCompStrategy::getInstance();
-    } else if (kernel == "WaleCumulantK15Comp") {
+    } else if (kernel == CollisionKernel::Wale::CumulantK15) {
         newKernel     = WaleCumulantK15Comp::getNewInstance(para, level);
         checkStrategy = WaleFluidFlowCompStrategy::getInstance();
-    } else if (kernel == "WaleBySoniMalavCumulantK15Comp") {                    //     /\      //
-        newKernel     = WaleBySoniMalavCumulantK15Comp::getNewInstance(para, level);// ||
-        checkStrategy = WaleFluidFlowCompStrategy::getInstance();               // wale model
-    }                                                                          //===============
+    } else if (kernel == CollisionKernel::Wale::CumulantK15SoniMalav) {              //     /\      //
+        newKernel     = WaleBySoniMalavCumulantK15Comp::getNewInstance(para, level); //     ||
+        checkStrategy = WaleFluidFlowCompStrategy::getInstance();                    // wale model
+    }                                                                                //===============
     else {
         throw std::runtime_error("KernelFactory does not know the KernelType.");
     }
@@ -212,8 +218,8 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
 
 std::shared_ptr<ADKernel> KernelFactoryImp::makeAdvDifKernel(std::shared_ptr<Parameter> para, std::string kernel, int level)
 {
-	std::shared_ptr<ADKernel> newKernel;
-	std::shared_ptr<CheckParameterStrategy> checkStrategy;
+    std::shared_ptr<ADKernel> newKernel;
+    std::shared_ptr<CheckParameterStrategy> checkStrategy;
 
     if (kernel == "ADComp27") {
         newKernel     = ADComp27::getNewInstance(para, level);
@@ -223,18 +229,18 @@ std::shared_ptr<ADKernel> KernelFactoryImp::makeAdvDifKernel(std::shared_ptr<Par
         checkStrategy = ADMod7CompStrategy::getInstance();
     } else if (kernel == "ADIncomp27") {
         newKernel     = ADIncomp27::getNewInstance(para, level);
-        checkStrategy = ADMod7CompStrategy::getInstance();
+        checkStrategy = ADMod7IncompStrategy::getInstance();
     } else if (kernel == "ADIncomp7") {
         newKernel     = ADIncomp7::getNewInstance(para, level);
-        checkStrategy = ADMod7CompStrategy::getInstance();
+        checkStrategy = ADMod7IncompStrategy::getInstance();
     } else {
         throw std::runtime_error("KernelFactory does not know the KernelType.");
     }
 
-	if (newKernel) {
-		newKernel->setCheckParameterStrategy(checkStrategy);
-		return newKernel;
-	}
-	else
-		throw  std::runtime_error("KernelFactory does not know the KernelType.");
+    if (newKernel) {
+        newKernel->setCheckParameterStrategy(checkStrategy);
+        return newKernel;
+    }
+    else
+        throw  std::runtime_error("KernelFactory does not know the KernelType.");
 }
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelGroup.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelGroup.h
deleted file mode 100644
index 0a6543ca0ac1d47bb6f8838d029769846c361868..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelGroup.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef KERNEL_GROUP_H
-#define KERNEL_GROUP_H
-
-enum KernelGroup
-{
-	BasicKernel,
-	BasicWaleKernel,
-	F3Kernel,
-	F3WaleKernel,
-	ADKernel7,
-	ADKernel27
-};
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h
new file mode 100644
index 0000000000000000000000000000000000000000..f249c0bd595d21455b4338334763be4e08abeda9
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h
@@ -0,0 +1,51 @@
+#ifndef KERNEL_TYPES_H
+#define KERNEL_TYPES_H
+
+namespace vf::CollisionKernel::Compressible {
+    static const std::string BGK = "BGKCompSP27";
+    static const std::string BGKUnified = "BGKUnified";
+    static const std::string BGKPlus = "BGKPlusCompSP27";
+    static const std::string MRT = "MRTCompSP27";
+    static const std::string Cascade = "CascadeCompSP27";
+
+    static const std::string CumulantClassic = "CumulantCompSP27";
+
+    static const std::string CumulantK15Unified = "CumulantK15Unified";
+    static const std::string CumulantK17Unified = "CumulantK17Unified";
+
+    static const std::string CumulantK17Bulk = "CumulantK17BulkComp";
+    static const std::string CumulantK17Chim = "CumulantK17CompChim";
+    static const std::string CumulantK17 = "CumulantK17";
+
+    static const std::string CumulantAll4SP27 = "CumulantAll4CompSP27";
+    static const std::string CumulantK18 = "CumulantK18Comp";
+    static const std::string CumulantK20 = "CumulantK20Comp";
+
+    static const std::string CumulantK15 = "CumulantK15Comp";
+    static const std::string CumulantK15Bulk = "CumulantK15BulkComp";
+    static const std::string CumulantK15Sponge = "CumulantK15SpongeComp";
+}
+
+namespace vf::CollisionKernel::Incompressible {
+    static const std::string BGK = "BGKIncompSP27";
+    static const std::string BGKPlus = "BGKPlusIncompSP27";
+    static const std::string MRT = "MRTIncompSP27";
+    static const std::string Cascade = "CascadeIncompSP27";
+
+    static const std::string Cumulant1h = "Cumulant1hIncompSP27";
+    static const std::string CumulantIsometric = "CumulantIsoIncompSP27";
+    static const std::string CumulantK15 = "CumulantK15Incomp";
+}
+
+namespace vf::CollisionKernel::PorousMedia {
+    static const std::string CumulantOne = "CumulantOneCompSP27";
+}
+
+namespace vf::CollisionKernel::Wale {
+    static const std::string CumulantK17 = "WaleCumulantK17Comp";
+    static const std::string CumulantK17Debug = "WaleCumulantK17DebugComp";
+    static const std::string CumulantK15 = "WaleCumulantK15Comp";
+    static const std::string CumulantK15SoniMalav = "WaleBySoniMalavCumulantK15Comp";
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h
deleted file mode 100644
index 13ce5d88aaa7cb49225fa914c1f59c2de05802f5..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/ScalingHelperFunctions.h
+++ /dev/null
@@ -1,148 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file scalingHelperFunctions.h
-//! \ingroup GPU/Kernel/Utilities
-//! \author Martin Schoenherr, Anna Wellmann
-//=======================================================================================
-
-#ifndef SCALING_HELPER_FUNCTIONS_H
-#define SCALING_HELPER_FUNCTIONS_H
-
-#include "LBM/LB.h" 
-#include "lbm/constants/D3Q27.h"
-#include "lbm/constants/NumericConstants.h"
-
-using namespace vf::lbm::constant;
-using namespace vf::lbm::dir;
-
-__device__ __inline__ void calculateMomentsOnSourceNodes(
-    Distributions27& dist,
-    real& omega,
-    unsigned int& k_000,
-    unsigned int& k_M00,
-    unsigned int& k_0M0,
-    unsigned int& k_00M,
-    unsigned int& k_MM0,
-    unsigned int& k_M0M,
-    unsigned int& k_0MM,
-    unsigned int& k_MMM,
-    real& drho,
-    real& velocityX,
-    real& velocityY,
-    real& velocityZ,
-    real& kxyFromfcNEQ,
-    real& kyzFromfcNEQ,
-    real& kxzFromfcNEQ,
-    real& kxxMyyFromfcNEQ,
-    real& kxxMzzFromfcNEQ
-    ){
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Set local distributions (f's) on source nodes:
-        //!
-        real f_000 = (dist.f[DIR_000])[k_000]; 
-        real f_P00 = (dist.f[DIR_P00])[k_000];
-        real f_M00 = (dist.f[DIR_M00])[k_M00];
-        real f_0P0 = (dist.f[DIR_0P0])[k_000];
-        real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
-        real f_00P = (dist.f[DIR_00P])[k_000];
-        real f_00M = (dist.f[DIR_00M])[k_00M];
-        real f_PP0 = (dist.f[DIR_PP0])[k_000];
-        real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
-        real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
-        real f_MP0 = (dist.f[DIR_MP0])[k_M00];
-        real f_P0P = (dist.f[DIR_P0P])[k_000];
-        real f_M0M = (dist.f[DIR_M0M])[k_M0M];
-        real f_P0M = (dist.f[DIR_P0M])[k_00M];
-        real f_M0P = (dist.f[DIR_M0P])[k_M00];
-        real f_0PP = (dist.f[DIR_0PP])[k_000];
-        real f_0MM = (dist.f[DIR_0MM])[k_0MM];
-        real f_0PM = (dist.f[DIR_0PM])[k_00M];
-        real f_0MP = (dist.f[DIR_0MP])[k_0M0];
-        real f_PPP = (dist.f[DIR_PPP])[k_000];
-        real f_MPP = (dist.f[DIR_MPP])[k_M00];
-        real f_PMP = (dist.f[DIR_PMP])[k_0M0];
-        real f_MMP = (dist.f[DIR_MMP])[k_MM0];
-        real f_PPM = (dist.f[DIR_PPM])[k_00M];
-        real f_MPM = (dist.f[DIR_MPM])[k_M0M];
-        real f_PMM = (dist.f[DIR_PMM])[k_0MM];
-        real f_MMM = (dist.f[DIR_MMM])[k_MMM];
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
-        //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-        //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-        //!
-        drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
-                (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
-                 ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
-                 ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
-                   f_000;
-
-        real oneOverRho = c1o1 / (c1o1 + drho);
-
-        velocityX = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
-                     (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
-                    oneOverRho;
-        velocityY = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
-                     (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
-                    oneOverRho;
-        velocityZ = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
-                     (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
-                    oneOverRho;
-
-        ////////////////////////////////////////////////////////////////////////////////////
-        //! - Calculate second order moments for interpolation
-        //!
-        // example: kxxMzz: moment, second derivative in x direction minus the second derivative in z direction
-        kxyFromfcNEQ =
-            -c3o1 * omega *
-            ((f_MM0 + f_MMM + f_MMP - f_MP0 - f_MPM - f_MPP - f_PM0 - f_PMM - f_PMP + f_PP0 + f_PPM + f_PPP) /
-                 (c1o1 + drho) -
-             ((velocityX * velocityY)));
-        kyzFromfcNEQ =
-            -c3o1 * omega *
-            ((f_0MM + f_PMM + f_MMM - f_0MP - f_PMP - f_MMP - f_0PM - f_PPM - f_MPM + f_0PP + f_PPP + f_MPP) /
-                 (c1o1 + drho) -
-             ((velocityY * velocityZ)));
-        kxzFromfcNEQ =
-            -c3o1 * omega *
-            ((f_M0M + f_MMM + f_MPM - f_M0P - f_MMP - f_MPP - f_P0M - f_PMM - f_PPM + f_P0P + f_PMP + f_PPP) /
-                 (c1o1 + drho) -
-             ((velocityX * velocityZ)));
-        kxxMyyFromfcNEQ =
-            -c3o2 * omega *
-            ((f_M0M + f_M00 + f_M0P - f_0MM - f_0M0 - f_0MP - f_0PM - f_0P0 - f_0PP + f_P0M + f_P00 + f_P0P) / (c1o1 + drho) -
-             ((velocityX * velocityX - velocityY * velocityY)));
-        kxxMzzFromfcNEQ =
-            -c3o2 * omega *
-            ((f_MM0 + f_M00 + f_MP0 - f_0MM - f_0MP - f_00M - f_00P - f_0PM - f_0PP + f_PM0 + f_P00 + f_PP0) / (c1o1 + drho) -
-             ((velocityX * velocityX - velocityZ * velocityZ)));
-}
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
index 9ca813ac4987af618491422acb60207b7fee543c..34a0c589919058d4edd1969812c16e75941d28b4 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
@@ -56,7 +56,7 @@ void ADKernelManager::initAD(const int level) const
         para->getParD(level)->velocityY, 
         para->getParD(level)->velocityZ,
         para->getParD(level)->numberOfNodes, 
-        para->getParD(level)->distributionsAD27.f[0],
+        para->getParD(level)->distributionsAD.f[0],
         para->getParD(level)->isEvenTimestep);
     //////////////////////////////////////////////////////////////////////////
     para->getParD(level)->isEvenTimestep = false;
@@ -72,7 +72,7 @@ void ADKernelManager::initAD(const int level) const
         para->getParD(level)->velocityY, 
         para->getParD(level)->velocityZ,
         para->getParD(level)->numberOfNodes, 
-        para->getParD(level)->distributionsAD27.f[0],
+        para->getParD(level)->distributionsAD.f[0],
         para->getParD(level)->isEvenTimestep);
     //////////////////////////////////////////////////////////////////////////
     CalcConcentration27(
@@ -83,17 +83,17 @@ void ADKernelManager::initAD(const int level) const
         para->getParD(level)->neighborY,
         para->getParD(level)->neighborZ,
         para->getParD(level)->numberOfNodes,
-        para->getParD(level)->distributionsAD27.f[0],
+        para->getParD(level)->distributionsAD.f[0],
         para->getParD(level)->isEvenTimestep);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 void ADKernelManager::setInitialNodeValuesAD(const int level, SPtr<CudaMemoryManager> cudaMemoryManager) const
 {
-    for (uint j = 1; j <= para->getParH(level)->numberOfNodes; j++) {
-        const real coordX = para->getParH(level)->coordinateX[j];
-        const real coordY = para->getParH(level)->coordinateY[j];
-        const real coordZ = para->getParH(level)->coordinateZ[j];
+    for (size_t index = 1; index <= para->getParH(level)->numberOfNodes; index++) {
+        const real coordX = para->getParH(level)->coordinateX[index];
+        const real coordY = para->getParH(level)->coordinateY[index];
+        const real coordZ = para->getParH(level)->coordinateZ[index];
 
         real concentration;
 
@@ -104,7 +104,7 @@ void ADKernelManager::setInitialNodeValuesAD(const int level, SPtr<CudaMemoryMan
             concentration = real(0.0);
         }
 
-        para->getParH(level)->concentration[j] = concentration;
+        para->getParH(level)->concentration[index] = concentration;
     }
 
     cudaMemoryManager->cudaCopyConcentrationHostToDevice(level);
@@ -173,7 +173,7 @@ void ADKernelManager::runADcollisionKernel(const int level)const
             para->getParD(level)->neighborY,
             para->getParD(level)->neighborZ,
             para->getParD(level)->distributions.f[0],
-            para->getParD(level)->distributionsAD27.f[0],
+            para->getParD(level)->distributionsAD.f[0],
             para->getParD(level)->numberOfNodes,
             para->getParD(level)->forcing,
             para->getParD(level)->isEvenTimestep);
@@ -188,7 +188,7 @@ void ADKernelManager::runADslipBCKernel(const int level) const{
             para->getParD(level)->slipBC.normalY,
             para->getParD(level)->slipBC.normalZ,
             para->getParD(level)->distributions.f[0],
-            para->getParD(level)->distributionsAD27.f[0],
+            para->getParD(level)->distributionsAD.f[0],
             para->getParD(level)->slipBC.k,
             para->getParD(level)->slipBC.q27[0],
             para->getParD(level)->slipBC.numberOfBCnodes,
@@ -265,7 +265,7 @@ void ADKernelManager::runADpressureBCKernel(const int level) const{
             QADPressDev27(
                 para->getParD(level)->numberofthreads,
                 para->getParD(level)->distributions.f[0],
-                para->getParD(level)->distributionsAD27.f[0],
+                para->getParD(level)->distributionsAD.f[0],
                 para->getParD(level)->TempPress.temp,
                 para->getParD(level)->TempPress.velo,
                 para->getParD(level)->diffusivity,
@@ -346,7 +346,7 @@ void ADKernelManager::runADgeometryBCKernel(const int level) const
             QADBBDev27(
                 para->getParD(level)->numberofthreads,
                 para->getParD(level)->distributions.f[0],
-                para->getParD(level)->distributionsAD27.f[0],
+                para->getParD(level)->distributionsAD.f[0],
                 para->getParD(level)->Temp.temp,
                 para->getParD(level)->diffusivity,
                 para->getParD(level)->Temp.k,
@@ -428,7 +428,7 @@ void ADKernelManager::runADveloBCKernel(const int level) const{
             QADVelDev27(
                 para->getParD(level)->numberofthreads,
                 para->getParD(level)->distributions.f[0],
-                para->getParD(level)->distributionsAD27.f[0],
+                para->getParD(level)->distributionsAD.f[0],
                 para->getParD(level)->TempVel.tempPulse,
                 para->getParD(level)->TempVel.velo,
                 para->getParD(level)->diffusivity,
@@ -498,7 +498,7 @@ void ADKernelManager::printAD(const int level, SPtr<CudaMemoryManager> cudaMemor
         para->getParD(level)->neighborY,
         para->getParD(level)->neighborZ,
         para->getParD(level)->numberOfNodes,
-        para->getParD(level)->distributionsAD27.f[0],
+        para->getParD(level)->distributionsAD.f[0],
         para->getParD(level)->isEvenTimestep);
 
     cudaMemoryManager->cudaCopyConcentrationDeviceToHost(level);
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.h b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.h
index 1c069a364ab5ef2837ffd05d0ef4b6488365e4e3..d961452575b905acb96fbc1c30ff0d5e71af2722 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.h
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.h
@@ -33,7 +33,7 @@
 #ifndef ADVECTION_DIFFUSION_H
 #define ADVECTION_DIFFUSION_H
 
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
index 2b6a266c0d4e5f523091fa4982eee5d83b2ec675..229922b1ec1654e7ca664f9d19a7b7c6e264fd83 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
@@ -47,10 +47,10 @@ GridScalingKernelManager::GridScalingKernelManager(SPtr<Parameter> parameter, Gr
         if(!gridScalingFactory){
             throw std::runtime_error("There is more than one level, but no scalingFactory was provided.");
         }
-        checkScalingFunction(gridScalingFactory->getGridScalingFC(), this->para->getParD(0)->intFC, "scalingFineToCoarse");
-        checkScalingFunction(gridScalingFactory->getGridScalingCF(), this->para->getParD(0)->intCF, "scalingCoarseToFine");
-        this->scalingFineToCoarse = gridScalingFactory->getGridScalingFC();
-        this->scalingCoarseToFine = gridScalingFactory->getGridScalingCF();
+        checkScalingFunction(gridScalingFactory->getGridScalingFC(parameter->getUseTurbulentViscosity()), this->para->getParD(0)->fineToCoarse, "scalingFineToCoarse");
+        checkScalingFunction(gridScalingFactory->getGridScalingCF(parameter->getUseTurbulentViscosity()), this->para->getParD(0)->coarseToFine, "scalingCoarseToFine");
+        this->scalingFineToCoarse = gridScalingFactory->getGridScalingFC(parameter->getUseTurbulentViscosity());
+        this->scalingCoarseToFine = gridScalingFactory->getGridScalingCF(parameter->getUseTurbulentViscosity());
     }
     
     if(this->scalingFineToCoarse == nullptr)
@@ -59,11 +59,11 @@ GridScalingKernelManager::GridScalingKernelManager(SPtr<Parameter> parameter, Gr
         VF_LOG_TRACE("Function for scalingCoarseToFine is nullptr");
 }
 
-void GridScalingKernelManager::runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, CudaStreamIndex streamIndex) const
+void GridScalingKernelManager::runFineToCoarseKernelLB(const int level, InterpolationCells *fineToCoarse, ICellNeigh &neighborFineToCoarse, CudaStreamIndex streamIndex) const
 {
     cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
 
-    this->scalingFineToCoarse(para->getParD(level).get(), para->getParD(level+1).get(), icellFC, offFC, stream);
+    this->scalingFineToCoarse(para->getParD(level).get(), para->getParD(level+1).get(), fineToCoarse, neighborFineToCoarse, stream);
 
     // ScaleFC_comp_D3Q27F3(
     //     para->getParD(level)->distributions.f[0],
@@ -294,21 +294,21 @@ void GridScalingKernelManager::runFineToCoarseKernelAD(const int level) const
             para->getParD(level)->numberOfNodes,
             para->getParD(level+1)->numberOfNodes,
             para->getParD(level)->isEvenTimestep,
-            para->getParD(level)->intFC.ICellFCC,
-            para->getParD(level)->intFC.ICellFCF,
-            para->getParD(level)->K_FC,
-            para->getParD(level)->vis,
+            para->getParD(level)->fineToCoarse.coarseCellIndices,
+            para->getParD(level)->fineToCoarse.fineCellIndices,
+            para->getParD(level)->fineToCoarse.numberOfCells,
+            para->getParD(level)->viscosity,
             para->getParD(level)->diffusivity,
             para->getParD(level)->numberofthreads,
-            para->getParD(level)->offFC);
+            para->getParD(level)->neighborFineToCoarse);
     }
     else if (para->getDiffMod() == 27)
     {
         ScaleFCThS27(
             para->getParD(level)->distributions.f[0],
             para->getParD(level+1)->distributions.f[0],
-            para->getParD(level)->distributionsAD27.f[0],
-            para->getParD(level+1)->distributionsAD27.f[0],
+            para->getParD(level)->distributionsAD.f[0],
+            para->getParD(level+1)->distributionsAD.f[0],
             para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
             para->getParD(level)->neighborZ,
@@ -318,20 +318,20 @@ void GridScalingKernelManager::runFineToCoarseKernelAD(const int level) const
             para->getParD(level)->numberOfNodes,
             para->getParD(level+1)->numberOfNodes,
             para->getParD(level)->isEvenTimestep,
-            para->getParD(level)->intFC.ICellFCC,
-            para->getParD(level)->intFC.ICellFCF,
-            para->getParD(level)->K_FC,
-            para->getParD(level)->vis,
+            para->getParD(level)->fineToCoarse.coarseCellIndices,
+            para->getParD(level)->fineToCoarse.fineCellIndices,
+            para->getParD(level)->fineToCoarse.numberOfCells,
+            para->getParD(level)->viscosity,
             para->getParD(level)->diffusivity,
             para->getParD(level)->numberofthreads,
-            para->getParD(level)->offFC);
+            para->getParD(level)->neighborFineToCoarse);
     }
 }
 
-void GridScalingKernelManager::runCoarseToFineKernelLB(const int level, InterpolationCellCF* icellCF, OffCF &offCF, CudaStreamIndex streamIndex) const
+void GridScalingKernelManager::runCoarseToFineKernelLB(const int level, InterpolationCells* coarseToFine, ICellNeigh &neighborCoarseToFine, CudaStreamIndex streamIndex) const
 {
     cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
-    this->scalingCoarseToFine(para->getParD(level).get(), para->getParD(level+1).get(), icellCF, offCF, stream);
+    this->scalingCoarseToFine(para->getParD(level).get(), para->getParD(level+1).get(), coarseToFine, neighborCoarseToFine, stream);
 
     // ScaleCF_comp_D3Q27F3(
     //     para->getParD(level)->distributions.f[0],
@@ -563,21 +563,21 @@ void GridScalingKernelManager::runCoarseToFineKernelAD(const int level) const
             para->getParD(level)->numberOfNodes,
             para->getParD(level+1)->numberOfNodes,
             para->getParD(level)->isEvenTimestep,
-            para->getParD(level)->intCF.ICellCFC,
-            para->getParD(level)->intCF.ICellCFF,
-            para->getParD(level)->K_CF,
-            para->getParD(level)->vis,
+            para->getParD(level)->coarseToFine.coarseCellIndices,
+            para->getParD(level)->coarseToFine.fineCellIndices,
+            para->getParD(level)->coarseToFine.numberOfCells,
+            para->getParD(level)->viscosity,
             para->getParD(level+1)->diffusivity,
             para->getParD(level)->numberofthreads,
-            para->getParD(level)->offCF);
+            para->getParD(level)->neighborCoarseToFine);
     }
     else if (para->getDiffMod() == 27)
     {
         ScaleCFThS27(
             para->getParD(level)->distributions.f[0],
             para->getParD(level+1)->distributions.f[0],
-            para->getParD(level)->distributionsAD27.f[0],
-            para->getParD(level+1)->distributionsAD27.f[0],
+            para->getParD(level)->distributionsAD.f[0],
+            para->getParD(level+1)->distributionsAD.f[0],
             para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
             para->getParD(level)->neighborZ,
@@ -587,12 +587,12 @@ void GridScalingKernelManager::runCoarseToFineKernelAD(const int level) const
             para->getParD(level)->numberOfNodes,
             para->getParD(level+1)->numberOfNodes,
             para->getParD(level)->isEvenTimestep,
-            para->getParD(level)->intCF.ICellCFC,
-            para->getParD(level)->intCF.ICellCFF,
-            para->getParD(level)->K_CF,
-            para->getParD(level)->vis,
+            para->getParD(level)->coarseToFine.coarseCellIndices,
+            para->getParD(level)->coarseToFine.fineCellIndices,
+            para->getParD(level)->coarseToFine.numberOfCells,
+            para->getParD(level)->viscosity,
             para->getParD(level+1)->diffusivity,
             para->getParD(level)->numberofthreads,
-            para->getParD(level)->offCF);
+            para->getParD(level)->neighborCoarseToFine);
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h
index 3c78ee7f9db254556e8ec6dbbafaf51cd995f10b..0c24801506b9a19985d1a805b349d0b58e9bbf84 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.h
@@ -36,7 +36,7 @@
 #include "LBM/LB.h"
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 #include <functional>
 #include <memory>
 #include <stdexcept>
@@ -48,10 +48,8 @@ enum class CudaStreamIndex;
 struct LBMSimulationParameter;
 struct CUstream_st;
 
-using gridScalingFC =
-    std::function<void(LBMSimulationParameter *, LBMSimulationParameter *, ICellFC *, OffFC &, CUstream_st *stream)>;
-using gridScalingCF =
-    std::function<void(LBMSimulationParameter *, LBMSimulationParameter *, ICellCF *, OffCF &, CUstream_st *stream)>;
+using gridScaling =
+    std::function<void(LBMSimulationParameter *, LBMSimulationParameter *, ICells *, ICellNeigh &, CUstream_st *stream)>;
 
 //! \class GridScalingKernelManager
 //! \brief manage the cuda kernel calls
@@ -64,13 +62,13 @@ public:
     GridScalingKernelManager(SPtr<Parameter> parameter, GridScalingFactory *gridScalingFactory);
 
     //! \brief calls the device function of the fine to coarse grid interpolation kernelH
-    void runFineToCoarseKernelLB(const int level, InterpolationCellFC *icellFC, OffFC &offFC, CudaStreamIndex streamIndex) const;
+    void runFineToCoarseKernelLB(const int level, InterpolationCells *fineToCoarse, ICellNeigh &neighborFineToCoarse, CudaStreamIndex streamIndex) const;
 
     //! \brief calls the device function of the fine to coarse grid interpolation kernel (advection diffusion)
     void runFineToCoarseKernelAD(const int level) const;
 
     //! \brief calls the device function of the coarse to fine grid interpolation kernel
-    void runCoarseToFineKernelLB(const int level, InterpolationCellCF *icellCF, OffCF &offCF, CudaStreamIndex streamIndex) const;
+    void runCoarseToFineKernelLB(const int level, InterpolationCells *coarseToFine, ICellNeigh &neighborCoarseToFine, CudaStreamIndex streamIndex) const;
 
     //! \brief calls the device function of the coarse to fine grid interpolation kernel (advection diffusion)
     void runCoarseToFineKernelAD(const int level) const;
@@ -78,35 +76,21 @@ public:
 private:
     //! \brief check if grid scaling was set
     //! \throws std::runtime_error if interpolation nodes were assigned, but no scaling function was set in the grid
-    //! scaling factory \param scalingFunctionFC: a kernel function for the grid scaling \param scalingStruct: a struct
+    //! scaling factory \param scalingFunction: a kernel function for the grid scaling \param scalingStruct: a struct
     //! containing the grid nodes which are part of the interpolation \param scalingName: the name of the checked
     //! scaling function
-    void checkScalingFunction(const gridScalingFC &scalingFunctionFC, const InterpolationCellFC &scalingStruct,
+    void checkScalingFunction(const gridScaling &scalingFunction, const InterpolationCells &scalingStruct,
                               const std::string &scalingName)
     {
-        if (!scalingFunctionFC && scalingStruct.kFC > 0)
+        if (!scalingFunction && scalingStruct.numberOfCells > 0)
             throw std::runtime_error("The scaling function " + scalingName + " was not set!");
-        if (scalingFunctionFC && scalingStruct.kFC == 0)
-            VF_LOG_WARNING("The scaling function {} was set, although there is no refinement", scalingName);
-    }
-
-    //! \brief check if grid scaling was set
-    //! \throws std::runtime_error if interpolation nodes were assigned, but no scaling function was set in the grid
-    //! scaling factory \param scalingFunctionCF: a kernel function for the grid scaling \param scalingStruct: a struct
-    //! containing the grid nodes which are part of the interpolation \param scalingName: the name of the checked
-    //! scaling function
-    void checkScalingFunction(const gridScalingCF &scalingFunctionCF, const InterpolationCellCF &scalingStruct,
-                              const std::string &scalingName)
-    {
-        if (!scalingFunctionCF && scalingStruct.kCF > 0)
-            throw std::runtime_error("The scaling function " + scalingName + " was not set!");
-        if (scalingFunctionCF && scalingStruct.kCF == 0)
+        if (scalingFunction && scalingStruct.numberOfCells == 0)
             VF_LOG_WARNING("The scaling function {} was set, although there is no refinement", scalingName);
     }
 
     SPtr<Parameter> para;
 
-    gridScalingFC scalingFineToCoarse = nullptr;
-    gridScalingCF scalingCoarseToFine = nullptr;
+    gridScaling scalingFineToCoarse = nullptr;
+    gridScaling scalingCoarseToFine = nullptr;
 };
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h
new file mode 100644
index 0000000000000000000000000000000000000000..4ba786cc1f281725ed24bc9f5c587f33cec78f56
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ChimeraTransformation.h
@@ -0,0 +1,108 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ChimeraTransformation.h
+//! \ingroup LBM/GPUHelperFunctions
+//! \author Martin Schoenherr, Anna Wellmann, Soeren Peters
+//=======================================================================================
+#ifndef CHIMERA_TRANSFORMATION_H
+#define CHIMERA_TRANSFORMATION_H
+
+#include "LBM/LB.h"
+
+#include <basics/constants/NumericConstants.h>
+
+using namespace vf::basics::constant;
+
+namespace vf::gpu
+{
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief forward chimera transformation with weight K \ref forwardInverseChimeraWithK ; mfa, mfb and mfc are overwritten in place.
+//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+//! ]</b></a> Modified for lower round-off errors. NOTE(review): Kinverse is presumably 1/K and v2 presumably vv*vv -- confirm at the call sites.
+__inline__ __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K)
+{
+    real m2 = mfa + mfc;  // sum of the two outer inputs
+    real m1 = mfc - mfa;  // difference of the two outer inputs
+    real m0 = m2 + mfb;   // sum of all three inputs
+    mfa = m0;             // first output is the plain sum, stored before m0 is rescaled below
+    m0 *= Kinverse;
+    m0 += c1o1;           // m0 is now (mfa + mfb + mfc) * Kinverse + c1o1 (in terms of the inputs); reused by both outputs below
+    mfb = (m1 * Kinverse - m0 * vv) * K;
+    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief backward chimera transformation with weight K \ref backwardInverseChimeraWithK ; mfa, mfb and mfc are overwritten in place.
+//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+//! ]</b></a> Modified for lower round-off errors. NOTE(review): Kinverse is presumably 1/K and v2 presumably vv*vv -- confirm at the call sites.
+__inline__ __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K)
+{
+    real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K;  // becomes the new mfa
+    real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K;  // becomes the new mfb
+    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K;  // still reads the original mfa and mfb
+    mfa = m0;  // mfa and mfb are written only after all three results were computed from the original inputs
+    mfb = m1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief forward chimera transformation \ref forwardChimera ; mfa, mfb and mfc are overwritten in place.
+//! Transformation from distributions to central moments according to Eq. (6)-(14) in \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. Modified for lower round-off
+//! errors.
+__inline__ __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
+{
+    real m1 = (mfa + mfc) + mfb;  // sum of all three inputs
+    real m2 = mfc - mfa;          // difference of the two outer inputs
+    mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);  // reads the original mfa, so it has to precede the write to mfa
+    mfb = m2 - vv * m1;
+    mfa = m1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief backward chimera transformation \ref backwardChimera ; mfa, mfb and mfc are overwritten in place.
+//! Transformation from central moments to distributions according to Eq. (57)-(65) in \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+//! ]</b></a> for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations. Modified for lower round-off
+//! errors.
+__inline__ __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
+{
+    real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);  // becomes the new mfa
+    real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;  // becomes the new mfb
+    mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);  // still reads the original mfa and mfb
+    mfb = mb;  // mfb and mfa are written only after all three results were computed from the original inputs
+    mfa = ma;
+}
+
+} // namespace vf::gpu
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h
new file mode 100644
index 0000000000000000000000000000000000000000..5541bc54597ea02c5e3e89d00169b6eb6ff6564b
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h
@@ -0,0 +1,206 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file KernelUtilities.h
+//! \ingroup LBM/GPUHelperFunctions
+//! \author Martin Schoenherr, Anna Wellmann, Soeren Peters
+//=======================================================================================
+#ifndef KERNEL_UTILITIES_H
+#define KERNEL_UTILITIES_H
+
+#include "LBM/LB.h"
+#include "lbm/constants/D3Q27.h"
+#include "basics/constants/NumericConstants.h"
+
+using namespace vf::basics::constant;
+using namespace vf::lbm::dir;
+
+namespace vf::gpu
+{
+
+__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const unsigned long long numberOfLBnodes, const bool isEvenTimestep)
+{ // Fills all 27 entries dist.f[DIR_*] with pointers into distributionArray; the segment of a direction starts at offset DIR_* * numberOfLBnodes.
+    if (isEvenTimestep) // even timestep: every direction points to its own segment
+    {
+        dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
+        dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes];
+        dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes];
+        dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
+        dist.f[DIR_0M0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
+        dist.f[DIR_00P] = &distributionArray[DIR_00P * numberOfLBnodes];
+        dist.f[DIR_00M] = &distributionArray[DIR_00M * numberOfLBnodes];
+        dist.f[DIR_PP0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
+        dist.f[DIR_MM0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
+        dist.f[DIR_PM0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
+        dist.f[DIR_MP0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
+        dist.f[DIR_P0P] = &distributionArray[DIR_P0P * numberOfLBnodes];
+        dist.f[DIR_M0M] = &distributionArray[DIR_M0M * numberOfLBnodes];
+        dist.f[DIR_P0M] = &distributionArray[DIR_P0M * numberOfLBnodes];
+        dist.f[DIR_M0P] = &distributionArray[DIR_M0P * numberOfLBnodes];
+        dist.f[DIR_0PP] = &distributionArray[DIR_0PP * numberOfLBnodes];
+        dist.f[DIR_0MM] = &distributionArray[DIR_0MM * numberOfLBnodes];
+        dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes];
+        dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes];
+        dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes];
+        dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes];
+        dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes];
+        dist.f[DIR_MPP] = &distributionArray[DIR_MPP * numberOfLBnodes];
+        dist.f[DIR_PPM] = &distributionArray[DIR_PPM * numberOfLBnodes];
+        dist.f[DIR_MMM] = &distributionArray[DIR_MMM * numberOfLBnodes];
+        dist.f[DIR_PMM] = &distributionArray[DIR_PMM * numberOfLBnodes];
+        dist.f[DIR_MPM] = &distributionArray[DIR_MPM * numberOfLBnodes];
+    }
+    else // odd timestep: every direction points to the segment of the direction with P and M swapped in its name (DIR_000 maps to itself)
+    {
+         dist.f[DIR_M00] = &distributionArray[DIR_P00 * numberOfLBnodes];
+         dist.f[DIR_P00] = &distributionArray[DIR_M00 * numberOfLBnodes];
+         dist.f[DIR_0M0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
+         dist.f[DIR_0P0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
+         dist.f[DIR_00M] = &distributionArray[DIR_00P * numberOfLBnodes];
+         dist.f[DIR_00P] = &distributionArray[DIR_00M * numberOfLBnodes];
+         dist.f[DIR_MM0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
+         dist.f[DIR_PP0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
+         dist.f[DIR_MP0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
+         dist.f[DIR_PM0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
+         dist.f[DIR_M0M] = &distributionArray[DIR_P0P * numberOfLBnodes];
+         dist.f[DIR_P0P] = &distributionArray[DIR_M0M * numberOfLBnodes];
+         dist.f[DIR_M0P] = &distributionArray[DIR_P0M * numberOfLBnodes];
+         dist.f[DIR_P0M] = &distributionArray[DIR_M0P * numberOfLBnodes];
+         dist.f[DIR_0MM] = &distributionArray[DIR_0PP * numberOfLBnodes];
+         dist.f[DIR_0PP] = &distributionArray[DIR_0MM * numberOfLBnodes];
+         dist.f[DIR_0MP] = &distributionArray[DIR_0PM * numberOfLBnodes];
+         dist.f[DIR_0PM] = &distributionArray[DIR_0MP * numberOfLBnodes];
+         dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
+         dist.f[DIR_PPP] = &distributionArray[DIR_MMM * numberOfLBnodes];
+         dist.f[DIR_MMP] = &distributionArray[DIR_PPM * numberOfLBnodes];
+         dist.f[DIR_PMP] = &distributionArray[DIR_MPM * numberOfLBnodes];
+         dist.f[DIR_MPP] = &distributionArray[DIR_PMM * numberOfLBnodes];
+         dist.f[DIR_PPM] = &distributionArray[DIR_MMP * numberOfLBnodes];
+         dist.f[DIR_MMM] = &distributionArray[DIR_PPP * numberOfLBnodes];
+         dist.f[DIR_PMM] = &distributionArray[DIR_MPP * numberOfLBnodes];
+         dist.f[DIR_MPM] = &distributionArray[DIR_PMP * numberOfLBnodes];
+    }
+}
+
+__inline__ __device__ void getPointersToSubgridDistances(SubgridDistances27& subgridD, real* subgridDistances, const unsigned int numberOfSubgridIndices)
+{ // Fills all 27 entries subgridD.q[DIR_*] with pointers into subgridDistances; the segment of a direction starts at offset DIR_* * numberOfSubgridIndices.
+    subgridD.q[DIR_P00] = &subgridDistances[DIR_P00 * numberOfSubgridIndices];  // NOTE(review): the offset type depends on the type of DIR_*; if that is 32 bit the product could wrap for very large counts -- confirm
+    subgridD.q[DIR_M00] = &subgridDistances[DIR_M00 * numberOfSubgridIndices];
+    subgridD.q[DIR_0P0] = &subgridDistances[DIR_0P0 * numberOfSubgridIndices];
+    subgridD.q[DIR_0M0] = &subgridDistances[DIR_0M0 * numberOfSubgridIndices];
+    subgridD.q[DIR_00P] = &subgridDistances[DIR_00P * numberOfSubgridIndices];
+    subgridD.q[DIR_00M] = &subgridDistances[DIR_00M * numberOfSubgridIndices];
+    subgridD.q[DIR_PP0] = &subgridDistances[DIR_PP0 * numberOfSubgridIndices];
+    subgridD.q[DIR_MM0] = &subgridDistances[DIR_MM0 * numberOfSubgridIndices];
+    subgridD.q[DIR_PM0] = &subgridDistances[DIR_PM0 * numberOfSubgridIndices];
+    subgridD.q[DIR_MP0] = &subgridDistances[DIR_MP0 * numberOfSubgridIndices];
+    subgridD.q[DIR_P0P] = &subgridDistances[DIR_P0P * numberOfSubgridIndices];
+    subgridD.q[DIR_M0M] = &subgridDistances[DIR_M0M * numberOfSubgridIndices];
+    subgridD.q[DIR_P0M] = &subgridDistances[DIR_P0M * numberOfSubgridIndices];
+    subgridD.q[DIR_M0P] = &subgridDistances[DIR_M0P * numberOfSubgridIndices];
+    subgridD.q[DIR_0PP] = &subgridDistances[DIR_0PP * numberOfSubgridIndices];
+    subgridD.q[DIR_0MM] = &subgridDistances[DIR_0MM * numberOfSubgridIndices];
+    subgridD.q[DIR_0PM] = &subgridDistances[DIR_0PM * numberOfSubgridIndices];
+    subgridD.q[DIR_0MP] = &subgridDistances[DIR_0MP * numberOfSubgridIndices];
+    subgridD.q[DIR_000] = &subgridDistances[DIR_000 * numberOfSubgridIndices];
+    subgridD.q[DIR_PPP] = &subgridDistances[DIR_PPP * numberOfSubgridIndices];
+    subgridD.q[DIR_MMP] = &subgridDistances[DIR_MMP * numberOfSubgridIndices];
+    subgridD.q[DIR_PMP] = &subgridDistances[DIR_PMP * numberOfSubgridIndices];
+    subgridD.q[DIR_MPP] = &subgridDistances[DIR_MPP * numberOfSubgridIndices];
+    subgridD.q[DIR_PPM] = &subgridDistances[DIR_PPM * numberOfSubgridIndices];
+    subgridD.q[DIR_MMM] = &subgridDistances[DIR_MMM * numberOfSubgridIndices];
+    subgridD.q[DIR_PMM] = &subgridDistances[DIR_PMM * numberOfSubgridIndices];
+    subgridD.q[DIR_MPM] = &subgridDistances[DIR_MPM * numberOfSubgridIndices];
+}
+
+__inline__ __device__ real getEquilibriumForBC(const real& drho, const real& velocity, const real& cu_sq, const real weight)
+{ // Returns the weighted term below for one direction; cu_sq is supplied by the caller (NOTE(review): presumably a precomputed velocity-squared term shared by all directions -- confirm).
+    return weight * (drho + c9o2 * velocity * velocity * (c1o1 + drho) - cu_sq);
+}
+
+__inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, const real& f, const real& fInverse, const real& feq,
+                                                                const real& omega, const real& velocity, const real weight)
+{
+    // Sum of a (c1o1-q)/(c1o1+q)-weighted term built from f, fInverse, feq and omega, and a second term containing the velocity contribution c6o1 * weight * velocity. Divides by (c1o1 - omega) and (c1o1 + q) without a zero guard.
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2
+           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q);
+}
+
+__inline__ __device__ real getBounceBackDistributionForVeloBC(  const real& f,
+                                                                const real& velocity, const real weight)
+{
+    // Returns f reduced by the velocity contribution c6o1 * weight * velocity; no interpolation factor q is involved here.
+    return f - (c6o1 * weight * velocity);
+}
+
+__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq,
+                                                                  const real& omega)
+{
+    // Same expression as getInterpolatedDistributionForVeloBC but without the velocity contribution. Divides by (c1o1 - omega) and (c1o1 + q) without a zero guard.
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2
+           + (q * (f + fInverse)) / (c1o1 + q);
+}
+
+__inline__ __device__ real getInterpolatedDistributionForNoSlipWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, 
+                                                                  const real& omega, const real& drho, const real weight)
+{
+    // Same expression as getInterpolatedDistributionForNoSlipBC with weight * drho subtracted at the end. Divides by (c1o1 - omega) and (c1o1 + q) without a zero guard.
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
+           + (q * (f + fInverse)) / (c1o1 + q) - weight * drho;
+}
+
+
+__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq,
+                                                                            const real& omega, const real& drho, const real& velocity, const real weight)
+{
+    // Same expression as getInterpolatedDistributionForVeloBC with weight * drho subtracted at the end. Divides by (c1o1 - omega) and (c1o1 + q) without a zero guard.
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2
+           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q) - weight * drho;
+}
+
+__inline__ __device__ unsigned int getNodeIndex()
+{
+    const unsigned x = threadIdx.x;
+    const unsigned y = blockIdx.x;
+    const unsigned z = blockIdx.y;
+    // Maps the calling thread's (threadIdx.x, blockIdx.x, blockIdx.y) to one linear index; blockDim.x and gridDim.x are the extents of x and y.
+    const unsigned nx = blockDim.x;
+    const unsigned ny = gridDim.x;
+    // x varies fastest, then y, then z. threadIdx.y/z and blockIdx.z are not used (NOTE(review): presumably kernels are launched with 1D blocks on a 2D grid -- confirm).
+    return nx * (ny * z + y) + x;
+}
+
+__inline__ __device__ bool isValidFluidNode(uint nodeType)
+{ // True exactly when nodeType equals GEO_FLUID, GEO_PM_0, GEO_PM_1 or GEO_PM_2; every other node type yields false.
+    return (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2);
+}
+
+
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h
new file mode 100644
index 0000000000000000000000000000000000000000..a7c1390c728df1d0ca83424fb7f9f4fb09faba65
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/ScalingUtilities.h
@@ -0,0 +1,136 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ScalingUtilities.h
+//! \ingroup LBM/GPUHelperFunctions
+//! \author Martin Schoenherr, Anna Wellmann
+//=======================================================================================
+#ifndef SCALING_HELPER_FUNCTIONS_H
+#define SCALING_HELPER_FUNCTIONS_H
+
+#include "LBM/LB.h" 
+#include "lbm/constants/D3Q27.h"
+#include "basics/constants/NumericConstants.h"
+
+using namespace vf::basics::constant;
+using namespace vf::lbm::dir;
+
+namespace vf::gpu
+{
+
+__device__ __inline__ void calculateMomentsOnSourceNodes(Distributions27 &dist, real &omega, unsigned int &k_000,
+                                                         unsigned int &k_M00, unsigned int &k_0M0, unsigned int &k_00M,
+                                                         unsigned int &k_MM0, unsigned int &k_M0M, unsigned int &k_0MM,
+                                                         unsigned int &k_MMM, real &drho, real &velocityX,
+                                                         real &velocityY, real &velocityZ, real &kxyFromfcNEQ,
+                                                         real &kyzFromfcNEQ, real &kxzFromfcNEQ, real &kxxMyyFromfcNEQ,
+                                                         real &kxxMzzFromfcNEQ)
+{
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Set local distributions (f's) on source nodes:
+    //!   directions with negative components are read at the index named after those components (e.g. f_PM0 at k_0M0), all others at k_000
+    real f_000 = (dist.f[DIR_000])[k_000];
+    real f_P00 = (dist.f[DIR_P00])[k_000];
+    real f_M00 = (dist.f[DIR_M00])[k_M00];
+    real f_0P0 = (dist.f[DIR_0P0])[k_000];
+    real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
+    real f_00P = (dist.f[DIR_00P])[k_000];
+    real f_00M = (dist.f[DIR_00M])[k_00M];
+    real f_PP0 = (dist.f[DIR_PP0])[k_000];
+    real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
+    real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
+    real f_MP0 = (dist.f[DIR_MP0])[k_M00];
+    real f_P0P = (dist.f[DIR_P0P])[k_000];
+    real f_M0M = (dist.f[DIR_M0M])[k_M0M];
+    real f_P0M = (dist.f[DIR_P0M])[k_00M];
+    real f_M0P = (dist.f[DIR_M0P])[k_M00];
+    real f_0PP = (dist.f[DIR_0PP])[k_000];
+    real f_0MM = (dist.f[DIR_0MM])[k_0MM];
+    real f_0PM = (dist.f[DIR_0PM])[k_00M];
+    real f_0MP = (dist.f[DIR_0MP])[k_0M0];
+    real f_PPP = (dist.f[DIR_PPP])[k_000];
+    real f_MPP = (dist.f[DIR_MPP])[k_M00];
+    real f_PMP = (dist.f[DIR_PMP])[k_0M0];
+    real f_MMP = (dist.f[DIR_MMP])[k_MM0];
+    real f_PPM = (dist.f[DIR_PPM])[k_00M];
+    real f_MPM = (dist.f[DIR_MPM])[k_M0M];
+    real f_PMM = (dist.f[DIR_PMM])[k_0MM];
+    real f_MMM = (dist.f[DIR_MMM])[k_MMM];
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
+    //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+    //!   drho is the plain sum of all 27 distributions; the velocities are scaled by c1o1 / (c1o1 + drho)
+    drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
+            (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
+             ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
+            ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
+           f_000;
+
+    real oneOverRho = c1o1 / (c1o1 + drho);
+
+    velocityX = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
+                 (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
+                oneOverRho;
+    velocityY = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
+                 (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
+                oneOverRho;
+    velocityZ = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
+                 (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
+                oneOverRho;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Calculate second order moments for interpolation
+    //!   (each is -c3o1 or -c3o2 times omega times the raw moment divided by (c1o1 + drho) minus the matching velocity product)
+    // example: kxxMzz: moment, second derivative in x direction minus the second derivative in z direction
+    kxyFromfcNEQ = -c3o1 * omega *
+                   ((f_MM0 + f_MMM + f_MMP - f_MP0 - f_MPM - f_MPP - f_PM0 - f_PMM - f_PMP + f_PP0 + f_PPM + f_PPP) /
+                    (c1o1 + drho) -
+                    ((velocityX * velocityY)));
+    kyzFromfcNEQ = -c3o1 * omega *
+                   ((f_0MM + f_PMM + f_MMM - f_0MP - f_PMP - f_MMP - f_0PM - f_PPM - f_MPM + f_0PP + f_PPP + f_MPP) /
+                    (c1o1 + drho) -
+                    ((velocityY * velocityZ)));
+    kxzFromfcNEQ = -c3o1 * omega *
+                   ((f_M0M + f_MMM + f_MPM - f_M0P - f_MMP - f_MPP - f_P0M - f_PMM - f_PPM + f_P0P + f_PMP + f_PPP) /
+                    (c1o1 + drho) -
+                    ((velocityX * velocityZ)));
+    kxxMyyFromfcNEQ = -c3o2 * omega *
+                      ((f_M0M + f_M00 + f_M0P - f_0MM - f_0M0 - f_0MP - f_0PM - f_0P0 - f_0PP + f_P0M + f_P00 + f_P0P) /
+                       (c1o1 + drho) -
+                       ((velocityX * velocityX - velocityY * velocityY)));
+    kxxMzzFromfcNEQ = -c3o2 * omega *
+                      ((f_MM0 + f_M00 + f_MP0 - f_0MM - f_0MP - f_00M - f_00P - f_0PM - f_0PP + f_PM0 + f_P00 + f_PP0) /
+                       (c1o1 + drho) -
+                       ((velocityX * velocityX - velocityZ * velocityZ)));
+}
+
+} // namespace vf::gpu
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index cfdbbbae040a13f94e97d40d702b93d5a1e19c86..a5ae5f5ceef213e8ec9b2306106035a09b1ffd0d 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -1,10 +1,35 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ /
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//////////////////////////////////////////////////////////////////////////
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file LB.h
+//! \ingroup LBM
+//! \author Martin Schoenherr
+//=======================================================================================
 #ifndef _LB_H_
 #define _LB_H_
 
@@ -45,7 +70,7 @@
 #define INTERFACE_B 5
 
 
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 
 #include <string>
 #include <vector>
@@ -80,127 +105,33 @@ enum class CollisionTemplate {
 constexpr std::initializer_list<CollisionTemplate> all_CollisionTemplate  = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures, CollisionTemplate::SubDomainBorder};
 constexpr std::initializer_list<CollisionTemplate> bulk_CollisionTemplate = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures};
 
-struct InitCondition
-{
-   real Re;
-   real factorPressBC {1.0};
-   real Diffusivity {0.001};
-   real Temp {0.0};
-   real TempBC {1.0};
-   real RealX {1.0};
-   real RealY {1.0};
-   int numprocs {1};
-   int myProcessId {0};
-   int maxdev {1};
-   uint tDoCheckPoint {0};
-   uint tDoRestart {0};
-   uint tCalcMedStart {0};
-   uint tCalcMedEnd {10};
-   uint tend {10};
-   uint tout {1};
-   uint tStartOut {0};
-   uint PressInID {0};
-   uint PressOutID {0};
-   uint PressInZ {1};
-   uint PressOutZ {2};
-   std::vector<uint> devices {0, 1}; // one device with ID = 0
-   std::vector<int> GridX, GridY, GridZ, DistX, DistY, DistZ;
-   std::vector<real> scaleLBMtoSI, translateLBMtoSI;
-   std::vector<real> minCoordX, minCoordY, minCoordZ, maxCoordX, maxCoordY, maxCoordZ;
-   std::string fname {"output/simulation"};
-   std::string oPath {"output/"};
-   std::string gridPath {"grid/"};
-   std::string oPrefix {"simulation"};
-   std::string geometryFileC, geometryFileM, geometryFileF;
-   std::string kFull, geoFull, geoVec, coordX, coordY, coordZ, neighborX, neighborY, neighborZ, neighborWSB, scaleCFC, scaleCFF, scaleFCC, scaleFCF, scaleOffsetCF, scaleOffsetFC;
-   std::string noSlipBcPos, noSlipBcQs, noSlipBcValue;
-   std::string slipBcPos, slipBcQs, slipBcValue;
-   std::string pressBcPos, pressBcQs, pressBcValue;
-   std::string geomBoundaryBcQs,velBcQs;
-   std::string geomBoundaryBcValues,velBcValues,pressBcValues,noSlipBcValues;
-   std::string propellerCylinder, propellerValues, propellerQs, measurePoints;
-   std::string inletBcQs, inletBcValues;
-   std::string outletBcQs, outletBcValues;
-   std::string topBcQs, topBcValues;
-   std::string bottomBcQs, bottomBcValues;
-   std::string frontBcQs, frontBcValues;
-   std::string backBcQs, backBcValues;
-   std::string wallBcQs, wallBcValues;
-   std::string periodicBcQs, periodicBcValues;
-   std::string numberNodes, LBMvsSI;
-   std::string cpTop, cpBottom, cpBottom2;
-   std::string concentration, streetVelocity;
-   std::string geomNormalX, geomNormalY, geomNormalZ, inflowNormalX, inflowNormalY, inflowNormalZ, outflowNormalX, outflowNormalY, outflowNormalZ;
-   uint timeStepForMP {10};
-   real clockCycleForMP {1.0};
-   real vis {0.001};
-   real vis_ratio {1.0};
-   real u0 {0.01};
-   real u0_ratio {1.0};
-   real delta_rho {0.0};
-   real delta_press {1.0};
-   bool printFiles {false};
-   bool doRestart {false};
-   bool doCheckPoint {false};
-   bool readGeo {false};
-   bool isGeo, isProp, isCp;
-   bool GeometryValues {false};
-   bool is2ndOrderMoments {false};
-   bool is3rdOrderMoments {false};
-   bool isHighOrderMoments {false};
-   bool calcMedian {false};
-   bool isConc {false};
-   bool isWale {false};
-   TurbulenceModel turbulenceModel {TurbulenceModel::None};
-   bool isTurbulentViscosity {false};
-   real SGSConstant {0.0};
-   bool isMeasurePoints {false};
-   bool isInitNeq {false};
-   bool isGeoNormal, isInflowNormal, isOutflowNormal;
-   bool hasWallModelMonitor {false};
-   bool simulatePorousMedia {false};
-   bool streetVelocityFile {false};
-   real outflowPressureCorrectionFactor {0.0};
-};
-
 //Interface Cells
-typedef struct ICellCF{
-   uint* ICellCFF;
-   uint* ICellCFC;
-   uint kCF;
-} InterpolationCellCF;
-
-typedef struct ICellFC{
-   uint* ICellFCF;
-   uint* ICellFCC;
-   uint kFC;
-} InterpolationCellFC;
-
-//Offset of the interface cells at the wall
-typedef struct OffCF{
-   real* xOffCF;
-   real* yOffCF;
-   real* zOffCF;
-} OffsetCF;
-
-typedef struct OffFC{
-   real* xOffFC;
-   real* yOffFC;
-   real* zOffFC;
-} OffsetFC;
+// example of old names (pre 2023) ICellCFC: interpolation from Coarse (C) to Fine (F), indices of the Coarse cells (C)
+typedef struct ICells{
+   uint* fineCellIndices;   // indices of the fine cells (replaces the old ICellCFF / ICellFCF members)
+   uint* coarseCellIndices; // indices of the coarse cells (replaces the old ICellCFC / ICellFCC members)
+   uint numberOfCells;      // replaces the old kCF / kFC counters; presumably the length of both index arrays - TODO confirm
+} InterpolationCells;
+
+//! \brief stores location of neighboring cell (necessary for refinement into the wall)
+typedef struct ICellNeigh{
+   real* x; // x component (replaces the old xOffCF / xOffFC members)
+   real* y; // y component (replaces the old yOffCF / yOffFC members)
+   real* z; // z component (replaces the old zOffCF / zOffFC members)
+} InterpolationCellNeighbor;
 
 // Distribution functions g 6
-typedef struct  Distri6 {
+typedef struct  Distri6 { // ADD IN FUTURE RELEASE
    real* g[6];
 } Distributions6;
 
 // Distribution functions f 7
-typedef struct  Distri7{
+typedef struct  Distri7{ // ADD IN FUTURE RELEASE
    real* f[7];
 } Distributions7;
 
 // Distribution functions f 19
-typedef struct  Distri19{
+typedef struct  Distri19{ // DEPRECATED
    real* f[19];
 } Distributions19;
 
@@ -247,14 +178,14 @@ typedef struct QforPrecursorBC{
 }QforPrecursorBoundaryConditions;
 
 //BCTemp
-typedef struct TempforBC{
+typedef struct TempforBC{  // ADD IN FUTURE RELEASE
    int* k;
    real* temp;
    int kTemp=0;
 }TempforBoundaryConditions;
 
 //BCTempVel
-typedef struct TempVelforBC{
+typedef struct TempVelforBC{  // ADD IN FUTURE RELEASE
    int* k;
    real* temp;
    real* tempPulse;
@@ -263,7 +194,7 @@ typedef struct TempVelforBC{
 }TempVelforBoundaryConditions;
 
 //BCTempPress
-typedef struct TempPressforBC{
+typedef struct TempPressforBC{  // ADD IN FUTURE RELEASE
    int* k;
    real* temp;
    real* velo;
@@ -283,7 +214,7 @@ typedef struct WMparas{
 
 
 //measurePoints
-typedef struct MeasP{
+typedef struct MeasP{ // ADD IN FUTURE RELEASE
    std::string name;
    uint k;
    std::vector<real> Vx;
@@ -307,7 +238,7 @@ typedef struct PN27{
    int numberOfFs;
 }ProcessNeighbor27;
 
-typedef struct PN_F3 {
+typedef struct PN_F3 { // ADD IN FUTURE RELEASE
    real* g[6];
    uint memsizeGs;
    int* index;
@@ -317,7 +248,7 @@ typedef struct PN_F3 {
    int numberOfGs;
 }ProcessNeighborF3;
 
-//path line particles
+//path line particles // DEPRECATED
 typedef struct PLP{
    bool *stuck, *hot;
    real *coordXabsolut, *coordYabsolut, *coordZabsolut;
@@ -332,6 +263,7 @@ typedef struct PLP{
 }PathLineParticles;
 
 //////////////////////////////////////////////////////////////////////////
+// DEPRECATED
 inline int vectorPosition(int i, int j, int k, int Lx, int Ly )
 {
    //return((j+15)*(Lx+2*16)+(i+15));
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 84ab84ff93fa7706bcc27d7e61a18f580f3c8dbe..55c0250223901a94d03bd1e65bbb72438bcc99c3 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -25,7 +25,7 @@
 #include "Output/VeloASCIIWriter.hpp"
 //////////////////////////////////////////////////////////////////////////
 #include "Utilities/Buffer2D.hpp"
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 //////////////////////////////////////////////////////////////////////////
 #include "Init/InitLattice.h"
 #include "Init/VfReader.h"
@@ -420,6 +420,18 @@ void Simulation::setFactories(std::unique_ptr<KernelFactory> &&kernelFactory_,
     this->preProcessorFactory = std::move(preProcessorFactory_);
 }
 
+void Simulation::initTimers()
+{
+    previousTimestepForAveraging = para->getTimeCalcMedStart(); // averaging bookkeeping starts at the configured median start time
+    previousTimestepForTurbulenceIntensityCalculation = 0;
+    timestepForMeasuringPoints = 0;
+    
+    para->setStepEnsight(0); // was done at the top of run() before this helper existed
+
+    averageTimer = std::make_unique<Timer>("Average performance"); // owned by the Simulation; replaces the raw `new Timer` formerly local to run()
+    averageTimer->startTimer();
+}
+
 
 void Simulation::allocNeighborsOffsetsScalesAndBoundaries(GridProvider &gridProvider)
 {
@@ -432,603 +444,546 @@ void Simulation::allocNeighborsOffsetsScalesAndBoundaries(GridProvider &gridProv
 
 void Simulation::run()
 {
-   unsigned int timestep, t_prev;
-   uint t_turbulenceIntensity = 0;
-   unsigned int t_MP = 0;
-
-   //////////////////////////////////////////////////////////////////////////
-   para->setStepEnsight(0);
-
-   //turning Ship
-   real Pi = (real)3.14159265358979323846;
-   real delta_x_F = (real)0.1;
-   real delta_t_F = (real)((double)para->getVelocity() * (double)delta_x_F / (double)3.75);
-   real delta_t_C = (real)(delta_t_F * pow(2.,para->getMaxLevel()));
-   real timesteps_C = (real)(12.5 / delta_t_C);
-   real AngularVelocity = (real)(12.5 / timesteps_C * Pi / 180.);
-   para->setAngularVelocity(AngularVelocity);
-   for (int i = 0; i<= para->getMaxLevel(); i++)
-   {
-       para->getParD(i)->deltaPhi = (real)(para->getAngularVelocity()/(pow(2.,i)));
-   }
-   //////////////////////////////////////////////////////////////////////////
-
-   t_prev = para->getTimeCalcMedStart();
-
-    Timer* averageTimer = new Timer("Average performance");
-    averageTimer->startTimer();
+    this->initTimers();
+
+    //////////////////////////////////////////////////////////////////////////
+    // turning Ship
+    real Pi = (real)3.14159265358979323846;
+    real delta_x_F = (real)0.1;
+    real delta_t_F = (real)((double)para->getVelocity() * (double)delta_x_F / (double)3.75);
+    real delta_t_C = (real)(delta_t_F * pow(2., para->getMaxLevel()));
+    real timesteps_C = (real)(12.5 / delta_t_C);
+    real AngularVelocity = (real)(12.5 / timesteps_C * Pi / 180.);
+    para->setAngularVelocity(AngularVelocity);
+    for (int i = 0; i <= para->getMaxLevel(); i++) {
+        para->getParD(i)->deltaPhi = (real)(para->getAngularVelocity() / (pow(2., i)));
+    }
 
     ////////////////////////////////////////////////////////////////////////////////
     // Time loop
     ////////////////////////////////////////////////////////////////////////////////
-    for(timestep=para->getTimestepStart();timestep<=para->getTimestepEnd();timestep++)
+    for(uint timestep=para->getTimestepStart();timestep<=para->getTimestepEnd();timestep++)
     {
-        this->updateGrid27->updateGrid(0, timestep);
-
-        ////////////////////////////////////////////////////////////////////////////////
-        //Particles
-        ////////////////////////////////////////////////////////////////////////////////
-        if (para->getCalcParticles()) propagateParticles(para.get(), timestep);
-        ////////////////////////////////////////////////////////////////////////////////
-
-
-
-
-        ////////////////////////////////////////////////////////////////////////////////
-        // run Analyzers for kinetic energy and enstrophy for TGV in 3D
-        // these analyzers only work on level 0
-        ////////////////////////////////////////////////////////////////////////////////
-        if (this->kineticEnergyAnalyzer || this->enstrophyAnalyzer) {
-            updateGrid27->exchangeData(0);
-        }
+        this->calculateTimestep(timestep);
+    }
 
-        if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(timestep);
-        if( this->enstrophyAnalyzer     ) this->enstrophyAnalyzer->run(timestep);
-        ////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////
+    // printDragLift(para);
+    ////////////////////////////////////////////////////////////////////////////////
 
+    ////////////////////////////////////////////////////////////////////////////////
+    if (para->getDiffOn() == true) printPlaneConc(para.get(), cudaMemoryManager.get());
+    ////////////////////////////////////////////////////////////////////////////////
 
+    ////////////////////////////////////////////////////////////////////////////////
+    ////for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
+    ////{
+    ////    if (para->getParH(lev)->cpTop.size() > 0)
+    ////    {
+    ////        printCpTop(para, lev);
+    ////    }
+    ////}
+    // for (int lev = 7; lev <= 8; lev++)
+    //{
+    //    printCpTop(para, lev);
+    //}
+    ////printCpTop(para);
+    ////printCpBottom(para);
+    ////printCpBottom2(para);
+    ////////////////////////////////////////////////////////////////////////////////
 
+    //  //////////////////////////////////////////////////////////////////////////
+    //  //Copy Measure Values
+    // for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+    //{
+    //    VF_LOG_INFO("Copy MeasurePoints at level = {}", lev);
+    //    para->cudaCopyMeasurePointsToHost(lev);
+    //    para->copyMeasurePointsArrayToVector(lev);
+    //    VF_LOG_INFO("Write MeasurePoints at level = {}", lev);
+    //    for(int j = 0; j < (int)para->getParH(lev)->MP.size(); j++)
+    //    {
+    //        MeasurePointWriter::writeMeasurePoints(para, lev, j, 0);
+    //    }
+    //}
+    //  //////////////////////////////////////////////////////////////////////////
+}
 
-        ////////////////////////////////////////////////////////////////////////////////
-        //Calc Median
-        ////////////////////////////////////////////////////////////////////////////////
-        if (para->getCalcMedian() && ((int)timestep >= para->getTimeCalcMedStart()) && ((int)timestep <= para->getTimeCalcMedEnd()))
+void Simulation::calculateTimestep(uint timestep)
+{
+    this->updateGrid27->updateGrid(0, timestep);
+    ////////////////////////////////////////////////////////////////////////////////
+    //Particles
+    ////////////////////////////////////////////////////////////////////////////////
+    if (para->getCalcParticles()) propagateParticles(para.get(), timestep);
+    ////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////
+    // run Analyzers for kinetic energy and enstrophy for TGV in 3D
+    // these analyzers only work on level 0
+    ////////////////////////////////////////////////////////////////////////////////
+    if (this->kineticEnergyAnalyzer || this->enstrophyAnalyzer) {
+        updateGrid27->exchangeData(0);
+    }
+    if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(timestep);
+    if( this->enstrophyAnalyzer     ) this->enstrophyAnalyzer->run(timestep);
+    ////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////
+    //Calc Median
+    ////////////////////////////////////////////////////////////////////////////////
+    if (para->getCalcMedian() && ((int)timestep >= para->getTimeCalcMedStart()) && ((int)timestep <= para->getTimeCalcMedEnd()))
+    {
+        for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
         {
-          for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
-          {
-              //CalcMedSP27(para->getParD(lev)->vx_SP_Med,
-                    //      para->getParD(lev)->vy_SP_Med,
-                    //      para->getParD(lev)->vz_SP_Med,
-                    //      para->getParD(lev)->rho_SP_Med,
-                    //      para->getParD(lev)->press_SP_Med,
-                    //      para->getParD(lev)->geoSP,
-                    //      para->getParD(lev)->neighborX_SP,
-                    //      para->getParD(lev)->neighborY_SP,
-                    //      para->getParD(lev)->neighborZ_SP,
-                    //      para->getParD(lev)->size_Mat_SP,
-                    //      para->getParD(lev)->numberofthreads,
-                    //      para->getParD(lev)->d0SP.f[0],
-                    //      para->getParD(lev)->evenOrOdd);
-              //getLastCudaError("CalcMacSP27 execution failed");
-
-              CalcMedCompSP27(para->getParD(lev)->vx_SP_Med,
-                              para->getParD(lev)->vy_SP_Med,
-                              para->getParD(lev)->vz_SP_Med,
-                              para->getParD(lev)->rho_SP_Med,
-                              para->getParD(lev)->press_SP_Med,
-                              para->getParD(lev)->typeOfGridNode,
-                              para->getParD(lev)->neighborX,
-                              para->getParD(lev)->neighborY,
-                              para->getParD(lev)->neighborZ,
-                              para->getParD(lev)->numberOfNodes,
-                              para->getParD(lev)->numberofthreads,
-                              para->getParD(lev)->distributions.f[0],
-                              para->getParD(lev)->isEvenTimestep);
-              getLastCudaError("CalcMacMedCompSP27 execution failed");
-
-          }
+            //CalcMedSP27(para->getParD(lev)->vx_SP_Med,
+                  //      para->getParD(lev)->vy_SP_Med,
+                  //      para->getParD(lev)->vz_SP_Med,
+                  //      para->getParD(lev)->rho_SP_Med,
+                  //      para->getParD(lev)->press_SP_Med,
+                  //      para->getParD(lev)->geoSP,
+                  //      para->getParD(lev)->neighborX_SP,
+                  //      para->getParD(lev)->neighborY_SP,
+                  //      para->getParD(lev)->neighborZ_SP,
+                  //      para->getParD(lev)->size_Mat_SP,
+                  //      para->getParD(lev)->numberofthreads,
+                  //      para->getParD(lev)->d0SP.f[0],
+                  //      para->getParD(lev)->evenOrOdd);
+            //getLastCudaError("CalcMacSP27 execution failed");
+            CalcMedCompSP27(para->getParD(lev)->vx_SP_Med,
+                            para->getParD(lev)->vy_SP_Med,
+                            para->getParD(lev)->vz_SP_Med,
+                            para->getParD(lev)->rho_SP_Med,
+                            para->getParD(lev)->press_SP_Med,
+                            para->getParD(lev)->typeOfGridNode,
+                            para->getParD(lev)->neighborX,
+                            para->getParD(lev)->neighborY,
+                            para->getParD(lev)->neighborZ,
+                            para->getParD(lev)->numberOfNodes,
+                            para->getParD(lev)->numberofthreads,
+                            para->getParD(lev)->distributions.f[0],
+                            para->getParD(lev)->isEvenTimestep);
+            getLastCudaError("CalcMacMedCompSP27 execution failed");
         }
-
-        if (para->getCalcTurbulenceIntensity()) {
-            for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
-                CalcTurbulenceIntensityDevice(
-                    para->getParD(lev)->vxx,
-                    para->getParD(lev)->vyy,
-                    para->getParD(lev)->vzz,
-                    para->getParD(lev)->vxy,
-                    para->getParD(lev)->vxz,
-                    para->getParD(lev)->vyz,
-                    para->getParD(lev)->vx_mean,
-                    para->getParD(lev)->vy_mean,
-                    para->getParD(lev)->vz_mean,
-                    para->getParD(lev)->distributions.f[0],
-                    para->getParD(lev)->typeOfGridNode,
-                    para->getParD(lev)->neighborX,
-                    para->getParD(lev)->neighborY,
-                    para->getParD(lev)->neighborZ,
-                    para->getParD(lev)->numberOfNodes,
-                    para->getParD(lev)->isEvenTimestep,
-                    para->getParD(lev)->numberofthreads
-                );
-            }
+    }
+    if (para->getCalcTurbulenceIntensity()) {
+        for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
+            CalcTurbulenceIntensityDevice(
+                para->getParD(lev)->vxx,
+                para->getParD(lev)->vyy,
+                para->getParD(lev)->vzz,
+                para->getParD(lev)->vxy,
+                para->getParD(lev)->vxz,
+                para->getParD(lev)->vyz,
+                para->getParD(lev)->vx_mean,
+                para->getParD(lev)->vy_mean,
+                para->getParD(lev)->vz_mean,
+                para->getParD(lev)->distributions.f[0],
+                para->getParD(lev)->typeOfGridNode,
+                para->getParD(lev)->neighborX,
+                para->getParD(lev)->neighborY,
+                para->getParD(lev)->neighborZ,
+                para->getParD(lev)->numberOfNodes,
+                para->getParD(lev)->isEvenTimestep,
+                para->getParD(lev)->numberofthreads
+            );
         }
-        ////////////////////////////////////////////////////////////////////////////////
-
-
-
-
-        ////////////////////////////////////////////////////////////////////////////////
-        // CheckPoint
-        ////////////////////////////////////////////////////////////////////////////////
-        if(para->getDoCheckPoint() && para->getTimeDoCheckPoint()>0 && timestep%para->getTimeDoCheckPoint()==0 && timestep>0 && !para->overWritingRestart(timestep))
+    }
+    ////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////
+    // CheckPoint
+    ////////////////////////////////////////////////////////////////////////////////
+    if(para->getDoCheckPoint() && para->getTimeDoCheckPoint()>0 && timestep%para->getTimeDoCheckPoint()==0 && timestep>0 && !para->overWritingRestart(timestep))
+    {
+        averageTimer->stopTimer();
+        //////////////////////////////////////////////////////////////////////////
+        if( para->getDoCheckPoint() )
         {
-            averageTimer->stopTimer();
-            //////////////////////////////////////////////////////////////////////////
-
-            if( para->getDoCheckPoint() )
+            VF_LOG_INFO("Copy data for CheckPoint t = {}....", timestep);
+            for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
             {
-                VF_LOG_INFO("Copy data for CheckPoint t = {}....", timestep);
-
-                for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
-                {
-                    cudaMemoryManager->cudaCopyFsForCheckPoint(lev);
-                }
-
-                VF_LOG_INFO("Write data for CheckPoint t = {}...", timestep);
-
-                const auto name = getFileName(para->getFName(), timestep, para->getMyProcessID());
-                restart_object->serialize(name, para);
-
-                VF_LOG_INFO("done");
+                cudaMemoryManager->cudaCopyFsForCheckPoint(lev);
             }
-            //////////////////////////////////////////////////////////////////////////
-            averageTimer->startTimer();
+            VF_LOG_INFO("Write data for CheckPoint t = {}...", timestep);
+            const auto name = getFileName(para->getFName(), timestep, para->getMyProcessID());
+            restart_object->serialize(name, para);
+            VF_LOG_INFO("done");
         }
-        //////////////////////////////////////////////////////////////////////////////
-
-
-
-
-
-        ////////////////////////////////////////////////////////////////////////////////
-        //Measure Points
-        ////////////////////////////////////////////////////////////////////////////////
-        //set MP-Time
-        if (para->getUseMeasurePoints())
+        //////////////////////////////////////////////////////////////////////////
+        averageTimer->startTimer();
+    }
+    //////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////
+    //Measure Points
+    ////////////////////////////////////////////////////////////////////////////////
+    //set MP-Time
+    if (para->getUseMeasurePoints())
+    {
+        if ((timestep%para->getTimestepForMP()) == 0)
         {
-            if ((timestep%para->getTimestepForMP()) == 0)
+            unsigned int valuesPerClockCycle = (unsigned int)(para->getclockCycleForMP() / para->getTimestepForMP());
+            for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
             {
-                unsigned int valuesPerClockCycle = (unsigned int)(para->getclockCycleForMP() / para->getTimestepForMP());
-                for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
-                {
-                    // VF_LOG_INFO("start level = {}", lev);
-                    LBCalcMeasurePoints27(  para->getParD(lev)->VxMP,            para->getParD(lev)->VyMP,                 para->getParD(lev)->VzMP,
-                                            para->getParD(lev)->RhoMP,           para->getParD(lev)->kMP,                  para->getParD(lev)->numberOfPointskMP,
-                                            valuesPerClockCycle,                 t_MP,                                     para->getParD(lev)->typeOfGridNode,
-                                            para->getParD(lev)->neighborX,       para->getParD(lev)->neighborY,            para->getParD(lev)->neighborZ,
-                                            para->getParD(lev)->numberOfNodes,   para->getParD(lev)->distributions.f[0],   para->getParD(lev)->numberofthreads,
-                                            para->getParD(lev)->isEvenTimestep);
-                }
-                t_MP++;
+                // VF_LOG_INFO("start level = {}", lev);
+                LBCalcMeasurePoints27(  para->getParD(lev)->VxMP,            para->getParD(lev)->VyMP,                 para->getParD(lev)->VzMP,
+                                        para->getParD(lev)->RhoMP,           para->getParD(lev)->kMP,                  para->getParD(lev)->numberOfPointskMP,
+                                        valuesPerClockCycle,                 timestepForMeasuringPoints,                                     para->getParD(lev)->typeOfGridNode,
+                                        para->getParD(lev)->neighborX,       para->getParD(lev)->neighborY,            para->getParD(lev)->neighborZ,
+                                        para->getParD(lev)->numberOfNodes,   para->getParD(lev)->distributions.f[0],   para->getParD(lev)->numberofthreads,
+                                        para->getParD(lev)->isEvenTimestep);
             }
-
-            //Copy Measure Values
-            if ((timestep % (unsigned int)para->getclockCycleForMP()) == 0)
+            timestepForMeasuringPoints++;
+        }
+        //Copy Measure Values
+        if ((timestep % (unsigned int)para->getclockCycleForMP()) == 0)
+        {
+            for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
             {
-                for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
+                cudaMemoryManager->cudaCopyMeasurePointsToHost(lev);
+                para->copyMeasurePointsArrayToVector(lev);
+                VF_LOG_INFO("Write MeasurePoints at level = {} and timestep = {}", lev, timestep);
+                for (int j = 0; j < (int)para->getParH(lev)->MP.size(); j++)
                 {
-                    cudaMemoryManager->cudaCopyMeasurePointsToHost(lev);
-                    para->copyMeasurePointsArrayToVector(lev);
-                    VF_LOG_INFO("Write MeasurePoints at level = {} and timestep = {}", lev, timestep);
-                    for (int j = 0; j < (int)para->getParH(lev)->MP.size(); j++)
-                    {
-                        MeasurePointWriter::writeMeasurePoints(para.get(), lev, j, timestep);
-                    }
-                    //MeasurePointWriter::calcAndWriteMeanAndFluctuations(para.get(), lev, t, para->getTStartOut());
+                    MeasurePointWriter::writeMeasurePoints(para.get(), lev, j, timestep);
                 }
-                t_MP = 0;
+                //MeasurePointWriter::calcAndWriteMeanAndFluctuations(para.get(), lev, t, para->getTStartOut());
             }
+            timestepForMeasuringPoints = 0;
         }
+    }
+    //////////////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////////////
+    ////get concentration at the plane
+    //////////////////////////////////////////////////////////////////////////////////
+    if (para->getDiffOn() && para->getCalcPlaneConc())
+    {
+        PlaneConcThS27( para->getParD(0)->ConcPlaneIn,
+                       para->getParD(0)->cpTopIndex,
+                       para->getParD(0)->numberOfPointsCpTop,
+                       para->getParD(0)->typeOfGridNode,
+                       para->getParD(0)->neighborX,
+                       para->getParD(0)->neighborY,
+                       para->getParD(0)->neighborZ,
+                       para->getParD(0)->numberOfNodes,
+                       para->getParD(0)->numberofthreads,
+                       para->getParD(0)->distributionsAD.f[0],
+                       para->getParD(0)->isEvenTimestep);
+        getLastCudaError("PlaneConcThS27 execution failed");
+        PlaneConcThS27( para->getParD(0)->ConcPlaneOut1,
+                        para->getParD(0)->cpBottomIndex,
+                        para->getParD(0)->numberOfPointsCpBottom,
+                        para->getParD(0)->typeOfGridNode,
+                        para->getParD(0)->neighborX,
+                        para->getParD(0)->neighborY,
+                        para->getParD(0)->neighborZ,
+                        para->getParD(0)->numberOfNodes,
+                        para->getParD(0)->numberofthreads,
+                        para->getParD(0)->distributionsAD.f[0],
+                        para->getParD(0)->isEvenTimestep);
+        getLastCudaError("PlaneConcThS27 execution failed");
+        PlaneConcThS27( para->getParD(0)->ConcPlaneOut2,
+                        para->getParD(0)->pressureBC.kN,
+                        para->getParD(0)->pressureBC.numberOfBCnodes,
+                        para->getParD(0)->typeOfGridNode,
+                        para->getParD(0)->neighborX,
+                        para->getParD(0)->neighborY,
+                        para->getParD(0)->neighborZ,
+                        para->getParD(0)->numberOfNodes,
+                        para->getParD(0)->numberofthreads,
+                        para->getParD(0)->distributionsAD.f[0],
+                        para->getParD(0)->isEvenTimestep);
+        getLastCudaError("PlaneConcThS27 execution failed");
         //////////////////////////////////////////////////////////////////////////////////
-
-
-
-
+        ////Calculation of concentration at the plane
         //////////////////////////////////////////////////////////////////////////////////
-        ////get concentration at the plane
+        calcPlaneConc(para.get(), cudaMemoryManager.get(), 0);
+    }
+    //////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////
+    // File IO
+    ////////////////////////////////////////////////////////////////////////////////
+    //communicator->startTimer();
+    if(para->getTimestepOut()>0 && timestep%para->getTimestepOut()==0 && timestep>para->getTimestepStartOut())
+    {
         //////////////////////////////////////////////////////////////////////////////////
-        if (para->getDiffOn() && para->getCalcPlaneConc())
-        {
-            PlaneConcThS27( para->getParD(0)->ConcPlaneIn,
-                           para->getParD(0)->cpTopIndex,
-                           para->getParD(0)->numberOfPointsCpTop,
-                           para->getParD(0)->typeOfGridNode,
-                           para->getParD(0)->neighborX,
-                           para->getParD(0)->neighborY,
-                           para->getParD(0)->neighborZ,
-                           para->getParD(0)->numberOfNodes,
-                           para->getParD(0)->numberofthreads,
-                           para->getParD(0)->distributionsAD27.f[0],
-                           para->getParD(0)->isEvenTimestep);
-            getLastCudaError("PlaneConcThS27 execution failed");
-            PlaneConcThS27( para->getParD(0)->ConcPlaneOut1,
-                            para->getParD(0)->cpBottomIndex,
-                            para->getParD(0)->numberOfPointsCpBottom,
-                            para->getParD(0)->typeOfGridNode,
-                            para->getParD(0)->neighborX,
-                            para->getParD(0)->neighborY,
-                            para->getParD(0)->neighborZ,
-                            para->getParD(0)->numberOfNodes,
-                            para->getParD(0)->numberofthreads,
-                            para->getParD(0)->distributionsAD27.f[0],
-                            para->getParD(0)->isEvenTimestep);
-            getLastCudaError("PlaneConcThS27 execution failed");
-            PlaneConcThS27( para->getParD(0)->ConcPlaneOut2,
-                            para->getParD(0)->pressureBC.kN,
-                            para->getParD(0)->pressureBC.numberOfBCnodes,
-                            para->getParD(0)->typeOfGridNode,
-                            para->getParD(0)->neighborX,
-                            para->getParD(0)->neighborY,
-                            para->getParD(0)->neighborZ,
-                            para->getParD(0)->numberOfNodes,
-                            para->getParD(0)->numberofthreads,
-                            para->getParD(0)->distributionsAD27.f[0],
-                            para->getParD(0)->isEvenTimestep);
-            getLastCudaError("PlaneConcThS27 execution failed");
-            //////////////////////////////////////////////////////////////////////////////////
-            ////Calculation of concentration at the plane
-            //////////////////////////////////////////////////////////////////////////////////
-            calcPlaneConc(para.get(), cudaMemoryManager.get(), 0);
-        }
+        //if (para->getParD(0)->evenOrOdd==true)  para->getParD(0)->evenOrOdd=false;
+        //else                                    para->getParD(0)->evenOrOdd=true;
         //////////////////////////////////////////////////////////////////////////////////
-
-
-
-
-      ////////////////////////////////////////////////////////////////////////////////
-      // File IO
-      ////////////////////////////////////////////////////////////////////////////////
-      //communicator->startTimer();
-      if(para->getTimestepOut()>0 && timestep%para->getTimestepOut()==0 && timestep>para->getTimestepStartOut())
-      {
-          //////////////////////////////////////////////////////////////////////////////////
-          //if (para->getParD(0)->evenOrOdd==true)  para->getParD(0)->evenOrOdd=false;
-          //else                                    para->getParD(0)->evenOrOdd=true;
-          //////////////////////////////////////////////////////////////////////////////////
-
         //////////////////////////////////////////////////////////////////////////
         averageTimer->stopTimer();
         averageTimer->outputPerformance(timestep, para.get(), communicator);
         //////////////////////////////////////////////////////////////////////////
+        if( para->getPrintFiles() )
+        {
+            readAndWriteFiles(timestep);
+        }
+        averageTimer->startTimer();
+    }
+}
 
-         if( para->getPrintFiles() )
-         {
-            VF_LOG_INFO("Write files t = {} ...", timestep);
-            for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
-            {
-                //////////////////////////////////////////////////////////////////////////
-                //exchange data for valid post process
-                updateGrid27->exchangeData(lev);
-                //////////////////////////////////////////////////////////////////////////
-               //if (para->getD3Qxx()==19)
-               //{
-                  //CalcMac(para->getParD(lev)->vx,     para->getParD(lev)->vy,       para->getParD(lev)->vz,      para->getParD(lev)->rho,
-                  //        para->getParD(lev)->geo,    para->getParD(lev)->size_Mat, para->getParD(lev)->gridNX,  para->getParD(lev)->gridNY,
-                  //        para->getParD(lev)->gridNZ, para->getParD(lev)->d0.f[0],  para->getParD(lev)->evenOrOdd);
-               //}
-               //else if (para->getD3Qxx()==27)
-               //{
-                   //if (para->getCalcMedian() && ((int)t > para->getTimeCalcMedStart()) && ((int)t <= para->getTimeCalcMedEnd()))
-                   //{
-                      // unsigned int tdiff = t - t_prev;
-                      // CalcMacMedSP27(para->getParD(lev)->vx_SP_Med,
-                   //                      para->getParD(lev)->vy_SP_Med,
-                   //                      para->getParD(lev)->vz_SP_Med,
-                   //                      para->getParD(lev)->rho_SP_Med,
-                   //                      para->getParD(lev)->press_SP_Med,
-                   //                      para->getParD(lev)->geoSP,
-                   //                      para->getParD(lev)->neighborX_SP,
-                   //                      para->getParD(lev)->neighborY_SP,
-                   //                      para->getParD(lev)->neighborZ_SP,
-                   //                      tdiff,
-                   //                      para->getParD(lev)->size_Mat_SP,
-                   //                      para->getParD(lev)->numberofthreads,
-                   //                      para->getParD(lev)->evenOrOdd);
-                      // getLastCudaError("CalcMacMedSP27 execution failed");
-                   //}
-
-                   //CalcMacSP27(para->getParD(lev)->vx_SP,
-       //                        para->getParD(lev)->vy_SP,
-       //                        para->getParD(lev)->vz_SP,
-       //                        para->getParD(lev)->rho,
-       //                        para->getParD(lev)->pressure,
-       //                        para->getParD(lev)->geoSP,
-       //                        para->getParD(lev)->neighborX_SP,
-       //                        para->getParD(lev)->neighborY_SP,
-       //                        para->getParD(lev)->neighborZ_SP,
-       //                        para->getParD(lev)->size_Mat_SP,
-       //                        para->getParD(lev)->numberofthreads,
-       //                        para->getParD(lev)->d0SP.f[0],
-       //                        para->getParD(lev)->evenOrOdd);
-       //            getLastCudaError("CalcMacSP27 execution failed");
-
-
-                   CalcMacCompSP27(para->getParD(lev)->velocityX,
-                                   para->getParD(lev)->velocityY,
-                                   para->getParD(lev)->velocityZ,
-                                   para->getParD(lev)->rho,
-                                   para->getParD(lev)->pressure,
-                                   para->getParD(lev)->typeOfGridNode,
-                                   para->getParD(lev)->neighborX,
-                                   para->getParD(lev)->neighborY,
-                                   para->getParD(lev)->neighborZ,
-                                   para->getParD(lev)->numberOfNodes,
-                                   para->getParD(lev)->numberofthreads,
-                                   para->getParD(lev)->distributions.f[0],
-                                   para->getParD(lev)->isEvenTimestep);
-                   getLastCudaError("CalcMacSP27 execution failed");
-
-                // // overwrite with wall nodes
-                //    SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
-                //                                para->getParD(lev)->velocityX,
-                //                                para->getParD(lev)->velocityY,
-                //                                para->getParD(lev)->velocityZ,
-                //                                para->getParD(lev)->geometryBC.Vx,
-                //                                para->getParD(lev)->geometryBC.Vy,
-                //                                para->getParD(lev)->geometryBC.Vz,
-                //                                para->getParD(lev)->geometryBC.numberOfBCnodes,
-                //                                para->getParD(lev)->geometryBC.k,
-                //                                para->getParD(lev)->rho,
-                //                                para->getParD(lev)->pressure,
-                //                                para->getParD(lev)->typeOfGridNode,
-                //                                para->getParD(lev)->neighborX,
-                //                                para->getParD(lev)->neighborY,
-                //                                para->getParD(lev)->neighborZ,
-                //                                para->getParD(lev)->size_Mat,
-                //                                para->getParD(lev)->distributions.f[0],
-                //                                para->getParD(lev)->isEvenTimestep);
-                //   getLastCudaError("SetOutputWallVelocitySP27 execution failed");
-
-                //    SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
-                //                                para->getParD(lev)->velocityX,
-                //                                para->getParD(lev)->velocityY,
-                //                                para->getParD(lev)->velocityZ,
-                //                                para->getParD(lev)->velocityBC.Vx,
-                //                                para->getParD(lev)->velocityBC.Vy,
-                //                                para->getParD(lev)->velocityBC.Vz,
-                //                                para->getParD(lev)->velocityBC.numberOfBCnodes,
-                //                                para->getParD(lev)->velocityBC.k,
-                //                                para->getParD(lev)->rho,
-                //                                para->getParD(lev)->pressure,
-                //                                para->getParD(lev)->typeOfGridNode,
-                //                                para->getParD(lev)->neighborX,
-                //                                para->getParD(lev)->neighborY,
-                //                                para->getParD(lev)->neighborZ,
-                //                                para->getParD(lev)->size_Mat,
-                //                                para->getParD(lev)->distributions.f[0],
-                //                                para->getParD(lev)->isEvenTimestep);
-                //   getLastCudaError("SetOutputWallVelocitySP27 execution failed");
-
-                 //}
-
-                   cudaMemoryManager->cudaCopyPrint(lev);
-               if (para->getCalcMedian())
-               {
-                   cudaMemoryManager->cudaCopyMedianPrint(lev);
-               }
-
-               //////////////////////////////////////////////////////////////////////////
-               //TODO: implement flag to write ASCII data
-               if (para->getWriteVeloASCIIfiles())
-                   VeloASCIIWriter::writeVelocitiesAsTXT(para.get(), lev, timestep);
-               //////////////////////////////////////////////////////////////////////////
-               if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer )
-               {
-                   std::string fname = para->getFName() + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_t_" + StringUtil::toString<int>(timestep);
-
-                   if (this->kineticEnergyAnalyzer) this->kineticEnergyAnalyzer->writeToFile(fname);
-                   if (this->enstrophyAnalyzer)     this->enstrophyAnalyzer->writeToFile(fname);
-               }
-               //////////////////////////////////////////////////////////////////////////
-
-
-               ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-               if (para->getDiffOn()==true)
-               {
-                  if (para->getDiffMod() == 7)
-                  {
-                     CalcMacThS7(   para->getParD(lev)->Conc,
-                                    para->getParD(lev)->typeOfGridNode,
-                                    para->getParD(lev)->neighborX,
-                                    para->getParD(lev)->neighborY,
-                                    para->getParD(lev)->neighborZ,
-                                    para->getParD(lev)->numberOfNodes,
-                                    para->getParD(lev)->numberofthreads,
-                                    para->getParD(lev)->distributionsAD7.f[0],
-                                    para->getParD(lev)->isEvenTimestep);
-                     getLastCudaError("CalcMacTh7 execution failed");
-                  }
-                  else if (para->getDiffMod() == 27)
-                  {
-                     CalcConcentration27(
-                                    para->getParD(lev)->numberofthreads,
-                                    para->getParD(lev)->Conc,
-                                    para->getParD(lev)->typeOfGridNode,
-                                    para->getParD(lev)->neighborX,
-                                    para->getParD(lev)->neighborY,
-                                    para->getParD(lev)->neighborZ,
-                                    para->getParD(lev)->numberOfNodes,
-                                    para->getParD(lev)->distributionsAD27.f[0],
-                                    para->getParD(lev)->isEvenTimestep);
-                  }
-
-                  cudaMemoryManager->cudaCopyConcentrationDeviceToHost(lev);
-                  //cudaMemoryCopy(para->getParH(lev)->Conc, para->getParD(lev)->Conc,  para->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost);
-               }
-               ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-               ////print cp
-               //if ((para->getParH(lev)->cpTop.size() > 0) && (t > para->getTStartOut()))
-               //{
-                  // printCpTopIntermediateStep(para, t, lev);
-               //}
-               ////////////////////////////////////////////////////////////////////////////////
-               //MeasurePointWriter::writeSpacialAverageForXZSlices(para, lev, t);
-               ////////////////////////////////////////////////////////////////////////////////
-               //MeasurePointWriter::writeTestAcousticXY(para, lev, t);
-               //MeasurePointWriter::writeTestAcousticYZ(para, lev, t);
-               //MeasurePointWriter::writeTestAcousticXZ(para, lev, t);
-               ////////////////////////////////////////////////////////////////////////
-            }
-
-            //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            ////test print press mirror
-            //if (t > para->getTStartOut())
-            //{
-            //    ////////////////////////////////////////////////////////////////////////////////
-            //    //Level 7
-            //    CalcCPtop27(para->getParD(7)->d0SP.f[0],
-            //        para->getParD(7)->cpTopIndex,
-            //        para->getParD(7)->numberOfPointsCpTop,
-            //        para->getParD(7)->cpPressTop,
-            //        para->getParD(7)->neighborX_SP,
-            //        para->getParD(7)->neighborY_SP,
-            //        para->getParD(7)->neighborZ_SP,
-            //        para->getParD(7)->size_Mat_SP,
-            //        para->getParD(7)->evenOrOdd,
-            //        para->getParD(7)->numberofthreads);
-            //    //////////////////////////////////////////////////////////////////////////////////
-            //    calcPressForMirror(para, 7);
-            //    ////////////////////////////////////////////////////////////////////////////////
-            //    //Level 8
-            //    CalcCPtop27(para->getParD(8)->d0SP.f[0],
-            //        para->getParD(8)->cpTopIndex,
-            //        para->getParD(8)->numberOfPointsCpTop,
-            //        para->getParD(8)->cpPressTop,
-            //        para->getParD(8)->neighborX_SP,
-            //        para->getParD(8)->neighborY_SP,
-            //        para->getParD(8)->neighborZ_SP,
-            //        para->getParD(8)->size_Mat_SP,
-            //        para->getParD(8)->evenOrOdd,
-            //        para->getParD(8)->numberofthreads);
-            //    //////////////////////////////////////////////////////////////////////////////////
-            //    calcPressForMirror(para, 8);
-            //    ////////////////////////////////////////////////////////////////////////////////
-            //    //print press mirror
-            //    printScalars(para, false);
-            //    ////////////////////////////////////////////////////////////////////////////////
-            //}
-            //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-            //t_prev = t;
-
-            //////////////////////////////////////////////////////////////////////////
-            ////Data Analysis
-            ////AnalysisData::writeAnalysisData(para, t);
-            //AnalysisData::writeAnalysisDataX(para, t);
-            //AnalysisData::writeAnalysisDataZ(para, t);
-            //////////////////////////////////////////////////////////////////////////
+void Simulation::readAndWriteFiles(uint timestep)
+{
+    VF_LOG_INFO("Write files t = {} ...", timestep);
 
-            ////////////////////////////////////////////////////////////////////////
-            //pressure difference
-            ////////////////////////////////////////////////////////////////////////
-               //if (para->getMyID() == para->getPressInID())       calcPressure(para,  "in", 0);
-               //else if (para->getMyID() == para->getPressOutID()) calcPressure(para, "out", 0);
-            ////////////////////////////////////////////////////////////////////////
-            //flow rate
-            ////////////////////////////////////////////////////////////////////////
-              //calcFlowRate(para, 0);
-            ////////////////////////////////////////////////////////////////////////
-
-            ////////////////////////////////////////////////////////////////////////
-            //calculate 2nd, 3rd and higher order moments
-            ////////////////////////////////////////////////////////////////////////
-            if (para->getCalc2ndOrderMoments())  calc2ndMoments(para.get(), cudaMemoryManager.get());
-            if (para->getCalc3rdOrderMoments())  calc3rdMoments(para.get(), cudaMemoryManager.get());
-            if (para->getCalcHighOrderMoments()) calcHigherOrderMoments(para.get(), cudaMemoryManager.get());
-            ////////////////////////////////////////////////////////////////////////
-
-            ////////////////////////////////////////////////////////////////////////
-            //calculate median on host
-            ////////////////////////////////////////////////////////////////////////
-            if (para->getCalcMedian() && ((int)timestep > para->getTimeCalcMedStart()) && ((int)timestep <= para->getTimeCalcMedEnd()) && ((timestep%(unsigned int)para->getclockCycleForMP())==0))
+    for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+    {
+        //////////////////////////////////////////////////////////////////////////
+        //exchange data for valid post process
+        updateGrid27->exchangeData(lev);
+
+        // ////////////////////////////////////////////////////////////////////////
+        // if (para->getD3Qxx()==19)
+        // {
+        //     CalcMac(para->getParD(lev)->vx,     para->getParD(lev)->vy,       para->getParD(lev)->vz,      para->getParD(lev)->rho,
+        //             para->getParD(lev)->geo,    para->getParD(lev)->size_Mat, para->getParD(lev)->gridNX,  para->getParD(lev)->gridNY,
+        //             para->getParD(lev)->gridNZ, para->getParD(lev)->d0.f[0],  para->getParD(lev)->evenOrOdd);
+        // }
+        // else if (para->getD3Qxx()==27)
+        // {
+        //    if (para->getCalcMedian() && ((int)t > para->getTimeCalcMedStart()) && ((int)t <= para->getTimeCalcMedEnd()))
+        //    {
+        //         unsigned int tdiff = t - t_prev;
+        //         CalcMacMedSP27(para->getParD(lev)->vx_SP_Med,
+        //                           para->getParD(lev)->vy_SP_Med,
+        //                           para->getParD(lev)->vz_SP_Med,
+        //                           para->getParD(lev)->rho_SP_Med,
+        //                           para->getParD(lev)->press_SP_Med,
+        //                           para->getParD(lev)->geoSP,
+        //                           para->getParD(lev)->neighborX_SP,
+        //                           para->getParD(lev)->neighborY_SP,
+        //                           para->getParD(lev)->neighborZ_SP,
+        //                           tdiff,
+        //                           para->getParD(lev)->size_Mat_SP,
+        //                           para->getParD(lev)->numberofthreads,
+        //                           para->getParD(lev)->evenOrOdd);
+        //         getLastCudaError("CalcMacMedSP27 execution failed");
+        //    }
+        //    CalcMacSP27(para->getParD(lev)->vx_SP,
+        //                        para->getParD(lev)->vy_SP,
+        //                        para->getParD(lev)->vz_SP,
+        //                        para->getParD(lev)->rho,
+        //                        para->getParD(lev)->pressure,
+        //                        para->getParD(lev)->geoSP,
+        //                        para->getParD(lev)->neighborX_SP,
+        //                        para->getParD(lev)->neighborY_SP,
+        //                        para->getParD(lev)->neighborZ_SP,
+        //                        para->getParD(lev)->size_Mat_SP,
+        //                        para->getParD(lev)->numberofthreads,
+        //                        para->getParD(lev)->d0SP.f[0],
+        //                        para->getParD(lev)->evenOrOdd);
+        //     getLastCudaError("CalcMacSP27 execution failed");
+            CalcMacCompSP27(para->getParD(lev)->velocityX,
+                            para->getParD(lev)->velocityY,
+                            para->getParD(lev)->velocityZ,
+                            para->getParD(lev)->rho,
+                            para->getParD(lev)->pressure,
+                            para->getParD(lev)->typeOfGridNode,
+                            para->getParD(lev)->neighborX,
+                            para->getParD(lev)->neighborY,
+                            para->getParD(lev)->neighborZ,
+                            para->getParD(lev)->numberOfNodes,
+                            para->getParD(lev)->numberofthreads,
+                            para->getParD(lev)->distributions.f[0],
+                            para->getParD(lev)->isEvenTimestep);
+            getLastCudaError("CalcMacSP27 execution failed");
+        //     // overwrite with wall nodes
+        //     SetOutputWallVelocitySP27( para->getParD(lev)->numberofthreads,
+        //                                para->getParD(lev)->velocityX,
+        //                                para->getParD(lev)->velocityY,
+        //                                para->getParD(lev)->velocityZ,
+        //                                para->getParD(lev)->geometryBC.Vx,
+        //                                para->getParD(lev)->geometryBC.Vy,
+        //                                para->getParD(lev)->geometryBC.Vz,
+        //                                para->getParD(lev)->geometryBC.numberOfBCnodes,
+        //                                para->getParD(lev)->geometryBC.k,
+        //                                para->getParD(lev)->rho,
+        //                                para->getParD(lev)->pressure,
+        //                                para->getParD(lev)->typeOfGridNode,
+        //                                para->getParD(lev)->neighborX,
+        //                                para->getParD(lev)->neighborY,
+        //                                para->getParD(lev)->neighborZ,
+        //                                para->getParD(lev)->size_Mat,
+        //                                para->getParD(lev)->distributions.f[0],
+        //                                para->getParD(lev)->isEvenTimestep);
+        //     getLastCudaError("SetOutputWallVelocitySP27 execution failed");
+        //     SetOutputWallVelocitySP27( para->getParD(lev)->numberofthreads,
+        //                                para->getParD(lev)->velocityX,
+        //                                para->getParD(lev)->velocityY,
+        //                                para->getParD(lev)->velocityZ,
+        //                                para->getParD(lev)->velocityBC.Vx,
+        //                                para->getParD(lev)->velocityBC.Vy,
+        //                                para->getParD(lev)->velocityBC.Vz,
+        //                                para->getParD(lev)->velocityBC.numberOfBCnodes,
+        //                                para->getParD(lev)->velocityBC.k,
+        //                                para->getParD(lev)->rho,
+        //                                para->getParD(lev)->pressure,
+        //                                para->getParD(lev)->typeOfGridNode,
+        //                                para->getParD(lev)->neighborX,
+        //                                para->getParD(lev)->neighborY,
+        //                                para->getParD(lev)->neighborZ,
+        //                                para->getParD(lev)->size_Mat,
+        //                                para->getParD(lev)->distributions.f[0],
+        //                                para->getParD(lev)->isEvenTimestep);
+        //     getLastCudaError("SetOutputWallVelocitySP27 execution failed");
+        // }
+
+        cudaMemoryManager->cudaCopyPrint(lev);
+        if (para->getCalcMedian())
+        {
+            cudaMemoryManager->cudaCopyMedianPrint(lev);
+        }
+        //////////////////////////////////////////////////////////////////////////
+        //TODO: implement flag to write ASCII data
+        if (para->getWriteVeloASCIIfiles())
+            VeloASCIIWriter::writeVelocitiesAsTXT(para.get(), lev, timestep);
+        //////////////////////////////////////////////////////////////////////////
+        if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer )
+        {
+            std::string fname = para->getFName() + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_t_" + StringUtil::toString<int>(timestep);
+            if (this->kineticEnergyAnalyzer) this->kineticEnergyAnalyzer->writeToFile(fname);
+            if (this->enstrophyAnalyzer)     this->enstrophyAnalyzer->writeToFile(fname);
+        }
+        //////////////////////////////////////////////////////////////////////////
+        if (para->getDiffOn()==true)
+        {
+            if (para->getDiffMod() == 7)
             {
-                unsigned int tdiff = timestep - t_prev;
-                calcMedian(para.get(), tdiff);
-
-                /////////////////////////////////
-                //added for incremental averaging
-                t_prev = timestep;
-                resetMedian(para.get());
-                /////////////////////////////////
+               CalcMacThS7( para->getParD(lev)->concentration,
+                            para->getParD(lev)->typeOfGridNode,
+                            para->getParD(lev)->neighborX,
+                            para->getParD(lev)->neighborY,
+                            para->getParD(lev)->neighborZ,
+                            para->getParD(lev)->numberOfNodes,
+                            para->getParD(lev)->numberofthreads,
+                            para->getParD(lev)->distributionsAD7.f[0],
+                            para->getParD(lev)->isEvenTimestep);
+               getLastCudaError("CalcMacTh7 execution failed");
             }
-            if (para->getCalcTurbulenceIntensity())
+            else if (para->getDiffMod() == 27)
             {
-                uint t_diff = timestep - t_turbulenceIntensity;
-                calcTurbulenceIntensity(para.get(), cudaMemoryManager.get(), t_diff);
-                //writeAllTiDatafToFile(para.get(), t);
-            }
-            ////////////////////////////////////////////////////////////////////////
-            dataWriter->writeTimestep(para, timestep);
-            ////////////////////////////////////////////////////////////////////////
-            if (para->getCalcTurbulenceIntensity()) {
-                t_turbulenceIntensity = timestep;
-                resetVelocityFluctuationsAndMeans(para.get(), cudaMemoryManager.get());
+               CalcConcentration27(
+                              para->getParD(lev)->numberofthreads,
+                              para->getParD(lev)->concentration,
+                              para->getParD(lev)->typeOfGridNode,
+                              para->getParD(lev)->neighborX,
+                              para->getParD(lev)->neighborY,
+                              para->getParD(lev)->neighborZ,
+                              para->getParD(lev)->numberOfNodes,
+                              para->getParD(lev)->distributionsAD.f[0],
+                              para->getParD(lev)->isEvenTimestep);
             }
-            ////////////////////////////////////////////////////////////////////////
-            if (para->getCalcDragLift()) printDragLift(para.get(), cudaMemoryManager.get(), timestep);
-            ////////////////////////////////////////////////////////////////////////
-            if (para->getCalcParticles()) copyAndPrintParticles(para.get(), cudaMemoryManager.get(), timestep, false);
-            ////////////////////////////////////////////////////////////////////////
-            VF_LOG_INFO("... done");
-            ////////////////////////////////////////////////////////////////////////
-         }
-
+            cudaMemoryManager->cudaCopyConcentrationDeviceToHost(lev);
+            //cudaMemoryCopy(para->getParH(lev)->Conc, para->getParD(lev)->Conc,  para->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost);
+        }
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        ////print cp
+        //if ((para->getParH(lev)->cpTop.size() > 0) && (t > para->getTStartOut()))
+        //{
+           // printCpTopIntermediateStep(para, t, lev);
+        //}
+        ////////////////////////////////////////////////////////////////////////////////
+        //MeasurePointWriter::writeSpacialAverageForXZSlices(para, lev, t);
+        ////////////////////////////////////////////////////////////////////////////////
+        //MeasurePointWriter::writeTestAcousticXY(para, lev, t);
+        //MeasurePointWriter::writeTestAcousticYZ(para, lev, t);
+        //MeasurePointWriter::writeTestAcousticXZ(para, lev, t);
         ////////////////////////////////////////////////////////////////////////
-        averageTimer->startTimer();
-      }
     }
-
-    /////////////////////////////////////////////////////////////////////////
-
-    ////////////////////////////////////////////////////////////////////////////////
-    //printDragLift(para);
-    ////////////////////////////////////////////////////////////////////////////////
-
-    ////////////////////////////////////////////////////////////////////////////////
-    if (para->getDiffOn()==true) printPlaneConc(para.get(), cudaMemoryManager.get());
-    ////////////////////////////////////////////////////////////////////////////////
-
-    ////////////////////////////////////////////////////////////////////////////////
-    ////for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
-    ////{
-    ////    if (para->getParH(lev)->cpTop.size() > 0)
-    ////    {
-    ////        printCpTop(para, lev);
-    ////    }
-    ////}
-    //for (int lev = 7; lev <= 8; lev++)
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////test print press mirror
+    //if (t > para->getTStartOut())
     //{
-    //    printCpTop(para, lev);
+    //    ////////////////////////////////////////////////////////////////////////////////
+    //    //Level 7
+    //    CalcCPtop27(para->getParD(7)->d0SP.f[0],
+    //        para->getParD(7)->cpTopIndex,
+    //        para->getParD(7)->numberOfPointsCpTop,
+    //        para->getParD(7)->cpPressTop,
+    //        para->getParD(7)->neighborX_SP,
+    //        para->getParD(7)->neighborY_SP,
+    //        para->getParD(7)->neighborZ_SP,
+    //        para->getParD(7)->size_Mat_SP,
+    //        para->getParD(7)->evenOrOdd,
+    //        para->getParD(7)->numberofthreads);
+    //    //////////////////////////////////////////////////////////////////////////////////
+    //    calcPressForMirror(para, 7);
+    //    ////////////////////////////////////////////////////////////////////////////////
+    //    //Level 8
+    //    CalcCPtop27(para->getParD(8)->d0SP.f[0],
+    //        para->getParD(8)->cpTopIndex,
+    //        para->getParD(8)->numberOfPointsCpTop,
+    //        para->getParD(8)->cpPressTop,
+    //        para->getParD(8)->neighborX_SP,
+    //        para->getParD(8)->neighborY_SP,
+    //        para->getParD(8)->neighborZ_SP,
+    //        para->getParD(8)->size_Mat_SP,
+    //        para->getParD(8)->evenOrOdd,
+    //        para->getParD(8)->numberofthreads);
+    //    //////////////////////////////////////////////////////////////////////////////////
+    //    calcPressForMirror(para, 8);
+    //    ////////////////////////////////////////////////////////////////////////////////
+    //    //print press mirror
+    //    printScalars(para, false);
+    //    ////////////////////////////////////////////////////////////////////////////////
     //}
-    ////printCpTop(para);
-    ////printCpBottom(para);
-    ////printCpBottom2(para);
-    ////////////////////////////////////////////////////////////////////////////////
-
- //  //////////////////////////////////////////////////////////////////////////
- //  //Copy Measure Values
-    //for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
-    //{
-    //    VF_LOG_INFO("Copy MeasurePoints at level = {}", lev);
-    //    para->cudaCopyMeasurePointsToHost(lev);
-    //    para->copyMeasurePointsArrayToVector(lev);
-    //    VF_LOG_INFO("Write MeasurePoints at level = {}", lev);
-    //    for(int j = 0; j < (int)para->getParH(lev)->MP.size(); j++)
-    //    {
-    //        MeasurePointWriter::writeMeasurePoints(para, lev, j, 0);
-    //    }
-    //}
- //  //////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    //t_prev = t;
+    //////////////////////////////////////////////////////////////////////////
+    ////Data Analysis
+    ////AnalysisData::writeAnalysisData(para, t);
+    //AnalysisData::writeAnalysisDataX(para, t);
+    //AnalysisData::writeAnalysisDataZ(para, t);
+    //////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////
+    //pressure difference
+    ////////////////////////////////////////////////////////////////////////
+    //if (para->getMyID() == para->getPressInID())       calcPressure(para,  "in", 0);
+    //else if (para->getMyID() == para->getPressOutID()) calcPressure(para, "out", 0);
+    ////////////////////////////////////////////////////////////////////////
+    //flow rate
+    ////////////////////////////////////////////////////////////////////////
+    //calcFlowRate(para, 0);
+    ////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////
+    //calculate 2nd, 3rd and higher order moments
+    ////////////////////////////////////////////////////////////////////////
+    if (para->getCalc2ndOrderMoments())  calc2ndMoments(para.get(), cudaMemoryManager.get());
+    if (para->getCalc3rdOrderMoments())  calc3rdMoments(para.get(), cudaMemoryManager.get());
+    if (para->getCalcHighOrderMoments()) calcHigherOrderMoments(para.get(), cudaMemoryManager.get());
+    ////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////
+    //calculate median on host
+    ////////////////////////////////////////////////////////////////////////
+    if (para->getCalcMedian() && ((int)timestep > para->getTimeCalcMedStart()) && ((int)timestep <= para->getTimeCalcMedEnd()) && ((timestep%(unsigned int)para->getclockCycleForMP())==0))
+    {
+        unsigned int tdiff = timestep - previousTimestepForAveraging;
+        calcMedian(para.get(), tdiff);
+        /////////////////////////////////
+        //added for incremental averaging
+        previousTimestepForAveraging = timestep;
+        resetMedian(para.get());
+        /////////////////////////////////
+    }
+    if (para->getCalcTurbulenceIntensity())
+    {
+        uint t_diff = timestep - previousTimestepForTurbulenceIntensityCalculation;
+        calcTurbulenceIntensity(para.get(), cudaMemoryManager.get(), t_diff);
+        //writeAllTiDatafToFile(para.get(), t);
+    }
+    ////////////////////////////////////////////////////////////////////////
+    dataWriter->writeTimestep(para, timestep);
+    ////////////////////////////////////////////////////////////////////////
+    if (para->getCalcTurbulenceIntensity()) {
+        previousTimestepForTurbulenceIntensityCalculation = timestep;
+        resetVelocityFluctuationsAndMeans(para.get(), cudaMemoryManager.get());
+    }
+    ////////////////////////////////////////////////////////////////////////
+    if (para->getCalcDragLift()) 
+    {
+        printDragLift(para.get(), cudaMemoryManager.get(), timestep);
+    }
+    ////////////////////////////////////////////////////////////////////////
+    if (para->getCalcParticles()) copyAndPrintParticles(para.get(), cudaMemoryManager.get(), timestep, false);
+    ////////////////////////////////////////////////////////////////////////
+    VF_LOG_INFO("... done");
+    ////////////////////////////////////////////////////////////////////////
 }
 
 void Simulation::porousMedia()
@@ -1214,7 +1169,7 @@ Simulation::~Simulation()
         for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
         {
             checkCudaErrors(cudaFreeHost(para->getParH(lev)->Conc_Full));
-            checkCudaErrors(cudaFreeHost(para->getParH(lev)->Conc));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->concentration));
             checkCudaErrors(cudaFreeHost(para->getParH(lev)->Temp.temp));
             checkCudaErrors(cudaFreeHost(para->getParH(lev)->Temp.k));
             checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempVel.temp));
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
index 5bb58827ea58a8fe135b934f6e1aa0a5ee42cdfd..ba2a321707db4138aee9e1c30bae4dede017a5b8 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
@@ -31,6 +31,7 @@ class EnstrophyAnalyzer;
 class BoundaryConditionFactory;
 class GridScalingFactory;
 class TurbulenceModelFactory;
+class Timer;
 
 class Simulation
 {
@@ -44,16 +45,22 @@ public:
     void run();
 
     void setFactories(std::unique_ptr<KernelFactory> &&kernelFactory,
-               std::unique_ptr<PreProcessorFactory> &&preProcessorFactory);
+                      std::unique_ptr<PreProcessorFactory> &&preProcessorFactory);
     void setDataWriter(std::shared_ptr<DataWriter> dataWriter);
     void addKineticEnergyAnalyzer(uint tAnalyse);
     void addEnstrophyAnalyzer(uint tAnalyse);
 
+    //! \brief Can be used as an alternative to run() if the simulation needs to be controlled from the outside (e.g. for fluid-structure interaction, FSI).
+    void calculateTimestep(uint timestep);
+    //! \brief Needed to initialize the simulation timers if calculateTimestep() is used instead of run().
+    void initTimers();
+
 private:
 	void init(GridProvider &gridProvider, BoundaryConditionFactory *bcFactory, SPtr<TurbulenceModelFactory> tmFactory, GridScalingFactory *scalingFactory);
     void allocNeighborsOffsetsScalesAndBoundaries(GridProvider& gridProvider);
     void porousMedia();
     void definePMarea(std::shared_ptr<PorousMedia>& pm);
+    void readAndWriteFiles(uint timestep);
 
 	std::unique_ptr<KernelFactory> kernelFactory;
 	std::shared_ptr<PreProcessorFactory> preProcessorFactory;
@@ -80,6 +87,13 @@ private:
 
     SPtr<RestartObject> restart_object;
 
+    // Timer
+    std::unique_ptr<Timer> averageTimer;
+    uint previousTimestepForAveraging;
+    uint previousTimestepForTurbulenceIntensityCalculation;
+    uint timestepForMeasuringPoints;
+    
+
 	//Forcing Calculation
 	std::shared_ptr<ForceCalculations> forceCalculator;
 
diff --git a/src/gpu/VirtualFluids_GPU/Output/AnalysisData.hpp b/src/gpu/VirtualFluids_GPU/Output/AnalysisData.hpp
index d091f38ea228d43345235132cd4947ff750436d6..882cc2b46c3faf3eb81a5296a7b8c6f2e1bc3ac8 100644
--- a/src/gpu/VirtualFluids_GPU/Output/AnalysisData.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/AnalysisData.hpp
@@ -3,7 +3,7 @@
 
 #include "basics/utilities/UbFileOutputASCII.h"
 #include "Parameter/Parameter.h"
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 
 class AnalysisData
 {
diff --git a/src/gpu/VirtualFluids_GPU/Output/DataWriter.h b/src/gpu/VirtualFluids_GPU/Output/DataWriter.h
index 48a9b56da3abd5337e1c6042a7dcba3304c449e6..67a50867c1ef957c2e80b338f8597262508c468d 100644
--- a/src/gpu/VirtualFluids_GPU/Output/DataWriter.h
+++ b/src/gpu/VirtualFluids_GPU/Output/DataWriter.h
@@ -2,7 +2,7 @@
 #define DATA_WRITER_H
 
 #include "PointerDefinitions.h"
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 
 #include <memory>
 #include <vector>
diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
index 9ff86ea36c5b3c465990d622547d98cb4686f929..e5062a8ec63940ab6e23e567c0674681d4af6509 100644
--- a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
+++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
@@ -1,121 +1,157 @@
-
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DistributionDebugInspector.cu
+//! \ingroup Output
+//! \author Henrik Asmuth, Henry Korb
+//======================================================================================
 #include "DistributionDebugInspector.h"
 
 #include "Parameter/Parameter.h"
-#include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
-#include "Kernel/Utilities/DistributionHelper.cuh"
+#include "basics/constants/NumericConstants.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 #include <cuda/CudaGrid.h>
 #include <cuda.h>
 
 #include <iostream>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
+using namespace vf::gpu;
+
+__global__ void printFs(
+    real* distributions,
+    bool isEvenTimestep,
+    unsigned long long numberOfFluidNodes,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* typeOfGridNode,
+    real* coordX,
+    real* coordY,
+    real* coordZ,
+    real minX,
+    real maxX,
+    real minY,
+    real maxY,
+    real minZ,
+    real maxZ)
+{
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned k_000 = getNodeIndex();
+
+    if (k_000 >= numberOfFluidNodes || typeOfGridNode[k_000]!=GEO_FLUID ) 
+        return;
 
-__global__ void printFs(  real* distributions,
-                        bool isEvenTimestep,
-                        unsigned long numberOfFluidNodes,
-                        uint* neighborX,
-                        uint* neighborY,
-                        uint* neighborZ,
-                        uint* typeOfGridNode,
-                        real* coordX,
-                        real* coordY,
-                        real* coordZ,
-                        real minX,
-                        real maxX,
-                        real minY,
-                        real maxY,
-                        real minZ,
-                        real maxZ)
-                        {
-                            const unsigned k_000 = vf::gpu::getNodeIndex();
-
-                            if (k_000 >= numberOfFluidNodes || typeOfGridNode[k_000]!=GEO_FLUID ) 
-                                return;
-
-                            real coordNodeX = coordX[k_000];
-                            real coordNodeY = coordY[k_000];
-                            real coordNodeZ = coordZ[k_000];
-
-                            if( coordNodeX>=minX && coordNodeX<=maxX &&
-                                coordNodeY>=minY && coordNodeY<=maxY &&
-                                coordNodeZ>=minZ && coordNodeZ<=maxZ    )
-                                {
-                                    Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfFluidNodes, isEvenTimestep);
-                                    ////////////////////////////////////////////////////////////////////////////////
-                                    //! - Set neighbor indices (necessary for indirect addressing)
-                                    uint k_M00 = neighborX[k_000];
-                                    uint k_0M0 = neighborY[k_000];
-                                    uint k_00M = neighborZ[k_000];
-                                    uint k_MM0 = neighborY[k_M00];
-                                    uint k_M0M = neighborZ[k_M00];
-                                    uint k_0MM = neighborZ[k_0M0];
-                                    uint k_MMM = neighborZ[k_MM0];
-                                    ////////////////////////////////////////////////////////////////////////////////////
-                                    //! - Set local distributions
-                                    //!
-                                    real f_000 = (dist.f[DIR_000])[k_000];
-                                    real f_P00 = (dist.f[DIR_P00])[k_000];
-                                    real f_M00 = (dist.f[DIR_M00])[k_M00];
-                                    real f_0P0 = (dist.f[DIR_0P0])[k_000];
-                                    real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
-                                    real f_00P = (dist.f[DIR_00P])[k_000];
-                                    real f_00M = (dist.f[DIR_00M])[k_00M];
-                                    real f_PP0 = (dist.f[DIR_PP0])[k_000];
-                                    real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
-                                    real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
-                                    real f_MP0 = (dist.f[DIR_MP0])[k_M00];
-                                    real f_P0P = (dist.f[DIR_P0P])[k_000];
-                                    real f_M0M = (dist.f[DIR_M0M])[k_M0M];
-                                    real f_P0M = (dist.f[DIR_P0M])[k_00M];
-                                    real f_M0P = (dist.f[DIR_M0P])[k_M00];
-                                    real f_0PP = (dist.f[DIR_0PP])[k_000];
-                                    real f_0MM = (dist.f[DIR_0MM])[k_0MM];
-                                    real f_0PM = (dist.f[DIR_0PM])[k_00M];
-                                    real f_0MP = (dist.f[DIR_0MP])[k_0M0];
-                                    real f_PPP = (dist.f[DIR_PPP])[k_000];
-                                    real f_MPP = (dist.f[DIR_MPP])[k_M00];
-                                    real f_PMP = (dist.f[DIR_PMP])[k_0M0];
-                                    real f_MMP = (dist.f[DIR_MMP])[k_MM0];
-                                    real f_PPM = (dist.f[DIR_PPM])[k_00M];
-                                    real f_MPM = (dist.f[DIR_MPM])[k_M0M];
-                                    real f_PMM = (dist.f[DIR_PMM])[k_0MM];
-                                    real f_MMM = (dist.f[DIR_MMM])[k_MMM];
-
-                                    real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
-                                                (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
-                                                ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
-                                                ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
-                                                    f_000;
-
-                                    real oneOverRho = c1o1 / (c1o1 + drho);
-
-                                    real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
-                                                (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
-                                            oneOverRho;
-                                    real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
-                                                (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
-                                            oneOverRho;
-                                    real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
-                                                (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
-                                            oneOverRho;
-
-                                    printf("Node %u \t (%f\t%f\t%f)\n rho: %f\t velo: %f\t %f \t %f\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, drho, vvx, vvy, vvz);
-                                    printf("Node %u \t (%f\t%f\t%f)\n f_M00\t%f\t f_000\t%f\t f_P00\t%f\n f_MP0\t%f\t f_0P0\t%f\t f_PP0\t%f\n f_MM0\t%f\t f_0M0\t%f\t f_PM0\t%f\n f_M0P\t%f\t f_00P\t%f\t f_P0P\t%f\n f_M0M\t%f\t f_00M\t%f\t f_P0M\t%f\n f_MPP\t%f\t f_0PP\t%f\t f_PPP\t%f\n f_MPM\t%f\t f_0PM\t%f\t f_PPM\t%f\n f_MMP\t%f\t f_0MP\t%f\t f_PMP\t%f\n f_MMM\t%f\t f_0MM\t%f\t f_PMM\t%f\n\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, f_M00, f_000, f_P00,f_MP0, f_0P0, f_PP0, f_MM0, f_0M0, f_PM0, f_M0P, f_00P, f_P0P, f_M0M, f_00M, f_P0M, f_MPP, f_0PP, f_PPP, f_MPM, f_0PM, f_PPM, f_MMP, f_0MP, f_PMP, f_MMM, f_0MM, f_PMM);
-
-                                }
-
-                        }
-
-
-
-
-void DistributionDebugInspector::inspect(std::shared_ptr<Parameter> para, uint level, uint t){
-    
+    real coordNodeX = coordX[k_000];
+    real coordNodeY = coordY[k_000];
+    real coordNodeZ = coordZ[k_000];
+
+    if( coordNodeX>=minX && coordNodeX<=maxX &&
+        coordNodeY>=minY && coordNodeY<=maxY &&
+        coordNodeZ>=minZ && coordNodeZ<=maxZ    )
+        {
+            Distributions27 dist;
+            getPointersToDistributions(dist, distributions, numberOfFluidNodes, isEvenTimestep);
+            ////////////////////////////////////////////////////////////////////////////////
+            //! - Set neighbor indices (necessary for indirect addressing)
+            uint k_M00 = neighborX[k_000];
+            uint k_0M0 = neighborY[k_000];
+            uint k_00M = neighborZ[k_000];
+            uint k_MM0 = neighborY[k_M00];
+            uint k_M0M = neighborZ[k_M00];
+            uint k_0MM = neighborZ[k_0M0];
+            uint k_MMM = neighborZ[k_MM0];
+            ////////////////////////////////////////////////////////////////////////////////////
+            //! - Set local distributions
+            //!
+            real f_000 = (dist.f[DIR_000])[k_000];
+            real f_P00 = (dist.f[DIR_P00])[k_000];
+            real f_M00 = (dist.f[DIR_M00])[k_M00];
+            real f_0P0 = (dist.f[DIR_0P0])[k_000];
+            real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
+            real f_00P = (dist.f[DIR_00P])[k_000];
+            real f_00M = (dist.f[DIR_00M])[k_00M];
+            real f_PP0 = (dist.f[DIR_PP0])[k_000];
+            real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
+            real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
+            real f_MP0 = (dist.f[DIR_MP0])[k_M00];
+            real f_P0P = (dist.f[DIR_P0P])[k_000];
+            real f_M0M = (dist.f[DIR_M0M])[k_M0M];
+            real f_P0M = (dist.f[DIR_P0M])[k_00M];
+            real f_M0P = (dist.f[DIR_M0P])[k_M00];
+            real f_0PP = (dist.f[DIR_0PP])[k_000];
+            real f_0MM = (dist.f[DIR_0MM])[k_0MM];
+            real f_0PM = (dist.f[DIR_0PM])[k_00M];
+            real f_0MP = (dist.f[DIR_0MP])[k_0M0];
+            real f_PPP = (dist.f[DIR_PPP])[k_000];
+            real f_MPP = (dist.f[DIR_MPP])[k_M00];
+            real f_PMP = (dist.f[DIR_PMP])[k_0M0];
+            real f_MMP = (dist.f[DIR_MMP])[k_MM0];
+            real f_PPM = (dist.f[DIR_PPM])[k_00M];
+            real f_MPM = (dist.f[DIR_MPM])[k_M0M];
+            real f_PMM = (dist.f[DIR_PMM])[k_0MM];
+            real f_MMM = (dist.f[DIR_MMM])[k_MMM];
+
+            real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
+                        (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
+                        ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
+                        ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
+                            f_000;
+
+            real oneOverRho = c1o1 / (c1o1 + drho);
+
+            real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
+                        (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
+                    oneOverRho;
+            real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
+                        (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
+                    oneOverRho;
+            real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
+                        (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
+                    oneOverRho;
+
+            printf("Node %u \t (%f\t%f\t%f)\n rho: %f\t velo: %f\t %f \t %f\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, drho, vvx, vvy, vvz);
+            printf("Node %u \t (%f\t%f\t%f)\n f_M00\t%f\t f_000\t%f\t f_P00\t%f\n f_MP0\t%f\t f_0P0\t%f\t f_PP0\t%f\n f_MM0\t%f\t f_0M0\t%f\t f_PM0\t%f\n f_M0P\t%f\t f_00P\t%f\t f_P0P\t%f\n f_M0M\t%f\t f_00M\t%f\t f_P0M\t%f\n f_MPP\t%f\t f_0PP\t%f\t f_PPP\t%f\n f_MPM\t%f\t f_0PM\t%f\t f_PPM\t%f\n f_MMP\t%f\t f_0MP\t%f\t f_PMP\t%f\n f_MMM\t%f\t f_0MM\t%f\t f_PMM\t%f\n\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, f_M00, f_000, f_P00,f_MP0, f_0P0, f_PP0, f_MM0, f_0M0, f_PM0, f_M0P, f_00P, f_P0P, f_M0M, f_00M, f_P0M, f_MPP, f_0PP, f_PPP, f_MPM, f_0PM, f_PPM, f_MMP, f_0MP, f_PMP, f_MMM, f_0MM, f_PMM);
+
+        }
+
+}
+
+
+
+
+void DistributionDebugInspector::inspect(std::shared_ptr<Parameter> para, uint level, uint t)
+{
     if(this->inspectionLevel!=level)
         return;
 
@@ -125,7 +161,7 @@ void DistributionDebugInspector::inspect(std::shared_ptr<Parameter> para, uint l
 
     printFs <<< cudaGrid.grid, cudaGrid.threads >>>(    para->getParD(level)->distributions.f[0],
                                                         para->getParD(level)->isEvenTimestep,
-                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                        para->getParD(level)->numberOfNodes,
                                                         para->getParD(level)->neighborX,
                                                         para->getParD(level)->neighborY,
                                                         para->getParD(level)->neighborZ,
diff --git a/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp
index 6498bbb0a2b787d70a3b11500549b2a8956ce67a..fb0423de958e16727eca8e5a40af2c3e32faf1ae 100644
--- a/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp
@@ -5,7 +5,7 @@
 #include <sstream>
 #include <stdio.h>
 // #include <math.h>
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 #include "lbm/constants/D3Q27.h"
 #include "LBM/LB.h"
 #include "Parameter/Parameter.h"
diff --git a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
index c6e53ee3cbfb98f11e373ca014c7faf4e70a86f0..cb8cefa389c141b7f38bbc54a68d8cf9841ba699 100644
--- a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
@@ -13,7 +13,7 @@
 #include <sstream>
 #include <cmath>
 
-#include <Core/StringUtilities/StringUtil.h>
+#include <StringUtilities/StringUtil.h>
 
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
@@ -50,7 +50,7 @@ void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int tim
 
 void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int timestep, int level)
 {
-    const unsigned int numberOfParts = para->getParH(level)->numberOfNodes / para->getlimitOfNodesForVTK() + 1;
+    const unsigned int numberOfParts = (uint)para->getParH(level)->numberOfNodes / para->getlimitOfNodesForVTK() + 1;
     std::vector<std::string> fname;
     std::vector<std::string> fnameMed;
 
@@ -217,8 +217,8 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev
 
     for (unsigned int part = 0; part < fname.size(); part++)
     {
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
-            sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+        if (((part + 1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
         else
             sizeOfNodes = para->getlimitOfNodesForVTK();
 
@@ -340,8 +340,8 @@ void FileWriter::writeUnstrucuredGridLTConc(std::shared_ptr<Parameter> para, int
 
     for (unsigned int part = 0; part < fname.size(); part++)
     {
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
-            sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
         else
             sizeOfNodes = para->getlimitOfNodesForVTK();
 
@@ -379,7 +379,7 @@ void FileWriter::writeUnstrucuredGridLTConc(std::shared_ptr<Parameter> para, int
                 nodedata[3][dn1] = (double)para->getParH(level)->velocityY[pos] * (double)para->getVelocityRatio();
                 nodedata[4][dn1] = (double)para->getParH(level)->velocityZ[pos] * (double)para->getVelocityRatio();
                 nodedata[5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
-                nodedata[6][dn1] = (double)para->getParH(level)->Conc[pos];
+                nodedata[6][dn1] = (double)para->getParH(level)->concentration[pos];
                 //////////////////////////////////////////////////////////////////////////
                 number2 = para->getParH(level)->neighborX[number1];
                 number3 = para->getParH(level)->neighborY[number2];
@@ -449,9 +449,9 @@ void FileWriter::writeUnstrucuredGridMedianLT(std::shared_ptr<Parameter> para, i
     {
         //printf("\n test in if I... \n");
         //////////////////////////////////////////////////////////////////////////
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
         {
-            sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
         }
         else
         {
@@ -558,8 +558,8 @@ void FileWriter::writeUnstrucuredGridMedianLTConc(std::shared_ptr<Parameter> par
 
     for (unsigned int part = 0; part < fname.size(); part++)
     {
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
-            sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+        if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
+            sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
         else
             sizeOfNodes = para->getlimitOfNodesForVTK();
         //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
index 0b1e9dc1c25457457eabe3013a288c4c93577dc3..705d86992d9eac39b66cf5033f7c32b5cb4fb602 100644
--- a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
@@ -3,7 +3,7 @@
 
 #include <fstream>
 #include <sstream>
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 #include "lbm/constants/D3Q27.h"
 #include "LBM/LB.h"
 #include "Parameter/Parameter.h"
@@ -46,8 +46,7 @@ void writeInterfaceLinesDebugCF(Parameter *para)
 {
     for (int level = 0; level < para->getMaxLevel(); level++) {
         const std::string fileName = para->getFName() + "_" + StringUtil::toString<int>(level) + "_OffDebugCF.vtk";
-        writeGridInterfaceLines(para, level, para->getParH(level)->intCF.ICellCFC, para->getParH(level)->intCF.ICellCFF,
-                                para->getParH(level)->K_CF, fileName);
+        writeGridInterfaceLines(para, level, para->getParH(level)->coarseToFine.coarseCellIndices, para->getParH(level)->coarseToFine.fineCellIndices, para->getParH(level)->coarseToFine.numberOfCells, fileName);
     }
 }
 
@@ -55,8 +54,7 @@ void writeInterfaceLinesDebugFC(Parameter *para)
 {
     for (int level = 0; level < para->getMaxLevel(); level++) {
         const std::string fileName = para->getFName() + "_" + StringUtil::toString<int>(level) + "_OffDebugFC.vtk";
-        writeGridInterfaceLines(para, level, para->getParH(level)->intFC.ICellFCC, para->getParH(level)->intFC.ICellFCF,
-                                para->getParH(level)->K_FC, fileName);
+        writeGridInterfaceLines(para, level, para->getParH(level)->fineToCoarse.coarseCellIndices, para->getParH(level)->fineToCoarse.fineCellIndices, para->getParH(level)->fineToCoarse.numberOfCells, fileName);
     }
 }
 
@@ -90,7 +88,7 @@ void writeInterfaceLinesDebugCFCneighbor(Parameter *para)
 {
     for (int level = 0; level < para->getMaxLevel(); level++) {
         std::string filename = para->getFName() + "_" + StringUtil::toString<int>(level) + "_CFCneighbor.vtk";
-        writeGridInterfaceLinesNeighbors(para, level, para->getParH(level)->intCF.ICellCFC, para->getParH(level)->K_CF,
+        writeGridInterfaceLinesNeighbors(para, level, para->getParH(level)->coarseToFine.coarseCellIndices, para->getParH(level)->coarseToFine.numberOfCells,
                                          filename);
     }
 }
@@ -100,8 +98,7 @@ void writeInterfaceLinesDebugCFFneighbor(Parameter *para)
 {
     for (int level = 0; level < para->getMaxLevel(); level++) {
         std::string filename = para->getFName() + "_" + StringUtil::toString<int>(level) + "_CFFneighbor.vtk";
-        writeGridInterfaceLinesNeighbors(para, level + 1, para->getParH(level)->intCF.ICellCFF,
-                                         para->getParH(level)->K_CF, filename);
+        writeGridInterfaceLinesNeighbors(para, level + 1, para->getParH(level)->coarseToFine.fineCellIndices, para->getParH(level)->coarseToFine.numberOfCells, filename);
     }
 }
 
@@ -110,7 +107,7 @@ void writeInterfaceLinesDebugFCCneighbor(Parameter *para)
 {
     for (int level = 0; level < para->getMaxLevel(); level++) {
         std::string filename = para->getFName() + "_" + StringUtil::toString<int>(level) + "_FCCneighbor.vtk";
-        writeGridInterfaceLinesNeighbors(para, level, para->getParH(level)->intFC.ICellFCC, para->getParH(level)->K_FC,
+        writeGridInterfaceLinesNeighbors(para, level, para->getParH(level)->fineToCoarse.coarseCellIndices, para->getParH(level)->fineToCoarse.numberOfCells,
                                          filename);
     }
 }
@@ -120,8 +117,7 @@ void writeInterfaceLinesDebugFCFneighbor(Parameter *para)
 {
     for (int level = 0; level < para->getMaxLevel(); level++) {
         std::string filename = para->getFName() + "_" + StringUtil::toString<int>(level) + "_FCFneighbor.vtk";
-        writeGridInterfaceLinesNeighbors(para, level + 1, para->getParH(level)->intFC.ICellFCF,
-                                         para->getParH(level)->K_FC, filename);
+        writeGridInterfaceLinesNeighbors(para, level + 1, para->getParH(level)->fineToCoarse.fineCellIndices, para->getParH(level)->fineToCoarse.numberOfCells, filename);
     }
 }
 
@@ -134,17 +130,17 @@ void writeInterfaceLinesDebugOff(Parameter *para)
 
     for (int level = 0; level < para->getMaxLevel(); level++) // evtl. Maxlevel + 1
     {
-        nodeNumberVec += (int)para->getParH(level)->K_CF;
+        nodeNumberVec += (int)para->getParH(level)->coarseToFine.numberOfCells;
     }
     nodesVec.resize(nodeNumberVec * 8);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->K_CF; u++) {
-            double xoff = para->getParH(level)->offCF.xOffCF[u];
-            double yoff = para->getParH(level)->offCF.yOffCF[u];
-            double zoff = para->getParH(level)->offCF.zOffCF[u];
+        for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++) {
+            double xoff = para->getParH(level)->neighborCoarseToFine.x[u];
+            double yoff = para->getParH(level)->neighborCoarseToFine.y[u];
+            double zoff = para->getParH(level)->neighborCoarseToFine.z[u];
 
-            int posFine = para->getParH(level)->intCF.ICellCFF[u];
+            int posFine = para->getParH(level)->coarseToFine.fineCellIndices[u];
 
             double x1Fine = para->getParH(level + 1)->coordinateX[posFine];
             double x2Fine = para->getParH(level + 1)->coordinateY[posFine];
@@ -175,13 +171,13 @@ void writeInterfacePointsDebugCFC(Parameter *para)
 
     for (int level = 0; level < para->getMaxLevel(); level++) // evtl. Maxlevel + 1
     {
-        nodeNumberVec += (int)para->getParH(level)->K_CF;
+        nodeNumberVec += (int)para->getParH(level)->coarseToFine.numberOfCells;
     }
     nodesVec2.resize(nodeNumberVec * 8);
     int nodeCount2 = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->K_CF; u++) {
-            int pos = para->getParH(level)->intCF.ICellCFC[u];
+        for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++) {
+            int pos = para->getParH(level)->coarseToFine.coarseCellIndices[u];
 
             double x1 = para->getParH(level)->coordinateX[pos];
             double x2 = para->getParH(level)->coordinateY[pos];
@@ -290,10 +286,10 @@ void writeNeighborXPointsDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[u]];
-            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[u]];
-            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[index]];
+            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[index]];
+            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[index]];
 
             nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
         }
@@ -317,18 +313,18 @@ void writeNeighborXLinesDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec * 2);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1  = para->getParH(level)->coordinateX[u];
-            real x2  = para->getParH(level)->coordinateY[u];
-            real x3  = para->getParH(level)->coordinateZ[u];
-            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[u]];
-            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[u]];
-            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1  = para->getParH(level)->coordinateX[index];
+            real x2  = para->getParH(level)->coordinateY[index];
+            real x3  = para->getParH(level)->coordinateZ[index];
+            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborX[index]];
+            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborX[index]];
+            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborX[index]];
 
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N)));
 
-            if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) {
+            if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) {
                 cellsVec.push_back(makeUbTuple(nodeCount - 2, nodeCount - 1));
             }
         }
@@ -350,10 +346,10 @@ void writeNeighborYPointsDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[u]];
-            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[u]];
-            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[index]];
+            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[index]];
+            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[index]];
 
             nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
         }
@@ -377,18 +373,18 @@ void writeNeighborYLinesDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec * 2);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1  = para->getParH(level)->coordinateX[u];
-            real x2  = para->getParH(level)->coordinateY[u];
-            real x3  = para->getParH(level)->coordinateZ[u];
-            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[u]];
-            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[u]];
-            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1  = para->getParH(level)->coordinateX[index];
+            real x2  = para->getParH(level)->coordinateY[index];
+            real x3  = para->getParH(level)->coordinateZ[index];
+            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborY[index]];
+            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborY[index]];
+            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborY[index]];
 
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N)));
 
-            if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) {
+            if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) {
                 cellsVec.push_back(makeUbTuple(nodeCount - 2, nodeCount - 1));
             }
         }
@@ -410,10 +406,10 @@ void writeNeighborZPointsDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[u]];
-            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[u]];
-            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1 = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[index]];
+            real x2 = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[index]];
+            real x3 = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[index]];
 
             nodesVec[nodeCount2++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
         }
@@ -437,18 +433,18 @@ void writeNeighborZLinesDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec * 2);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->numberOfNodes; u++) {
-            real x1  = para->getParH(level)->coordinateX[u];
-            real x2  = para->getParH(level)->coordinateY[u];
-            real x3  = para->getParH(level)->coordinateZ[u];
-            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[u]];
-            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[u]];
-            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[u]];
+        for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++) {
+            real x1  = para->getParH(level)->coordinateX[index];
+            real x2  = para->getParH(level)->coordinateY[index];
+            real x3  = para->getParH(level)->coordinateZ[index];
+            real x1N = para->getParH(level)->coordinateX[para->getParH(level)->neighborZ[index]];
+            real x2N = para->getParH(level)->coordinateY[para->getParH(level)->neighborZ[index]];
+            real x3N = para->getParH(level)->coordinateZ[para->getParH(level)->neighborZ[index]];
 
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
             nodesVec[nodeCount++] = (makeUbTuple((float)(x1N), (float)(x2N), (float)(x3N)));
 
-            if (para->getParH(level)->typeOfGridNode[u] == GEO_FLUID) {
+            if (para->getParH(level)->typeOfGridNode[index] == GEO_FLUID) {
                 cellsVec.push_back(makeUbTuple(nodeCount - 2, nodeCount - 1));
             }
         }
@@ -467,13 +463,13 @@ void writeInterfaceCellsDebugCFC(Parameter *para)
     int nodeNumberVec = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) // evtl. Maxlevel + 1
     {
-        nodeNumberVec += (int)para->getParH(level)->K_CF;
+        nodeNumberVec += (int)para->getParH(level)->coarseToFine.numberOfCells;
     }
     nodesVec.resize(nodeNumberVec * 8);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->K_CF; u++) {
-            int pos = para->getParH(level)->intCF.ICellCFC[u];
+        for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++) {
+            int pos = para->getParH(level)->coarseToFine.coarseCellIndices[u];
 
             double x1             = para->getParH(level)->coordinateX[pos];
             double x2             = para->getParH(level)->coordinateY[pos];
@@ -508,13 +504,13 @@ void writeInterfaceCellsDebugCFF(Parameter *para)
     int nodeNumberVec = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) // evtl. Maxlevel + 1
     {
-        nodeNumberVec += (int)para->getParH(level)->K_CF;
+        nodeNumberVec += (int)para->getParH(level)->coarseToFine.numberOfCells;
     }
     nodesVec.resize(nodeNumberVec * 8);
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->K_CF; u++) {
-            int pos = para->getParH(level)->intCF.ICellCFF[u];
+        for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++) {
+            int pos = para->getParH(level)->coarseToFine.fineCellIndices[u];
 
             double x1             = para->getParH(level + 1)->coordinateX[pos];
             double x2             = para->getParH(level + 1)->coordinateY[pos];
@@ -620,7 +616,7 @@ void writeInterfaceFCC_Send(Parameter *para)
     std::vector<std::vector<double>> nodedata;
 
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        nodeNumberVec += (int)para->getParH(level)->intFC.kFC;
+        nodeNumberVec += (int)para->getParH(level)->fineToCoarse.numberOfCells;
     }
 
     nodesVec.resize(nodeNumberVec);
@@ -628,8 +624,8 @@ void writeInterfaceFCC_Send(Parameter *para)
 
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->intFC.kFC; u++) {
-            int pos                = para->getParH(level)->intFC.ICellFCC[u];
+        for (unsigned int u = 0; u < para->getParH(level)->fineToCoarse.numberOfCells; u++) {
+            int pos                = para->getParH(level)->fineToCoarse.coarseCellIndices[u];
             nodedata[0][nodeCount] = pos;
 
             // coordinate section
@@ -639,7 +635,7 @@ void writeInterfaceFCC_Send(Parameter *para)
             nodesVec[nodeCount] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
 
             // nodedata section
-            nodedata[1][nodeCount]           = u < para->getParH(level)->intFCBorder.kFC;
+            nodedata[1][nodeCount]           = u < para->getParH(level)->fineToCoarseBorder.numberOfCells;
             int sendDir                      = 0.0;
             int sendDirectionInCommAfterFtoC = 0.0;
             int sendIndex                    = 0.0;
@@ -674,7 +670,7 @@ void writeInterfaceCFC_Recv(Parameter *para)
     std::vector<std::vector<double>> nodedata;
 
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        nodeNumberVec += (int)para->getParH(level)->intCF.kCF;
+        nodeNumberVec += (int)para->getParH(level)->coarseToFine.numberOfCells;
     }
 
     nodesVec.resize(nodeNumberVec);
@@ -682,8 +678,8 @@ void writeInterfaceCFC_Recv(Parameter *para)
 
     int nodeCount = 0;
     for (int level = 0; level < para->getMaxLevel(); level++) {
-        for (unsigned int u = 0; u < para->getParH(level)->intCF.kCF; u++) {
-            int pos                = para->getParH(level)->intCF.ICellCFC[u];
+        for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++) {
+            int pos                = para->getParH(level)->coarseToFine.coarseCellIndices[u];
             nodedata[0][nodeCount] = pos;
 
             // coordinate section
@@ -693,7 +689,7 @@ void writeInterfaceCFC_Recv(Parameter *para)
             nodesVec[nodeCount] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
 
             // nodedata section
-            nodedata[1][nodeCount]           = u < para->getParH(level)->intCFBorder.kCF;
+            nodedata[1][nodeCount]           = u < para->getParH(level)->coarseToFineBorder.numberOfCells;
             int recvDir                      = 0.0;
             int recvDirectionInCommAfterFtoC = 0.0;
             int recvIndex                    = 0.0;
@@ -799,12 +795,12 @@ void writeSendNodesStream(Parameter *para)
             }
         }
 
-        // check if node is in iCellFCC
+        // check if node is in a coarse cell for the interpolation from fine to coarse
         nodedata[4].resize(nodedata[0].size());
         for (int i = 0; i < (int)nodedata[0].size(); i++) {
             pos = nodedata[0][i];
-            for (unsigned int u = 0; u < para->getParH(level)->intFC.kFC; u++) {
-                if (para->getParH(level)->intFC.ICellFCC[u] == (uint)pos) {
+            for (unsigned int u = 0; u < para->getParH(level)->fineToCoarse.numberOfCells; u++) {
+                if (para->getParH(level)->fineToCoarse.coarseCellIndices[u] == (uint)pos) {
                     nodedata[4][i] = 1.0;
                     break;
                 }
@@ -895,7 +891,7 @@ void writeRecvNodesStream(Parameter *para)
             }
         }
 
-        // Recv are nodes ghost nodes and therefore they can't be iCellCFCs
+        // Recv nodes are ghost nodes and therefore they can't be coarse cells for the interpolation from coarse to fine
 
         std::string filenameVec = para->getFName() + "_writeRecvNodesStreams_PID_" +
                                   std::to_string(vf::gpu::Communicator::getInstance().getPID()) + "_" +
diff --git a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
index 83f0a677b0012153cf079b466a333acc58bda6be..e506a56bb76a263ac8982a7e53f39e67c268e49b 100644
--- a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
@@ -5,53 +5,57 @@
 #include "Logger.h"
 #include "Parameter/Parameter.h"
 #include "basics/utilities/UbSystem.h"
-#include "grid/NodeValues.h"
+#include "gpu/GridGenerator/grid/NodeValues.h"
 #include "lbm/constants/D3Q27.h"
 #include <basics/writer/WbWriterVtkXmlBinary.h>
 
+#include "StringUtilities/StringUtil.h"
 #include "Utilities/FindNeighbors.h"
-#include "VirtualFluids_GPU/Communication/Communicator.h"
-#include "Core/StringUtilities/StringUtil.h"
+#include "gpu/VirtualFluids_GPU/Communication/Communicator.h"
 
 namespace NeighborDebugWriter
 {
 
-inline void writeNeighborLinkLines(Parameter *para, const int level, const uint numberOfNodes, const int direction,
-                                   const std::string &name)
+// Collects one line per fluid node (from the node to its neighbor in `direction`) and passes nodes and lines to `writer`.
+inline void writeNeighborLinkLines(LBMSimulationParameter *parH, int direction, const std::string &name, WbWriter *writer)
 {
     VF_LOG_INFO("Write node links in direction {}.", direction);
-    std::vector<UbTupleFloat3> nodes(numberOfNodes * 2);
-    std::vector<UbTupleInt2> cells(numberOfNodes);
 
-    for (uint position = 0; position < numberOfNodes; position++) {
-        if (para->getParH(level)->typeOfGridNode[position] != GEO_FLUID)
+    const unsigned long long numberOfNodes = parH->numberOfNodes;
+    std::vector<UbTupleFloat3> nodes;
+    nodes.reserve(numberOfNodes * 2); // upper bound: each fluid node adds a start and an end point
+    std::vector<UbTupleInt2> cells;
+    cells.reserve(numberOfNodes); // upper bound: each fluid node adds exactly one line
+
+    for (size_t position = 0; position < numberOfNodes; position++) {
+        if (parH->typeOfGridNode[position] != GEO_FLUID) // only fluid nodes get a link line
             continue;
 
-        const double x1 = para->getParH(level)->coordinateX[position];
-        const double x2 = para->getParH(level)->coordinateY[position];
-        const double x3 = para->getParH(level)->coordinateZ[position];
+        const double x1 = parH->coordinateX[position];
+        const double x2 = parH->coordinateY[position];
+        const double x3 = parH->coordinateZ[position];
 
-        const uint positionNeighbor = getNeighborIndex(para->getParH(level).get(), position, direction);
+        const uint positionNeighbor = getNeighborIndex(parH, (uint)position, direction);
 
-        const double x1Neighbor = para->getParH(level)->coordinateX[positionNeighbor];
-        const double x2Neighbor = para->getParH(level)->coordinateY[positionNeighbor];
-        const double x3Neighbor = para->getParH(level)->coordinateZ[positionNeighbor];
+        const double x1Neighbor = parH->coordinateX[positionNeighbor];
+        const double x2Neighbor = parH->coordinateY[positionNeighbor];
+        const double x3Neighbor = parH->coordinateZ[positionNeighbor];
 
         nodes.emplace_back(float(x1), float(x2), float(x3));
         nodes.emplace_back(float(x1Neighbor), float(x2Neighbor), float(x3Neighbor));
 
         cells.emplace_back((int)nodes.size() - 2, (int)nodes.size() - 1);
     }
-    WbWriterVtkXmlBinary::getInstance()->writeLines(name, nodes, cells);
+    writer->writeLines(name, nodes, cells);
 }
 
 inline void writeNeighborLinkLinesDebug(Parameter *para)
 {
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (int direction = vf::lbm::dir::STARTDIR; direction <= vf::lbm::dir::ENDDIR; direction++) {
+        for (size_t direction = vf::lbm::dir::STARTDIR; direction <= vf::lbm::dir::ENDDIR; direction++) { // one file per level and direction, ENDDIR included
             const std::string fileName = para->getFName() + "_" + StringUtil::toString<int>(level) + "_Link_" +
                                          std::to_string(direction) + "_Debug.vtk";
-            writeNeighborLinkLines(para, level, para->getParH(level)->numberOfNodes, direction, fileName);
+            writeNeighborLinkLines(para->getParH(level).get(), (int)direction, fileName, WbWriterVtkXmlBinary::getInstance()); // output goes through the WbWriterVtkXmlBinary instance
         }
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a19ed3d723f28998f5d27cd15ebf4bab8ba061c4
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriterTest.cpp
@@ -0,0 +1,79 @@
+#include <gmock/gmock.h>
+#include "NeighborDebugWriter.hpp"
+#include "gpu/VirtualFluids_GPU/Utilities/testUtilitiesGPU.h"
+
+// Test double for WbWriter: keeps copies of the nodes and lines handed to writeLines() so tests can inspect them.
+class WbWriterSpy : public WbWriter
+{
+public:
+    std::vector<UbTupleFloat3> nodes; // nodes received by the most recent writeLines() call
+    std::vector<UbTupleInt2> lines;   // lines received by the most recent writeLines() call
+
+    std::string getFileExtension() override { return ""; }
+    std::string writeLines(const std::string & /*filename*/, std::vector<UbTupleFloat3> &receivedNodes, std::vector<UbTupleInt2> &receivedLines) override
+    {
+        nodes = receivedNodes;
+        lines = receivedLines;
+        return "";
+    }
+};
+
+// Fixture: three fluid nodes whose neighborX entry is always node 2; nodes 0 and 1 sit at coordinate 1, node 2 at 3.
+class NeighborDebugWriterTest : public testing::Test
+{
+protected:
+    const int level = 0;
+    const unsigned long long numberOfNodes = 3;
+    const uint direction = vf::lbm::dir::DIR_P00; // x
+    std::unique_ptr<LBMSimulationParameter> parH = std::make_unique<LBMSimulationParameter>();
+    WbWriterSpy writerSpy;
+    std::vector<uint> typeOfGridNode;
+    std::vector<uint> neighbors;
+    std::vector<real> coordinates;
+
+    void SetUp() override
+    {
+        // the fixture owns the arrays; parH only receives raw pointers into them
+        typeOfGridNode.assign(numberOfNodes, GEO_FLUID);
+        neighbors.assign(numberOfNodes, 2);
+        coordinates.assign(numberOfNodes, 1.0);
+        coordinates[2] = 3.0;
+
+        parH->numberOfNodes = numberOfNodes;
+        parH->typeOfGridNode = typeOfGridNode.data();
+        parH->neighborX = neighbors.data();
+        parH->coordinateX = parH->coordinateY = parH->coordinateZ = coordinates.data(); // one array serves all three axes
+    }
+};
+
+TEST_F(NeighborDebugWriterTest, writeNeighborLinkLines_onlyFLuidNodes_writesAllNodes)
+{
+    const UbTupleFloat3 atOne(1.0, 1.0, 1.0);
+    const UbTupleFloat3 atThree(3.0, 3.0, 3.0);
+    const std::vector<UbTupleFloat3> expectedNodes { atOne, atThree, atOne, atThree, atThree, atThree };
+    const std::vector<UbTupleInt2> expectedLines { UbTupleInt2(0, 1), UbTupleInt2(2, 3), UbTupleInt2(4, 5) };
+    // all three nodes are fluid: each contributes its own position, the position of node 2 and one connecting line
+    NeighborDebugWriter::writeNeighborLinkLines(parH.get(), direction, "name", &writerSpy);
+
+    EXPECT_THAT(writerSpy.nodes, testing::Eq(expectedNodes));
+    EXPECT_THAT(writerSpy.lines, testing::Eq(expectedLines));
+    EXPECT_THAT(writerSpy.nodes.size(), testing::Eq(numberOfNodes * 2));
+    EXPECT_THAT(writerSpy.lines.size(), testing::Eq(numberOfNodes));
+}
+
+TEST_F(NeighborDebugWriterTest, writeNeighborLinkLines_fluidAndSolidNodes_writesOnlyFluidNodes)
+{
+    // node 2 is solid and must not get a line of its own; it is still the neighbor of the fluid nodes 0 and 1
+    typeOfGridNode[2] = GEO_SOLID;
+    const UbTupleFloat3 atOne(1.0, 1.0, 1.0);
+    const UbTupleFloat3 atThree(3.0, 3.0, 3.0);
+    const std::vector<UbTupleFloat3> expectedNodes { atOne, atThree, atOne, atThree };
+    const std::vector<UbTupleInt2> expectedLines { UbTupleInt2(0, 1), UbTupleInt2(2, 3) };
+    const unsigned long long numberOfFluidNodes = numberOfNodes - 1;
+
+    NeighborDebugWriter::writeNeighborLinkLines(parH.get(), direction, "name", &writerSpy);
+    EXPECT_THAT(writerSpy.nodes, testing::Eq(expectedNodes));
+    EXPECT_THAT(writerSpy.lines, testing::Eq(expectedLines));
+    EXPECT_THAT(writerSpy.nodes.size(), testing::Eq(numberOfFluidNodes * 2));
+    EXPECT_THAT(writerSpy.lines.size(), testing::Eq(numberOfFluidNodes));
+}
diff --git a/src/gpu/VirtualFluids_GPU/Output/OffsetWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/OffsetWriter.hpp
index 7aa660fa33d8ad31a19053e3511241de3ee07c07..fb04951db68ae509dddc7d2fe52250489b54bde2 100644
--- a/src/gpu/VirtualFluids_GPU/Output/OffsetWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/OffsetWriter.hpp
@@ -21,13 +21,13 @@ public:
 		{
 			for (int level = 0; level < para->getMaxLevel(); level++)
 			{
-				out.writeInteger(para->getParH(level)->K_CF);
+                out.writeInteger(para->getParH(level)->coarseToFine.numberOfCells);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->K_CF; u++)
+                for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++)
 				{
-					out.writeDouble(para->getParH(level)->offCF.xOffCF[u]);
-					out.writeDouble(para->getParH(level)->offCF.yOffCF[u]);
-					out.writeDouble(para->getParH(level)->offCF.zOffCF[u]);
+					out.writeDouble(para->getParH(level)->neighborCoarseToFine.x[u]);
+					out.writeDouble(para->getParH(level)->neighborCoarseToFine.y[u]);
+					out.writeDouble(para->getParH(level)->neighborCoarseToFine.z[u]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -36,13 +36,13 @@ public:
 		{
 			for (int level = 0; level < para->getMaxLevel(); level++)
 			{
-				out.writeInteger(para->getParH(level)->K_FC);
+                out.writeInteger(para->getParH(level)->fineToCoarse.numberOfCells);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->K_FC; u++)
+                for (unsigned int u = 0; u < para->getParH(level)->fineToCoarse.numberOfCells; u++)
 				{
-					out.writeDouble(para->getParH(level)->offFC.xOffFC[u]);
-					out.writeDouble(para->getParH(level)->offFC.yOffFC[u]);
-					out.writeDouble(para->getParH(level)->offFC.zOffFC[u]);
+					out.writeDouble(para->getParH(level)->neighborFineToCoarse.x[u]);
+					out.writeDouble(para->getParH(level)->neighborFineToCoarse.y[u]);
+					out.writeDouble(para->getParH(level)->neighborFineToCoarse.z[u]);
 				}
 				out.writeLine();
 			} //end levelloop
diff --git a/src/gpu/VirtualFluids_GPU/Output/PosVecIntWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/PosVecIntWriter.hpp
index a9207ed231e8d9667e57636a5a5fdd6b5aeab94f..d7eab31d639d23d2356263aa8b152d69b3c042b8 100644
--- a/src/gpu/VirtualFluids_GPU/Output/PosVecIntWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/PosVecIntWriter.hpp
@@ -38,11 +38,11 @@ public:
 		{
 			for (int level = 0; level < para->getMaxLevel(); level++)
 			{
-				out.writeInteger(para->getParH(level)->K_CF);
+                out.writeInteger(para->getParH(level)->coarseToFine.numberOfCells);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->K_CF; u++)
+                for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++)
 				{
-					out.writeInteger(para->getParH(level)->intCF.ICellCFC[u]);
+					out.writeInteger(para->getParH(level)->coarseToFine.coarseCellIndices[u]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -51,11 +51,11 @@ public:
 		{
 			for (int level = 0; level < para->getMaxLevel(); level++)
 			{
-				out.writeInteger(para->getParH(level)->K_CF);
+                out.writeInteger(para->getParH(level)->coarseToFine.numberOfCells);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->K_CF; u++)
+                for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++)
 				{
-					out.writeInteger(para->getParH(level)->intCF.ICellCFF[u]);
+					out.writeInteger(para->getParH(level)->coarseToFine.fineCellIndices[u]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -64,11 +64,11 @@ public:
 		{
 			for (int level = 0; level < para->getMaxLevel(); level++)
 			{
-				out.writeInteger(para->getParH(level)->K_FC);
+                out.writeInteger(para->getParH(level)->fineToCoarse.numberOfCells);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->K_FC; u++)
+                for (unsigned int u = 0; u < para->getParH(level)->fineToCoarse.numberOfCells; u++)
 				{
-					out.writeInteger(para->getParH(level)->intFC.ICellFCC[u]);
+					out.writeInteger(para->getParH(level)->fineToCoarse.coarseCellIndices[u]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -77,11 +77,11 @@ public:
 		{
 			for (int level = 0; level < para->getMaxLevel(); level++)
 			{
-				out.writeInteger(para->getParH(level)->K_FC);
+                out.writeInteger(para->getParH(level)->fineToCoarse.numberOfCells);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->K_FC; u++)
+                for (unsigned int u = 0; u < para->getParH(level)->fineToCoarse.numberOfCells; u++)
 				{
-					out.writeInteger(para->getParH(level)->intFC.ICellFCF[u]);
+					out.writeInteger(para->getParH(level)->fineToCoarse.fineCellIndices[u]);
 				}
 				out.writeLine();
 			} //end levelloop
diff --git a/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp
index 456f9c148c75c27fb899f976ba4f99b109fc3d4b..ce611d25d1aa3f9e98840a0f04d9b2045d0a224f 100644
--- a/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/PosWriter.hpp
@@ -33,9 +33,9 @@ public:
 			{
 				out.writeInteger(para->getParH(level)->numberOfNodes);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++)
+				for(size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
 				{
-					out.writeInteger(para->getParH(level)->typeOfGridNode[u]);
+					out.writeInteger(para->getParH(level)->typeOfGridNode[index]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -46,9 +46,9 @@ public:
 			{
 				out.writeInteger(para->getParH(level)->numberOfNodes);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++)
+                for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
 				{
-					out.writeInteger(para->getParH(level)->neighborX[u]);
+					out.writeInteger(para->getParH(level)->neighborX[index]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -59,9 +59,9 @@ public:
 			{
 				out.writeInteger(para->getParH(level)->numberOfNodes);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++)
+                for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
 				{
-					out.writeInteger(para->getParH(level)->neighborY[u]);
+					out.writeInteger(para->getParH(level)->neighborY[index]);
 				}
 				out.writeLine();
 			} //end levelloop
@@ -72,9 +72,9 @@ public:
 			{
 				out.writeInteger(para->getParH(level)->numberOfNodes);
 				out.writeLine();
-				for(unsigned int u=0; u<para->getParH(level)->numberOfNodes; u++)
+                for (size_t index = 0; index < para->getParH(level)->numberOfNodes; index++)
 				{
-					out.writeInteger(para->getParH(level)->neighborZ[u]);
+					out.writeInteger(para->getParH(level)->neighborZ[index]);
 				}
 				out.writeLine();
 			} //end levelloop
diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d075c78e53a45e96adea43c8846159f4ba128c6d
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp
@@ -0,0 +1,96 @@
+#ifndef QVTKWRITER_HPP
+#define QVTKWRITER_HPP
+
+#include <array>
+#include <vector>
+
+#include "basics/StringUtilities/StringUtil.h"
+#include "basics/utilities/UbSystem.h"
+#include "basics/writer/WbWriterVtkXmlBinary.h"
+#include "lbm/constants/D3Q27.h"
+#include <logger/Logger.h>
+
+#include "gpu/GridGenerator/grid/NodeValues.h"
+#include "gpu/VirtualFluids_GPU/Communication/Communicator.h"
+#include "gpu/VirtualFluids_GPU/LBM/LB.h"
+#include "gpu/VirtualFluids_GPU/Parameter/Parameter.h"
+#include "gpu/VirtualFluids_GPU/Utilities/FindNeighbors.h"
+
+namespace QDebugVtkWriter
+{
+
+using namespace vf::lbm::dir;
+
+namespace
+{
+// Moves neighborCoords along the line coords -> neighborCoords so that the line has q times its previous length.
+// neighborCoords is left unchanged when q <= 0 or q == 1.
+inline void modifyLineLengthsForQs(const std::array<double, 3> &coords, std::array<double, 3> &neighborCoords, real q)
+{
+    const bool nothingToScale = q <= 0.0 || q == 1.0;
+    if (nothingToScale)
+        return;
+
+    for (size_t axis = 0; axis < coords.size(); axis++) {
+        const auto delta = neighborCoords[axis] - coords[axis];
+        neighborCoords[axis] = coords[axis] + q * delta;
+    }
+}
+
+// Writes one line for every boundary node and direction with q > 0: from the node towards its neighbor in that
+// direction, scaled to q times the node distance. Node index and q are attached to each line as line data.
+inline void writeQLines(LBMSimulationParameter *parH, QforBoundaryConditions &boundaryQ, const std::string &filepath, WbWriter *writer)
+{
+    VF_LOG_INFO("Write qs in for boundary condition to {}.", filepath);
+
+    const auto numberOfNodes = boundaryQ.numberOfBCnodes;
+    // capacity estimate of 8 lines (16 points) per boundary node; the vectors still grow beyond that if needed
+    std::vector<UbTupleFloat3> nodes;
+    nodes.reserve(numberOfNodes * 8 * 2);
+    std::vector<UbTupleInt2> lines;
+    lines.reserve(numberOfNodes * 8);
+
+    std::vector<std::string> dataNames = { "nodeIndex", "q" };
+    std::vector<std::vector<float>> lineData(2);
+
+    for (size_t i = 0; i < numberOfNodes; i++) {
+        const auto nodeIndex = boundaryQ.k[i];
+        const std::array<double, 3> coords = { parH->coordinateX[nodeIndex], parH->coordinateY[nodeIndex],
+                                               parH->coordinateZ[nodeIndex] };
+
+        // NOTE(review): direction 0 and direction ENDDIR itself are not visited; confirm that skipping ENDDIR is intended
+        for (size_t direction = 1; direction < ENDDIR; direction++) {
+            const auto q = boundaryQ.q27[direction][i];
+            if (q <= (real)0.0) // only positive qs are drawn
+                continue;
+
+            const auto positionNeighbor = getNeighborIndex(parH, (uint)nodeIndex, (int)direction);
+            std::array<double, 3> neighborCoords = { parH->coordinateX[positionNeighbor], parH->coordinateY[positionNeighbor],
+                                                     parH->coordinateZ[positionNeighbor] };
+            modifyLineLengthsForQs(coords, neighborCoords, q);
+
+            // the points are emitted as floats, so narrow all three components explicitly
+            nodes.emplace_back(float(coords[0]), float(coords[1]), float(coords[2]));
+            nodes.emplace_back(float(neighborCoords[0]), float(neighborCoords[1]), float(neighborCoords[2]));
+            lines.emplace_back((int)nodes.size() - 2, (int)nodes.size() - 1);
+
+            // line data is stored as float; NOTE(review): node indices above 2^24 are not exactly representable
+            lineData[0].push_back(float(nodeIndex));
+            lineData[1].push_back(float(q));
+        }
+    }
+
+    writer->writeLinesWithLineData(filepath, nodes, lines, dataNames, lineData);
+}
+} // namespace
+
+inline void writeQLinesDebug(Parameter *para, QforBoundaryConditions &boundaryQ, uint level, const std::string& fileName)
+{
+    // target path is "<para->getFName()>_<fileName>.vtk"; output goes through the WbWriterVtkXmlBinary instance
+    const std::string debugFilePath = para->getFName() + "_" + fileName + ".vtk";
+    writeQLines(para->getParH(level).get(), boundaryQ, debugFilePath, WbWriterVtkXmlBinary::getInstance());
+}
+
+} // namespace QDebugVtkWriter
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9eecb25c663fcfc8fde353b76ccf20cbcb9cf272
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriterTest.cpp
@@ -0,0 +1,60 @@
+#include "gmock/gmock.h"
+#include <cmath>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "QDebugVtkWriter.hpp"
+#include <tuple>
+
+// Pointwise matcher: the two values of a pair match if they differ by less than 1e-5.
+// std::abs is used on purpose: unqualified abs may bind to the C overload for int and truncate the difference.
+MATCHER(DoubleNear5, "") { return std::abs(std::get<0>(arg) - std::get<1>(arg)) < 0.00001; }
+
+using namespace QDebugVtkWriter;
+
+double calcVectorLength(const std::array<double, 3> coords, const std::array<double, 3> neighborCoords)
+{
+    const std::array<double, 3> delta = { neighborCoords[0] - coords[0], neighborCoords[1] - coords[1], neighborCoords[2] - coords[2] };
+    return std::sqrt(std::pow(delta[0], 2) + std::pow(delta[1], 2) + std::pow(delta[2], 2)); // Euclidean distance of the two points
+}
+
+TEST(QDebugVtkWriterTest, modifyLineLengthsForQsSameCoords3)
+{
+    // scaling the line (0,0,0) -> (1,1,1) with q = 0.3 must end at (0.3,0.3,0.3) and have 0.3 times the length
+    const std::array<double, 3> origin { 0, 0, 0 };
+    std::array<double, 3> scaledNeighbor { 1, 1, 1 };
+    const real q = 0.3;
+    const real lengthBefore = calcVectorLength(origin, scaledNeighbor);
+    modifyLineLengthsForQs(origin, scaledNeighbor, q);
+
+    const std::array<double, 3> expected { 0.3, 0.3, 0.3 };
+    EXPECT_THAT(scaledNeighbor, testing::Pointwise(DoubleNear5(), expected));
+    EXPECT_THAT(calcVectorLength(origin, scaledNeighbor), testing::DoubleNear(q * lengthBefore, 0.00001));
+}
+
+TEST(QDebugVtkWriterTest, modifyLineLengthDifferentCoords)
+{
+    // each axis is scaled separately: (1,2,3) with q = 0.3 becomes (0.3,0.6,0.9)
+    const std::array<double, 3> origin { 0, 0, 0 };
+    std::array<double, 3> scaledNeighbor { 1, 2, 3 };
+    const real q = 0.3;
+    const real lengthBefore = calcVectorLength(origin, scaledNeighbor);
+    modifyLineLengthsForQs(origin, scaledNeighbor, q);
+
+    const std::array<double, 3> expected { 0.3, 0.6, 0.9 };
+    EXPECT_THAT(scaledNeighbor, testing::Pointwise(DoubleNear5(), expected));
+    EXPECT_THAT(calcVectorLength(origin, scaledNeighbor), testing::DoubleNear(q * lengthBefore, 0.00001));
+}
+
+TEST(QDebugVtkWriterTest, modifyLineLengthNegativeCoord)
+{
+    // the sign of a component is kept: (1,2,-3) with q = 0.3 becomes (0.3,0.6,-0.9)
+    const std::array<double, 3> origin { 0, 0, 0 };
+    std::array<double, 3> scaledNeighbor { 1, 2, -3 };
+    const real q = 0.3;
+    const real lengthBefore = calcVectorLength(origin, scaledNeighbor);
+    modifyLineLengthsForQs(origin, scaledNeighbor, q);
+
+    const std::array<double, 3> expected { 0.3, 0.6, -0.9 };
+    EXPECT_THAT(scaledNeighbor, testing::Pointwise(DoubleNear5(), expected));
+    EXPECT_THAT(calcVectorLength(origin, scaledNeighbor), testing::DoubleNear(q * lengthBefore, 0.00001));
+}
diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp
index d006636572377477aeb3599a8ae843ea2b1e31ff..b24dab20be957fa27b9306d0bbabbec53694753c 100644
--- a/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/QDebugWriter.hpp
@@ -11,9 +11,7 @@
 #include "Parameter/Parameter.h"
 #include "basics/utilities/UbSystem.h"
 #include <basics/writer/WbWriterVtkXmlBinary.h>
-#include "Core/StringUtilities/StringUtil.h"
-
-//using namespace std;
+#include "StringUtilities/StringUtil.h"
 
 namespace QDebugWriter
 {
diff --git a/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.cpp b/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.cpp
index 78da4947d0c8196cda49fef754a3f44fc39d0a44..979c4349a084342a897a0269ffb86a3714065e96 100644
--- a/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.cpp
@@ -1,6 +1,6 @@
 #include "helper_cuda.h"
 #include <cuda_runtime.h>
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "UbScheduler.h"
 #include "Parameter/Parameter.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.h b/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.h
index 982d1ce56bfadb7eddfd3d34d8d6b01ac6f92233..41bb9eba7ba09ddb092e644894eca0fde71fa27b 100644
--- a/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.h
+++ b/src/gpu/VirtualFluids_GPU/Output/TimeStepTimer.h
@@ -3,7 +3,7 @@
 
 #include "helper_cuda.h"
 #include <cuda_runtime.h>
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "UbScheduler.h"
 #include "Parameter/Parameter.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
index 74a706165489a86cace40047beb09996aa0aa8db..5a5e010944a776038416386267c3bf6477d47e9f 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
@@ -51,9 +51,9 @@ void Timer::outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& co
     VF_LOG_INFO(" {} \t --- {} --- {:>8.1f}/ {:<8.1f} \t   {:5.1f} \t       {:4.1f}",  communicator.getPID(), this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth);
 
     // When using multiple GPUs, sum the nups of all processes
-    if (communicator.getNummberOfProcess() > 1) {
+    if (communicator.getNumberOfProcess() > 1) {
         double nupsSum =  communicator.sumNups(fnups);
         if (communicator.getPID() == 0)
-            VF_LOG_INFO("Sum of all {} processes: Nups in Mio: {:.1f}", communicator.getNummberOfProcess(), nupsSum);
+            VF_LOG_INFO("Sum of all {} processes: Nups in Mio: {:.1f}", communicator.getNumberOfProcess(), nupsSum);
     }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.h b/src/gpu/VirtualFluids_GPU/Output/Timer.h
index d035cbb6cef7ea9f8edabbd2894671a868c37eec..55ada64ad245ee41aa99a1185eba134a652067c9 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.h
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.h
@@ -2,9 +2,9 @@
 #define TIMER_H
 #include <cuda_runtime.h>
 
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "Parameter/Parameter.h"
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 
 namespace vf::gpu{
     class Communicator;
diff --git a/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
index 81f2c028a6bbc7cd9c077571349f4f0465a08a05..cafe70205455ae8592c1efe86e4ba9de8e1ba170 100644
--- a/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/UnstructuredGridWriter.hpp
@@ -33,7 +33,7 @@ namespace UnstructuredGridWriter
 
 		bool neighborsFluid;
 
-		unsigned int allnodes = para->getParH(level)->numberOfNodes * 8;
+		unsigned long long allnodes = para->getParH(level)->numberOfNodes * 8;
 
 		nodes.resize(allnodes);
 		nodedata[0].resize(allnodes);
@@ -45,7 +45,7 @@ namespace UnstructuredGridWriter
 		unsigned int nodeCount = 0;
 		double nodeDeltaLevel = para->getParH(level)->dx;
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID /*!= GEO_VOID*/)
 			{
@@ -197,9 +197,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+			if ( ((part+1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -340,9 +340,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -479,9 +479,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -628,9 +628,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -771,9 +771,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -814,7 +814,7 @@ namespace UnstructuredGridWriter
 					nodedata[3][dn1] = (double)para->getParH(level)->velocityY[pos] * (double)para->getVelocityRatio();
 					nodedata[4][dn1] = (double)para->getParH(level)->velocityZ[pos] * (double)para->getVelocityRatio();
 					nodedata[5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
-					nodedata[6][dn1] = (double)para->getParH(level)->Conc[pos];
+					nodedata[6][dn1] = (double)para->getParH(level)->concentration[pos];
 					//////////////////////////////////////////////////////////////////////////
 					number2 = para->getParH(level)->neighborX[number1];
 					number3 = para->getParH(level)->neighborY[number2];
@@ -896,10 +896,10 @@ namespace UnstructuredGridWriter
 		vector< vector< double > > nodedata(nodedatanames.size());
 
 		//printf("\n test for if... \n");
-		if (para->getParH(level)->numberOfNodes > limitOfNodes)
+        if ((uint)para->getParH(level)->numberOfNodes > limitOfNodes)
 		{
 			//printf("\n test in if I... \n");
-			unsigned int restOfNodes = para->getParH(level)->numberOfNodes - limitOfNodes;
+            unsigned int restOfNodes = (uint)para->getParH(level)->numberOfNodes - limitOfNodes;
 			//////////////////////////////////////////////////////////////////////////
 			//PART I
 			nodes.resize(limitOfNodes);
@@ -984,7 +984,7 @@ namespace UnstructuredGridWriter
 			nodedata[5].resize(restOfNodes);
 			//printf("\n test in if IV... \n");
 
-			for (unsigned int pos=limitOfNodes;pos<para->getParH(level)->numberOfNodes;pos++)
+			for (size_t pos = limitOfNodes; pos < para->getParH(level)->numberOfNodes; pos++)
 			{
 				if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 				{
@@ -1055,7 +1055,7 @@ namespace UnstructuredGridWriter
 			nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
 			//printf("\n test in else II... \n");
-			for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+			for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 			{
 				if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 				{
@@ -1148,7 +1148,7 @@ namespace UnstructuredGridWriter
 		unsigned int number1,number2,number3,number4,number5,number6,number7,number8;
 		bool neighborsFluid;
 		double vxmax = 0;
-		vector< vector< double > > nodedata(nodedatanames.size());
+		vector<vector<double>> nodedata(nodedatanames.size());
 
 		nodes.resize(para->getParH(level)->numberOfNodes);
 		nodedata[0].resize(para->getParH(level)->numberOfNodes);
@@ -1158,7 +1158,7 @@ namespace UnstructuredGridWriter
 		nodedata[4].resize(para->getParH(level)->numberOfNodes);
 		nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 			{
@@ -1244,7 +1244,7 @@ namespace UnstructuredGridWriter
 		nodedata[4].resize(para->getParH(level)->numberOfNodes);
 		nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 			{
@@ -1342,9 +1342,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+			if ( ((part+1)*para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -1364,7 +1364,7 @@ namespace UnstructuredGridWriter
 			nodedata[5].resize(sizeOfNodes);
 			//////////////////////////////////////////////////////////////////////////
 			//printf("\n test in if II... \n");
-			for (unsigned int pos=startpos;pos<endpos;pos++)
+			for (size_t pos = startpos; pos < endpos; pos++)
 			{
 				if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 				{
@@ -1465,9 +1465,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -1595,9 +1595,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -1728,7 +1728,7 @@ namespace UnstructuredGridWriter
 		nodedatanames.push_back("geo");
 		unsigned int number1,number2,number3,number4,number5,number6,number7,number8;
 		bool neighborsFluid;
-		vector< vector< double > > nodedata(nodedatanames.size());
+		vector< vector<double>> nodedata(nodedatanames.size());
 
 		nodes.resize(para->getParH(level)->numberOfNodes);
 		nodedata[0].resize(para->getParH(level)->numberOfNodes);
@@ -1738,7 +1738,7 @@ namespace UnstructuredGridWriter
 		nodedata[4].resize(para->getParH(level)->numberOfNodes);
 		nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 			{
@@ -1825,7 +1825,7 @@ namespace UnstructuredGridWriter
 		nodedata[4].resize(para->getParH(level)->numberOfNodes);
 		nodedata[5].resize(para->getParH(level)->numberOfNodes);
 
-		for (unsigned int pos=0;pos<para->getParH(level)->numberOfNodes;pos++)
+		for (size_t pos = 0; pos < para->getParH(level)->numberOfNodes; pos++)
 		{
 			if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID)
 			{
@@ -1975,9 +1975,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -2080,9 +2080,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -2192,9 +2192,9 @@ namespace UnstructuredGridWriter
 			vxmax = 0;
 			//printf("\n test in if I... \n");
 			//////////////////////////////////////////////////////////////////////////
-			if ( ((part+1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->numberOfNodes)
+            if (((part + 1) * para->getlimitOfNodesForVTK()) > (uint)para->getParH(level)->numberOfNodes)
 			{
-				sizeOfNodes = para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
+                sizeOfNodes = (uint)para->getParH(level)->numberOfNodes - (part * para->getlimitOfNodesForVTK());
 			}
 			else
 			{
@@ -2319,7 +2319,7 @@ namespace UnstructuredGridWriter
 			wallX3 = 0.0;
 			q      = 0.0;
 			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++)
+            for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++)
 			{
 				QQ = para->getParH(level)->geometryBC.q27[0];
 				Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes];
@@ -2423,7 +2423,7 @@ namespace UnstructuredGridWriter
 			wallX3 = 0.0;
 			q      = 0.0;
 			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++)
+            for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++)
 			{
 				QQ = para->getParH(level)->velocityBC.q27[0];
 				Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes];
@@ -2528,7 +2528,7 @@ namespace UnstructuredGridWriter
 			wallX3 = 0.0;
 			q      = 0.0;
 			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int typeOfQ = STARTDIR; typeOfQ <= ENDDIR; typeOfQ++)
+            for (size_t typeOfQ = vf::lbm::dir::STARTDIR; typeOfQ <= vf::lbm::dir::ENDDIR; typeOfQ++)
 			{
 				QQ = para->getParH(level)->pressureBC.q27[0];
 				Q.q27[typeOfQ] = &QQ[typeOfQ*sizeOfNodes];
diff --git a/src/gpu/VirtualFluids_GPU/Output/interfaceWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/interfaceWriter.hpp
index f140b15a6b7595a959139da2a35ed58f01b2a307..bf35411b93fd1f126cfdde1f3739f1baa33a4d83 100644
--- a/src/gpu/VirtualFluids_GPU/Output/interfaceWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/interfaceWriter.hpp
@@ -32,11 +32,11 @@ public:
 		{
 			if ((Type == "_InterfaceCFC") || (Type == "_InterfaceCFF"))
 			{
-				nodeNumberVec += para->getParH(level)->K_CF;
+                nodeNumberVec += para->getParH(level)->coarseToFine.numberOfCells;
 			}
 			else if (Type == "_InterfaceFCF")
 			{
-				nodeNumberVec += para->getParH(level)->K_FC;
+                nodeNumberVec += para->getParH(level)->fineToCoarse.numberOfCells;
 			}
 		}
 		nodesVec.resize(nodeNumberVec*8);
@@ -55,9 +55,9 @@ public:
 			//std::vector<unsigned int>& posVec = posIndexVec[level];
 			if (Type == "_InterfaceCFC")
 			{
-				for(unsigned int u=0;u<para->getParH(level)->K_CF;u++)
+                for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++)
 				{
-					int pos = para->getParH(level)->intCF.ICellCFC[u];
+					int pos = para->getParH(level)->coarseToFine.coarseCellIndices[u];
 					int ix1 = pos % nx1lev;
 					int wertDurchNx1 = pos / nx1lev;
 					int ix2 = wertDurchNx1 % nx2lev;
@@ -82,9 +82,9 @@ public:
 			}
 			else if (Type == "_InterfaceCFF")
 			{
-				for(unsigned int u=0;u<para->getParH(level)->K_CF;u++)
+                for (unsigned int u = 0; u < para->getParH(level)->coarseToFine.numberOfCells; u++)
 				{
-					int pos = para->getParH(level)->intCF.ICellCFF[u];
+					int pos = para->getParH(level)->coarseToFine.fineCellIndices[u];
 					int ix1 = pos % nx1lev;
 					int wertDurchNx1 = pos / nx1lev;
 					int ix2 = wertDurchNx1 % nx2lev;
@@ -109,9 +109,9 @@ public:
 			}
 			else if (Type == "_InterfaceFCF")
 			{
-				for(unsigned int u=0;u<para->getParH(level)->K_FC;u++)
+                for (unsigned int u = 0; u < para->getParH(level)->fineToCoarse.numberOfCells; u++)
 				{
-					int pos = para->getParH(level)->intFC.ICellFCF[u];
+					int pos = para->getParH(level)->fineToCoarse.fineCellIndices[u];
 					int ix1 = pos % nx1lev;
 					int wertDurchNx1 = pos / nx1lev;
 					int ix2 = wertDurchNx1 % nx2lev;
@@ -146,7 +146,7 @@ public:
 		int nodeNumberVec = 0;
 		for (int level = 0; level < para->getMaxLevel(); level++)
 		{
-			nodeNumberVec += para->getParH(level)->K_FC;
+            nodeNumberVec += para->getParH(level)->fineToCoarse.numberOfCells;
 		}
 		nodesVec.resize(nodeNumberVec*8);
 		int nodeCount = 0;
@@ -163,9 +163,9 @@ public:
 			double achtelNodeDelta = 0.125*nodeDeltaLevel;
 			//int count = 0;
 			//std::vector<unsigned int>& posVec = posIndexVec[level];
-			for(unsigned int u=0;u<para->getParH(level)->K_FC;u++)
+            for (unsigned int u = 0; u < para->getParH(level)->fineToCoarse.numberOfCells; u++)
 			{
-				int pos = para->getParH(level)->intFC.ICellFCC[u];//posVec[u];
+				int pos = para->getParH(level)->fineToCoarse.coarseCellIndices[u];//posVec[u];
 				int ix1 = pos % nx1lev;
 				int wertDurchNx1 = pos / nx1lev;
 				int ix2 = wertDurchNx1 % nx2lev;
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
index 5c59bcd3a5e6178d6e70a63f803caf8e29f32604..631a945a653e6b4b60924a650e94b3873ebacc7d 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
@@ -33,7 +33,7 @@
 #include <map>
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 
 enum class CudaStreamIndex
     {
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index d1568932d9802d1060e37d26ed37fa48de1abb05..bf0d72448fb5a69c849d93749e24f29290cf9621 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -39,10 +39,11 @@
 
 #include <curand_kernel.h>
 
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 
 #include <basics/config/ConfigurationFile.h>
 
+#include "Logger.h"
 #include "Parameter/CudaStreamManager.h"
 
 Parameter::Parameter() : Parameter(1, 0, {}) {}
@@ -53,8 +54,8 @@ Parameter::Parameter(int numberOfProcesses, int myId) : Parameter(numberOfProces
 
 Parameter::Parameter(int numberOfProcesses, int myId, std::optional<const vf::basics::ConfigurationFile*> configData)
 {
-    this->ic.numprocs = numberOfProcesses;
-    this->ic.myProcessId = myId;
+    this->numprocs = numberOfProcesses;
+    this->myProcessId = myId;
 
     this->setQuadricLimiters(0.01, 0.01, 0.01);
     this->setForcing(0.0, 0.0, 0.0);
@@ -118,9 +119,6 @@ void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData)
     if (configData.contains("UseConcFile"))
         this->setConcFile(configData.getValue<bool>("UseConcFile"));
     //////////////////////////////////////////////////////////////////////////
-    if (configData.contains("UseStreetVelocityFile"))
-        this->setStreetVelocityFile(configData.getValue<bool>("UseStreetVelocityFile"));
-    //////////////////////////////////////////////////////////////////////////
     if (configData.contains("UseMeasurePoints"))
         this->setUseMeasurePoints(configData.getValue<bool>("UseMeasurePoints"));
     //////////////////////////////////////////////////////////////////////////
@@ -358,13 +356,13 @@ void Parameter::initGridPaths(){
     // add missing slash to gridPath
     if (gridPath.back() != '/') {
         gridPath += "/";
-        ic.gridPath = gridPath;
+        this->gridPath = gridPath;
     }
 
     // for multi-gpu add process id (if not already there)
     if (this->getNumprocs() > 1) {
         gridPath += StringUtil::toString(this->getMyProcessID()) + "/";
-        ic.gridPath = gridPath;
+        this->gridPath = gridPath;
     }
 
     //////////////////////////////////////////////////////////////////////////
@@ -405,7 +403,6 @@ void Parameter::initGridPaths(){
     this->setcpBottom(gridPath + "cpBottom.dat");
     this->setcpBottom2(gridPath + "cpBottom2.dat");
     this->setConcentration(gridPath + "conc.dat");
-    this->setStreetVelocity(gridPath + "streetVector.dat");
 
     //////////////////////////////////////////////////////////////////////////
     // Normals - Geometry
@@ -492,9 +489,9 @@ void Parameter::initLBMSimulationParameter()
         parH[i]->gridNX           = getGridX().at(i);
         parH[i]->gridNY           = getGridY().at(i);
         parH[i]->gridNZ           = getGridZ().at(i);
-        parH[i]->vis              = ic.vis * pow(2.f, i);
-        parH[i]->diffusivity      = ic.Diffusivity * pow(2.f, i);
-        parH[i]->omega            = 1.0f / (3.0f * parH[i]->vis + 0.5f); // omega :-) not s9 = -1.0f/(3.0f*parH[i]->vis+0.5f);//
+        parH[i]->viscosity        = this->vis * pow((real)2.0, i);
+        parH[i]->diffusivity      = this->Diffusivity * pow((real)2.0, i);
+        parH[i]->omega            = (real)1.0 / (real(3.0) * parH[i]->viscosity + real(0.5)); // omega :-) not s9 = -1.0f/(3.0f*parH[i]->vis+0.5f);//
         parH[i]->nx               = parH[i]->gridNX + 2 * STARTOFFX;
         parH[i]->ny               = parH[i]->gridNY + 2 * STARTOFFY;
         parH[i]->nz               = parH[i]->gridNZ + 2 * STARTOFFZ;
@@ -502,17 +499,17 @@ void Parameter::initLBMSimulationParameter()
         parH[i]->sizePlaneXY      = parH[i]->nx * parH[i]->ny;
         parH[i]->sizePlaneYZ      = parH[i]->ny * parH[i]->nz;
         parH[i]->sizePlaneXZ      = parH[i]->nx * parH[i]->nz;
-        parH[i]->mem_size_real    = sizeof(real) * parH[i]->size_Mat;
-        parH[i]->mem_size_int     = sizeof(unsigned int) * parH[i]->size_Mat;
-        parH[i]->mem_size_bool    = sizeof(bool) * parH[i]->size_Mat;
-        parH[i]->mem_size_real_yz = sizeof(real) * parH[i]->ny * parH[i]->nz;
+//        parH[i]->mem_size_real    = sizeof(real) * parH[i]->size_Mat;         //DEPRECATED: related to full matrix
+//        parH[i]->mem_size_int     = sizeof(unsigned int) * parH[i]->size_Mat; //DEPRECATED: related to full matrix
+//        parH[i]->mem_size_bool    = sizeof(bool) * parH[i]->size_Mat;         //DEPRECATED: related to full matrix
+//        parH[i]->mem_size_real_yz = sizeof(real) * parH[i]->ny * parH[i]->nz; //DEPRECATED: related to full matrix
         parH[i]->isEvenTimestep        = true;
-        parH[i]->startz           = parH[i]->gridNZ * ic.myProcessId;
-        parH[i]->endz             = parH[i]->gridNZ * ic.myProcessId + parH[i]->gridNZ;
-        parH[i]->Lx               = (real)((1.f * parH[i]->gridNX - 1.f) / (pow(2.f, i)));
-        parH[i]->Ly               = (real)((1.f * parH[i]->gridNY - 1.f) / (pow(2.f, i)));
-        parH[i]->Lz               = (real)((1.f * parH[i]->gridNZ - 1.f) / (pow(2.f, i)));
-        parH[i]->dx               = (real)(1.f / (pow(2.f, i)));
+        parH[i]->startz           = parH[i]->gridNZ * this->myProcessId;
+        parH[i]->endz             = parH[i]->gridNZ * this->myProcessId + parH[i]->gridNZ;
+        parH[i]->Lx               = ((real)1.0 * parH[i]->gridNX - (real)1.0) / (pow((real)2.0, i));
+        parH[i]->Ly               = ((real)1.0 * parH[i]->gridNY - (real)1.0) / (pow((real)2.0, i));
+        parH[i]->Lz               = ((real)1.0 * parH[i]->gridNZ - (real)1.0) / (pow((real)2.0, i));
+        parH[i]->dx               = (real)1.0 / pow((real)2.0, i);
         parH[i]->XdistKn          = getDistX().at(i);
         parH[i]->YdistKn          = getDistY().at(i);
         parH[i]->ZdistKn          = getDistZ().at(i);
@@ -520,12 +517,12 @@ void Parameter::initLBMSimulationParameter()
             parH[i]->distX  = (real)getDistX().at(i);
             parH[i]->distY  = (real)getDistY().at(i);
             parH[i]->distZ  = (real)getDistZ().at(i);
-            parH[i]->mTtoWx = (real)1.0f;
-            parH[i]->mTtoWy = (real)1.0f;
-            parH[i]->mTtoWz = (real)1.0f;
-            parH[i]->cTtoWx = (real)0.0f;
-            parH[i]->cTtoWy = (real)0.0f;
-            parH[i]->cTtoWz = (real)0.0f;
+            parH[i]->mTtoWx = (real)1.0;
+            parH[i]->mTtoWy = (real)1.0;
+            parH[i]->mTtoWz = (real)1.0;
+            parH[i]->cTtoWx = (real)0.0;
+            parH[i]->cTtoWy = (real)0.0;
+            parH[i]->cTtoWz = (real)0.0;
             ////MGs Trafo///////////////////////////////////////////////////////////////
             // parH[i]->cStartx               = (real)parH[i]->XdistKn;
             // parH[i]->cStarty               = (real)parH[i]->XdistKn;
@@ -533,9 +530,9 @@ void Parameter::initLBMSimulationParameter()
             ////////////////////////////////////////////////////////////////////////////
         } else {
             // Geller
-            parH[i]->distX = ((real)getDistX().at(i) + 0.25f) * parH[i - 1]->dx;
-            parH[i]->distY = ((real)getDistY().at(i) + 0.25f) * parH[i - 1]->dx;
-            parH[i]->distZ = ((real)getDistZ().at(i) + 0.25f) * parH[i - 1]->dx;
+            parH[i]->distX = ((real)getDistX().at(i) + (real)0.25) * parH[i - 1]->dx;
+            parH[i]->distY = ((real)getDistY().at(i) + (real)0.25) * parH[i - 1]->dx;
+            parH[i]->distZ = ((real)getDistZ().at(i) + (real)0.25) * parH[i - 1]->dx;
             // parH[i]->distX                 = ((real)getDistX().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distX;
             // parH[i]->distY                 = ((real)getDistY().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distY;
             // parH[i]->distZ                 = ((real)getDistZ().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distZ;
@@ -560,7 +557,7 @@ void Parameter::initLBMSimulationParameter()
         parD[i]->gridNX           = parH[i]->gridNX;
         parD[i]->gridNY           = parH[i]->gridNY;
         parD[i]->gridNZ           = parH[i]->gridNZ;
-        parD[i]->vis              = parH[i]->vis;
+        parD[i]->viscosity        = parH[i]->viscosity;
         parD[i]->diffusivity      = parH[i]->diffusivity;
         parD[i]->omega            = parH[i]->omega;
         parD[i]->nx               = parH[i]->nx;
@@ -570,10 +567,10 @@ void Parameter::initLBMSimulationParameter()
         parD[i]->sizePlaneXY      = parH[i]->sizePlaneXY;
         parD[i]->sizePlaneYZ      = parH[i]->sizePlaneYZ;
         parD[i]->sizePlaneXZ      = parH[i]->sizePlaneXZ;
-        parD[i]->mem_size_real    = sizeof(real) * parD[i]->size_Mat;
-        parD[i]->mem_size_int     = sizeof(unsigned int) * parD[i]->size_Mat;
-        parD[i]->mem_size_bool    = sizeof(bool) * parD[i]->size_Mat;
-        parD[i]->mem_size_real_yz = sizeof(real) * parD[i]->ny * parD[i]->nz;
+        //parD[i]->mem_size_real    = sizeof(real) * parD[i]->size_Mat;          //DEPRECATED: related to full matrix
+        //parD[i]->mem_size_int     = sizeof(unsigned int) * parD[i]->size_Mat;  //DEPRECATED: related to full matrix
+        //parD[i]->mem_size_bool    = sizeof(bool) * parD[i]->size_Mat;          //DEPRECATED: related to full matrix
+        //parD[i]->mem_size_real_yz = sizeof(real) * parD[i]->ny * parD[i]->nz;  //DEPRECATED: related to full matrix
         parD[i]->isEvenTimestep        = parH[i]->isEvenTimestep;
         parD[i]->startz           = parH[i]->startz;
         parD[i]->endz             = parH[i]->endz;
@@ -588,6 +585,30 @@ void Parameter::initLBMSimulationParameter()
         parD[i]->distY            = parH[i]->distY;
         parD[i]->distZ            = parH[i]->distZ;
     }
+
+    checkParameterValidityCumulantK17();
+}
+
+void Parameter::checkParameterValidityCumulantK17() const // Logs warnings if the LB viscosity or LB velocity exceed the limits recommended for the CumulantK17 kernel; only reads members.
+{
+    if (this->mainKernel != "CumulantK17") // the limits below are only checked when this kernel is selected
+        return;
+
+    const real viscosity = this->parH[maxlevel]->viscosity; // per-level value is vis * 2^level (set in initLBMSimulationParameter); NOTE(review): presumably maxlevel is the finest level and therefore holds the largest viscosity - confirm
+    const real viscosityLimit = 1.0 / 42.0; // recommended upper bound, reported in the warning text below
+    if (viscosity > viscosityLimit) {
+        VF_LOG_WARNING("The viscosity (in LB units) at level {} is {:1.3g}. It is recommended to keep it smaller than {:1.3g} "
+                       "for the CumulantK17 collision kernel.",
+                       maxlevel, viscosity, viscosityLimit);
+    }
+
+    const real velocity = this->u0; // u0 is the value stored by setVelocityLB()
+    const real velocityLimit = 0.1; // recommended upper bound, reported in the warning text below
+    if (velocity > velocityLimit) { // NOTE(review): signed comparison - a negative u0 with large magnitude would not trigger the warning; confirm u0 is always non-negative
+        VF_LOG_WARNING("The velocity (in LB units) is {:1.4g}. It is recommended to keep it smaller than {:1.4g} for the "
+                       "CumulantK17 collision kernel.",
+                       velocity, velocityLimit);
+    }
+}
 
 void Parameter::copyMeasurePointsArrayToVector(int lev)
@@ -692,15 +713,15 @@ void Parameter::setEndXHotWall(real endXHotWall)
 }
 void Parameter::setTimestepEnd(unsigned int tend)
 {
-    ic.tend = tend;
+    this->tend = tend;
 }
 void Parameter::setTimestepOut(unsigned int tout)
 {
-    ic.tout = tout;
+    this->tout = tout;
 }
 void Parameter::setTimestepStartOut(unsigned int tStartOut)
 {
-    ic.tStartOut = tStartOut;
+    this->tStartOut = tStartOut;
 }
 void Parameter::setTimestepOfCoarseLevel(unsigned int timestep)
 {
@@ -712,7 +733,7 @@ void Parameter::setCalcTurbulenceIntensity(bool calcVelocityAndFluctuations)
 }
 void Parameter::setCalcMedian(bool calcMedian)
 {
-    ic.calcMedian = calcMedian;
+    this->calcMedian = calcMedian;
 }
 void Parameter::setCalcDragLift(bool calcDragLift)
 {
@@ -732,11 +753,11 @@ void Parameter::setCalcPlaneConc(bool calcPlaneConc)
 }
 void Parameter::setTimeCalcMedStart(int CalcMedStart)
 {
-    ic.tCalcMedStart = CalcMedStart;
+    this->tCalcMedStart = CalcMedStart;
 }
 void Parameter::setTimeCalcMedEnd(int CalcMedEnd)
 {
-    ic.tCalcMedEnd = CalcMedEnd;
+    this->tCalcMedEnd = CalcMedEnd;
 }
 void Parameter::setOutputPath(std::string oPath)
 {
@@ -744,82 +765,82 @@ void Parameter::setOutputPath(std::string oPath)
     if (oPath.back() != '/')
         oPath += "/";
 
-    ic.oPath = oPath;
+    this->oPath = oPath;
     this->setPathAndFilename(this->getOutputPath() + this->getOutputPrefix());
 }
 void Parameter::setOutputPrefix(std::string oPrefix)
 {
-    ic.oPrefix = oPrefix;
+    this->oPrefix = oPrefix; // this-> is required because the parameter shadows the member; the combined output path + prefix is refreshed via setPathAndFilename() on the next line
     this->setPathAndFilename(this->getOutputPath() + this->getOutputPrefix());
 }
 void Parameter::setPathAndFilename(std::string fname)
 {
-    ic.fname = fname;
+    this->fname = fname;
 }
 void Parameter::setGridPath(std::string gridPath)
 {
-    ic.gridPath = gridPath;
+    this->gridPath = gridPath;
     this->initGridPaths();
 }
 void Parameter::setPrintFiles(bool printfiles)
 {
-    ic.printFiles = printfiles;
+    this->printFiles = printfiles;
 }
 void Parameter::setReadGeo(bool readGeo)
 {
-    ic.readGeo = readGeo;
+    this->readGeo = readGeo;
 }
 void Parameter::setDiffusivity(real Diffusivity)
 {
-    ic.Diffusivity = Diffusivity;
+    this->Diffusivity = Diffusivity;
 }
 void Parameter::setTemperatureInit(real Temp)
 {
-    ic.Temp = Temp;
+    this->Temp = Temp;
 }
 void Parameter::setTemperatureBC(real TempBC)
 {
-    ic.TempBC = TempBC;
+    this->TempBC = TempBC;
 }
 void Parameter::setViscosityLB(real Viscosity)
 {
-    ic.vis = Viscosity;
+    this->vis = Viscosity;
 }
 void Parameter::setVelocityLB(real Velocity)
 {
-    ic.u0 = Velocity;
+    this->u0 = Velocity;
 }
 void Parameter::setViscosityRatio(real ViscosityRatio)
 {
-    ic.vis_ratio = ViscosityRatio;
+    this->vis_ratio = ViscosityRatio;
 }
 void Parameter::setVelocityRatio(real VelocityRatio)
 {
-    ic.u0_ratio = VelocityRatio;
+    this->u0_ratio = VelocityRatio;
 }
 void Parameter::setDensityRatio(real DensityRatio)
 {
-    ic.delta_rho = DensityRatio;
+    this->delta_rho = DensityRatio;
 }
 void Parameter::setPressRatio(real PressRatio)
 {
-    ic.delta_press = PressRatio;
+    this->delta_press = PressRatio;
 }
 real Parameter::getViscosityRatio()
 {
-    return ic.vis_ratio;
+    return this->vis_ratio;
 }
 real Parameter::getVelocityRatio()
 {
-    return ic.u0_ratio;
+    return this->u0_ratio;
 }
 real Parameter::getDensityRatio()
 {
-    return ic.delta_rho;
+    return this->delta_rho;
 }
 real Parameter::getPressureRatio()
 {
-    return ic.delta_press;
+    return this->delta_press;
 }
 real Parameter::getTimeRatio()
 {
@@ -867,133 +888,129 @@ real Parameter::getScaledStressRatio(int level)
 }
 void Parameter::setRealX(real RealX)
 {
-    ic.RealX = RealX;
+    this->RealX = RealX;
 }
 void Parameter::setRealY(real RealY)
 {
-    ic.RealY = RealY;
+    this->RealY = RealY;
 }
 void Parameter::setPressInID(unsigned int PressInID)
 {
-    ic.PressInID = PressInID;
+    this->PressInID = PressInID;
 }
 void Parameter::setPressOutID(unsigned int PressOutID)
 {
-    ic.PressOutID = PressOutID;
+    this->PressOutID = PressOutID;
 }
 void Parameter::setPressInZ(unsigned int PressInZ)
 {
-    ic.PressInZ = PressInZ;
+    this->PressInZ = PressInZ;
 }
 void Parameter::setPressOutZ(unsigned int PressOutZ)
 {
-    ic.PressOutZ = PressOutZ;
+    this->PressOutZ = PressOutZ;
 }
 void Parameter::setOutflowPressureCorrectionFactor(real pressBCrhoCorrectionFactor)
 {
-    ic.outflowPressureCorrectionFactor = pressBCrhoCorrectionFactor;
+    this->outflowPressureCorrectionFactor = pressBCrhoCorrectionFactor;
 }
 void Parameter::setMaxDev(int maxdev)
 {
-    ic.maxdev = maxdev;
+    this->maxdev = maxdev;
 }
 void Parameter::setMyID(int myid)
 {
-    ic.myProcessId = myid;
+    this->myProcessId = myid;
 }
 void Parameter::setNumprocs(int numprocs)
 {
-    ic.numprocs = numprocs;
+    this->numprocs = numprocs;
 }
 void Parameter::setDevices(std::vector<uint> devices)
 {
-    ic.devices = devices;
+    this->devices = devices;
 }
 void Parameter::setGeometryFileC(std::string GeometryFileC)
 {
-    ic.geometryFileC = GeometryFileC;
+    this->geometryFileC = GeometryFileC;
 }
 void Parameter::setGeometryFileM(std::string GeometryFileM)
 {
-    ic.geometryFileM = GeometryFileM;
+    this->geometryFileM = GeometryFileM;
 }
 void Parameter::setGeometryFileF(std::string GeometryFileF)
 {
-    ic.geometryFileF = GeometryFileF;
+    this->geometryFileF = GeometryFileF;
 }
 void Parameter::setRe(real Re)
 {
-    ic.Re = Re;
+    this->Re = Re;
 }
 void Parameter::setFactorPressBC(real factorPressBC)
 {
-    ic.factorPressBC = factorPressBC;
+    this->factorPressBC = factorPressBC;
 }
 void Parameter::setIsGeo(bool isGeo)
 {
-    ic.isGeo = isGeo;
+    this->isGeo = isGeo;
 }
 void Parameter::setIsGeoNormal(bool isGeoNormal)
 {
-    ic.isGeoNormal = isGeoNormal;
+    this->isGeoNormal = isGeoNormal;
 }
 void Parameter::setIsInflowNormal(bool isInflowNormal)
 {
-    ic.isInflowNormal = isInflowNormal;
+    this->isInflowNormal = isInflowNormal;
 }
 void Parameter::setIsOutflowNormal(bool isOutflowNormal)
 {
-    ic.isOutflowNormal = isOutflowNormal;
+    this->isOutflowNormal = isOutflowNormal;
 }
 void Parameter::setIsProp(bool isProp)
 {
-    ic.isProp = isProp;
+    this->isProp = isProp;
 }
 void Parameter::setIsCp(bool isCp)
 {
-    ic.isCp = isCp;
+    this->isCp = isCp;
 }
 void Parameter::setConcFile(bool concFile)
 {
-    ic.isConc = concFile;
-}
-void Parameter::setStreetVelocityFile(bool streetVelocityFile)
-{
-    ic.streetVelocityFile = streetVelocityFile;
+    this->isConc = concFile;
 }
 void Parameter::setUseMeasurePoints(bool useMeasurePoints)
 {
-    ic.isMeasurePoints = useMeasurePoints;
+    this->isMeasurePoints = useMeasurePoints;
 }
 void Parameter::setUseInitNeq(bool useInitNeq)
 {
-    ic.isInitNeq = useInitNeq;
+    this->isInitNeq = useInitNeq;
 }
 void Parameter::setSimulatePorousMedia(bool simulatePorousMedia)
 {
-    ic.simulatePorousMedia = simulatePorousMedia;
+    this->simulatePorousMedia = simulatePorousMedia;
 }
 void Parameter::setUseTurbulentViscosity(bool useTurbulentViscosity)
 {
-    ic.isTurbulentViscosity = useTurbulentViscosity;
+    this->isTurbulentViscosity = useTurbulentViscosity;
 }
 void Parameter::setUseWale(bool useWale)
 {
-    ic.isWale = useWale;
+    this->isWale = useWale; // enabling WALE also switches turbulent viscosity on (call below); passing false leaves the turbulent-viscosity flag untouched
     if (useWale)
         setUseTurbulentViscosity(true);
 }
 void Parameter::setTurbulenceModel(TurbulenceModel turbulenceModel)
 {
-    ic.turbulenceModel = turbulenceModel;
+    this->turbulenceModel = turbulenceModel;
 }
 void Parameter::setSGSConstant(real SGSConstant)
 {
-    ic.SGSConstant = SGSConstant;
+    this->SGSConstant = SGSConstant;
 }
 void Parameter::setHasWallModelMonitor(bool hasWallModelMonitor)
 {
-    ic.hasWallModelMonitor = hasWallModelMonitor;
+    this->hasWallModelMonitor = hasWallModelMonitor;
 }
 
 void Parameter::setIsF3(bool isF3)
@@ -1008,59 +1025,59 @@ void Parameter::setIsBodyForce(bool isBodyForce)
 
 void Parameter::setGridX(std::vector<int> GridX)
 {
-    ic.GridX = GridX;
+    this->GridX = GridX;
 }
 void Parameter::setGridY(std::vector<int> GridY)
 {
-    ic.GridY = GridY;
+    this->GridY = GridY;
 }
 void Parameter::setGridZ(std::vector<int> GridZ)
 {
-    ic.GridZ = GridZ;
+    this->GridZ = GridZ;
 }
 void Parameter::setDistX(std::vector<int> DistX)
 {
-    ic.DistX = DistX;
+    this->DistX = DistX;
 }
 void Parameter::setDistY(std::vector<int> DistY)
 {
-    ic.DistY = DistY;
+    this->DistY = DistY;
 }
 void Parameter::setDistZ(std::vector<int> DistZ)
 {
-    ic.DistZ = DistZ;
+    this->DistZ = DistZ;
 }
 void Parameter::setScaleLBMtoSI(std::vector<real> scaleLBMtoSI)
 {
-    ic.scaleLBMtoSI = scaleLBMtoSI;
+    this->scaleLBMtoSI = scaleLBMtoSI;
 }
 void Parameter::setTranslateLBMtoSI(std::vector<real> translateLBMtoSI)
 {
-    ic.translateLBMtoSI = translateLBMtoSI;
+    this->translateLBMtoSI = translateLBMtoSI;
 }
 void Parameter::setMinCoordX(std::vector<real> MinCoordX)
 {
-    ic.minCoordX = MinCoordX;
+    this->minCoordX = MinCoordX;
 }
 void Parameter::setMinCoordY(std::vector<real> MinCoordY)
 {
-    ic.minCoordY = MinCoordY;
+    this->minCoordY = MinCoordY;
 }
 void Parameter::setMinCoordZ(std::vector<real> MinCoordZ)
 {
-    ic.minCoordZ = MinCoordZ;
+    this->minCoordZ = MinCoordZ;
 }
 void Parameter::setMaxCoordX(std::vector<real> MaxCoordX)
 {
-    ic.maxCoordX = MaxCoordX;
+    this->maxCoordX = MaxCoordX;
 }
 void Parameter::setMaxCoordY(std::vector<real> MaxCoordY)
 {
-    ic.maxCoordY = MaxCoordY;
+    this->maxCoordY = MaxCoordY;
 }
 void Parameter::setMaxCoordZ(std::vector<real> MaxCoordZ)
 {
-    ic.maxCoordZ = MaxCoordZ;
+    this->maxCoordZ = MaxCoordZ;
 }
 void Parameter::setTempH(TempforBoundaryConditions *TempH)
 {
@@ -1104,259 +1121,255 @@ void Parameter::setTempPressD(TempPressforBoundaryConditions *TempPressD)
 //}
 void Parameter::setkFull(std::string kFull)
 {
-    ic.kFull = kFull;
+    this->kFull = kFull;
 }
 void Parameter::setgeoFull(std::string geoFull)
 {
-    ic.geoFull = geoFull;
+    this->geoFull = geoFull;
 }
 void Parameter::setgeoVec(std::string geoVec)
 {
-    ic.geoVec = geoVec;
+    this->geoVec = geoVec;
 }
 void Parameter::setcoordX(std::string coordX)
 {
-    ic.coordX = coordX;
+    this->coordX = coordX;
 }
 void Parameter::setcoordY(std::string coordY)
 {
-    ic.coordY = coordY;
+    this->coordY = coordY;
 }
 void Parameter::setcoordZ(std::string coordZ)
 {
-    ic.coordZ = coordZ;
+    this->coordZ = coordZ;
 }
 void Parameter::setneighborX(std::string neighborX)
 {
-    ic.neighborX = neighborX;
+    this->neighborX = neighborX;
 }
 void Parameter::setneighborY(std::string neighborY)
 {
-    ic.neighborY = neighborY;
+    this->neighborY = neighborY;
 }
 void Parameter::setneighborZ(std::string neighborZ)
 {
-    ic.neighborZ = neighborZ;
+    this->neighborZ = neighborZ;
 }
 void Parameter::setneighborWSB(std::string neighborWSB)
 {
-    ic.neighborWSB = neighborWSB;
+    this->neighborWSB = neighborWSB;
 }
 void Parameter::setscaleCFC(std::string scaleCFC)
 {
-    ic.scaleCFC = scaleCFC;
+    this->scaleCFC = scaleCFC;
 }
 void Parameter::setscaleCFF(std::string scaleCFF)
 {
-    ic.scaleCFF = scaleCFF;
+    this->scaleCFF = scaleCFF;
 }
 void Parameter::setscaleFCC(std::string scaleFCC)
 {
-    ic.scaleFCC = scaleFCC;
+    this->scaleFCC = scaleFCC;
 }
 void Parameter::setscaleFCF(std::string scaleFCF)
 {
-    ic.scaleFCF = scaleFCF;
+    this->scaleFCF = scaleFCF;
 }
 void Parameter::setscaleOffsetCF(std::string scaleOffsetCF)
 {
-    ic.scaleOffsetCF = scaleOffsetCF;
+    this->scaleOffsetCF = scaleOffsetCF;
 }
 void Parameter::setscaleOffsetFC(std::string scaleOffsetFC)
 {
-    ic.scaleOffsetFC = scaleOffsetFC;
+    this->scaleOffsetFC = scaleOffsetFC;
 }
 void Parameter::setgeomBoundaryBcQs(std::string geomBoundaryBcQs)
 {
-    ic.geomBoundaryBcQs = geomBoundaryBcQs;
+    this->geomBoundaryBcQs = geomBoundaryBcQs;
 }
 void Parameter::setgeomBoundaryBcValues(std::string geomBoundaryBcValues)
 {
-    ic.geomBoundaryBcValues = geomBoundaryBcValues;
+    this->geomBoundaryBcValues = geomBoundaryBcValues;
 }
 void Parameter::setnoSlipBcPos(std::string noSlipBcPos)
 {
-    ic.noSlipBcPos = noSlipBcPos;
+    this->noSlipBcPos = noSlipBcPos;
 }
 void Parameter::setnoSlipBcQs(std::string noSlipBcQs)
 {
-    ic.noSlipBcQs = noSlipBcQs;
+    this->noSlipBcQs = noSlipBcQs;
 }
 void Parameter::setnoSlipBcValue(std::string noSlipBcValue)
 {
-    ic.noSlipBcValue = noSlipBcValue;
+    this->noSlipBcValue = noSlipBcValue;
 }
 void Parameter::setnoSlipBcValues(std::string noSlipBcValues)
 {
-    ic.noSlipBcValues = noSlipBcValues;
+    this->noSlipBcValues = noSlipBcValues;
 }
 void Parameter::setslipBcPos(std::string slipBcPos)
 {
-    ic.slipBcPos = slipBcPos;
+    this->slipBcPos = slipBcPos;
 }
 void Parameter::setslipBcQs(std::string slipBcQs)
 {
-    ic.slipBcQs = slipBcQs;
+    this->slipBcQs = slipBcQs;
 }
 void Parameter::setslipBcValue(std::string slipBcValue)
 {
-    ic.slipBcValue = slipBcValue;
+    this->slipBcValue = slipBcValue;
 }
 void Parameter::setpressBcPos(std::string pressBcPos)
 {
-    ic.pressBcPos = pressBcPos;
+    this->pressBcPos = pressBcPos;
 }
 void Parameter::setpressBcQs(std::string pressBcQs)
 {
-    ic.pressBcQs = pressBcQs;
+    this->pressBcQs = pressBcQs;
 }
 void Parameter::setpressBcValue(std::string pressBcValue)
 {
-    ic.pressBcValue = pressBcValue;
+    this->pressBcValue = pressBcValue;
 }
 void Parameter::setpressBcValues(std::string pressBcValues)
 {
-    ic.pressBcValues = pressBcValues;
+    this->pressBcValues = pressBcValues;
 }
 void Parameter::setvelBcQs(std::string velBcQs)
 {
-    ic.velBcQs = velBcQs;
+    this->velBcQs = velBcQs;
 }
 void Parameter::setvelBcValues(std::string velBcValues)
 {
-    ic.velBcValues = velBcValues;
+    this->velBcValues = velBcValues;
 }
 void Parameter::setinletBcQs(std::string inletBcQs)
 {
-    ic.inletBcQs = inletBcQs;
+    this->inletBcQs = inletBcQs;
 }
 void Parameter::setinletBcValues(std::string inletBcValues)
 {
-    ic.inletBcValues = inletBcValues;
+    this->inletBcValues = inletBcValues;
 }
 void Parameter::setoutletBcQs(std::string outletBcQs)
 {
-    ic.outletBcQs = outletBcQs;
+    this->outletBcQs = outletBcQs;
 }
 void Parameter::setoutletBcValues(std::string outletBcValues)
 {
-    ic.outletBcValues = outletBcValues;
+    this->outletBcValues = outletBcValues;
 }
 void Parameter::settopBcQs(std::string topBcQs)
 {
-    ic.topBcQs = topBcQs;
+    this->topBcQs = topBcQs;
 }
 void Parameter::settopBcValues(std::string topBcValues)
 {
-    ic.topBcValues = topBcValues;
+    this->topBcValues = topBcValues;
 }
 void Parameter::setbottomBcQs(std::string bottomBcQs)
 {
-    ic.bottomBcQs = bottomBcQs;
+    this->bottomBcQs = bottomBcQs;
 }
 void Parameter::setbottomBcValues(std::string bottomBcValues)
 {
-    ic.bottomBcValues = bottomBcValues;
+    this->bottomBcValues = bottomBcValues;
 }
 void Parameter::setfrontBcQs(std::string frontBcQs)
 {
-    ic.frontBcQs = frontBcQs;
+    this->frontBcQs = frontBcQs;
 }
 void Parameter::setfrontBcValues(std::string frontBcValues)
 {
-    ic.frontBcValues = frontBcValues;
+    this->frontBcValues = frontBcValues;
 }
 void Parameter::setbackBcQs(std::string backBcQs)
 {
-    ic.backBcQs = backBcQs;
+    this->backBcQs = backBcQs;
 }
 void Parameter::setbackBcValues(std::string backBcValues)
 {
-    ic.backBcValues = backBcValues;
+    this->backBcValues = backBcValues;
 }
 void Parameter::setwallBcQs(std::string wallBcQs)
 {
-    ic.wallBcQs = wallBcQs;
+    this->wallBcQs = wallBcQs;
 }
 void Parameter::setwallBcValues(std::string wallBcValues)
 {
-    ic.wallBcValues = wallBcValues;
+    this->wallBcValues = wallBcValues;
 }
 void Parameter::setperiodicBcQs(std::string periodicBcQs)
 {
-    ic.periodicBcQs = periodicBcQs;
+    this->periodicBcQs = periodicBcQs;
 }
 void Parameter::setperiodicBcValues(std::string periodicBcValues)
 {
-    ic.periodicBcValues = periodicBcValues;
+    this->periodicBcValues = periodicBcValues;
 }
 void Parameter::setpropellerQs(std::string propellerQs)
 {
-    ic.propellerQs = propellerQs;
+    this->propellerQs = propellerQs;
 }
 void Parameter::setpropellerValues(std::string propellerValues)
 {
-    ic.propellerValues = propellerValues;
+    this->propellerValues = propellerValues;
 }
 void Parameter::setpropellerCylinder(std::string propellerCylinder)
 {
-    ic.propellerCylinder = propellerCylinder;
+    this->propellerCylinder = propellerCylinder;
 }
 void Parameter::setmeasurePoints(std::string measurePoints)
 {
-    ic.measurePoints = measurePoints;
+    this->measurePoints = measurePoints;
 }
 void Parameter::setnumberNodes(std::string numberNodes)
 {
-    ic.numberNodes = numberNodes;
+    this->numberNodes = numberNodes;
 }
 void Parameter::setLBMvsSI(std::string LBMvsSI)
 {
-    ic.LBMvsSI = LBMvsSI;
+    this->LBMvsSI = LBMvsSI;
 }
 void Parameter::setcpTop(std::string cpTop)
 {
-    ic.cpTop = cpTop;
+    this->cpTop = cpTop;
 }
 void Parameter::setcpBottom(std::string cpBottom)
 {
-    ic.cpBottom = cpBottom;
+    this->cpBottom = cpBottom;
 }
 void Parameter::setcpBottom2(std::string cpBottom2)
 {
-    ic.cpBottom2 = cpBottom2;
+    this->cpBottom2 = cpBottom2;
 }
 void Parameter::setConcentration(std::string concFile)
 {
-    ic.concentration = concFile;
-}
-void Parameter::setStreetVelocity(std::string streetVelocity)
-{
-    ic.streetVelocity = streetVelocity;
+    this->concentration = concFile;
 }
 void Parameter::setclockCycleForMP(real clockCycleForMP)
 {
-    ic.clockCycleForMP = clockCycleForMP;
+    this->clockCycleForMP = clockCycleForMP;
 }
 void Parameter::setTimeDoCheckPoint(unsigned int tDoCheckPoint)
 {
-    ic.tDoCheckPoint = tDoCheckPoint;
+    this->tDoCheckPoint = tDoCheckPoint;
 }
 void Parameter::setTimeDoRestart(unsigned int tDoRestart)
 {
-    ic.tDoRestart = tDoRestart;
+    this->tDoRestart = tDoRestart;
 }
 void Parameter::setDoCheckPoint(bool doCheckPoint)
 {
-    ic.doCheckPoint = doCheckPoint;
+    this->doCheckPoint = doCheckPoint;
 }
 void Parameter::setDoRestart(bool doRestart)
 {
-    ic.doRestart = doRestart;
+    this->doRestart = doRestart;
 }
 void Parameter::settimestepForMP(unsigned int timestepForMP)
 {
-    ic.timeStepForMP = timestepForMP;
+    this->timeStepForMP = timestepForMP;
 }
 void Parameter::setObj(std::string str, bool isObj)
 {
@@ -1376,19 +1389,19 @@ void Parameter::setObj(std::string str, bool isObj)
 }
 void Parameter::setUseGeometryValues(bool useGeometryValues)
 {
-    ic.GeometryValues = useGeometryValues;
+    this->GeometryValues = useGeometryValues;
 }
 void Parameter::setCalc2ndOrderMoments(bool is2ndOrderMoments)
 {
-    ic.is2ndOrderMoments = is2ndOrderMoments;
+    this->is2ndOrderMoments = is2ndOrderMoments;
 }
 void Parameter::setCalc3rdOrderMoments(bool is3rdOrderMoments)
 {
-    ic.is3rdOrderMoments = is3rdOrderMoments;
+    this->is3rdOrderMoments = is3rdOrderMoments;
 }
 void Parameter::setCalcHighOrderMoments(bool isHighOrderMoments)
 {
-    ic.isHighOrderMoments = isHighOrderMoments;
+    this->isHighOrderMoments = isHighOrderMoments;
 }
 void Parameter::setMemsizeGPU(double admem, bool reset)
 {
@@ -1580,39 +1593,39 @@ void Parameter::setRecvProcessNeighborsAfterFtoCZ(int numberOfNodes, int level,
 }
 void Parameter::setgeomBoundaryNormalX(std::string geomNormalX)
 {
-    ic.geomNormalX = geomNormalX;
+    this->geomNormalX = geomNormalX;
 }
 void Parameter::setgeomBoundaryNormalY(std::string geomNormalY)
 {
-    ic.geomNormalY = geomNormalY;
+    this->geomNormalY = geomNormalY;
 }
 void Parameter::setgeomBoundaryNormalZ(std::string geomNormalZ)
 {
-    ic.geomNormalZ = geomNormalZ;
+    this->geomNormalZ = geomNormalZ;
 }
 void Parameter::setInflowBoundaryNormalX(std::string inflowNormalX)
 {
-    ic.inflowNormalX = inflowNormalX;
+    this->inflowNormalX = inflowNormalX;
 }
 void Parameter::setInflowBoundaryNormalY(std::string inflowNormalY)
 {
-    ic.inflowNormalY = inflowNormalY;
+    this->inflowNormalY = inflowNormalY;
 }
 void Parameter::setInflowBoundaryNormalZ(std::string inflowNormalZ)
 {
-    ic.inflowNormalZ = inflowNormalZ;
+    this->inflowNormalZ = inflowNormalZ;
 }
 void Parameter::setOutflowBoundaryNormalX(std::string outflowNormalX)
 {
-    ic.outflowNormalX = outflowNormalX;
+    this->outflowNormalX = outflowNormalX;
 }
 void Parameter::setOutflowBoundaryNormalY(std::string outflowNormalY)
 {
-    ic.outflowNormalY = outflowNormalY;
+    this->outflowNormalY = outflowNormalY;
 }
 void Parameter::setOutflowBoundaryNormalZ(std::string outflowNormalZ)
 {
-    ic.outflowNormalZ = outflowNormalZ;
+    this->outflowNormalZ = outflowNormalZ;
 }
 void Parameter::setMainKernel(std::string kernel)
 {
@@ -1730,22 +1743,22 @@ unsigned int Parameter::getSizeMat(int level)
 {
     return parH[level]->size_Mat;
 }
-unsigned int Parameter::getMemSizereal(int level)
-{
-    return parH[level]->mem_size_real;
-}
-unsigned int Parameter::getMemSizeInt(int level)
-{
-    return parH[level]->mem_size_int;
-}
-unsigned int Parameter::getMemSizeBool(int level)
-{
-    return parH[level]->mem_size_bool;
-}
-unsigned int Parameter::getMemSizerealYZ(int level)
-{
-    return parH[level]->mem_size_real_yz;
-}
+//unsigned int Parameter::getMemSizereal(int level)      //DEPRECATED: related to full matrix
+//{
+//    return parH[level]->mem_size_real;
+//}
+//unsigned int Parameter::getMemSizeInt(int level)     //DEPRECATED: related to full matrix
+//{
+//    return parH[level]->mem_size_int;
+//}
+//unsigned int Parameter::getMemSizeBool(int level)    //DEPRECATED: related to full matrix
+//{
+//    return parH[level]->mem_size_bool;
+//}
+//unsigned int Parameter::getMemSizerealYZ(int level)  //DEPRECATED: related to full matrix
+//{
+//    return parH[level]->mem_size_real_yz;
+//}
 int Parameter::getFine()
 {
     return fine;
@@ -1812,19 +1825,19 @@ unsigned int Parameter::getTimestepInit()
 }
 unsigned int Parameter::getTimestepEnd()
 {
-    return ic.tend;
+    return this->tend;
 }
 unsigned int Parameter::getTimestepOut()
 {
-    return ic.tout;
+    return this->tout;
 }
 unsigned int Parameter::getTimestepStartOut()
 {
-    return ic.tStartOut;
+    return this->tStartOut;
 }
 bool Parameter::getCalcMedian()
 {
-    return ic.calcMedian;
+    return this->calcMedian;
 }
 bool Parameter::getCalcDragLift()
 {
@@ -1848,35 +1861,35 @@ bool Parameter::getCalcPlaneConc()
 }
 int Parameter::getTimeCalcMedStart()
 {
-    return ic.tCalcMedStart;
+    return this->tCalcMedStart;
 }
 int Parameter::getTimeCalcMedEnd()
 {
-    return ic.tCalcMedEnd;
+    return this->tCalcMedEnd;
 }
 std::string Parameter::getOutputPath()
 {
-    return ic.oPath;
+    return this->oPath;
 }
 std::string Parameter::getOutputPrefix()
 {
-    return ic.oPrefix;
+    return this->oPrefix;
 }
 std::string Parameter::getFName()
 {
-    return ic.fname;
+    return this->fname;
 }
 std::string Parameter::getGridPath()
 {
-    return ic.gridPath;
+    return this->gridPath;
 }
 bool Parameter::getPrintFiles()
 {
-    return ic.printFiles;
+    return this->printFiles;
 }
 bool Parameter::getReadGeo()
 {
-    return ic.readGeo;
+    return this->readGeo;
 }
 bool Parameter::getCalcTurbulenceIntensity()
 {
@@ -1884,143 +1897,143 @@ bool Parameter::getCalcTurbulenceIntensity()
 }
 real Parameter::getDiffusivity()
 {
-    return ic.Diffusivity;
+    return this->Diffusivity;
 }
 real Parameter::getTemperatureInit()
 {
-    return ic.Temp;
+    return this->Temp;
 }
 real Parameter::getTemperatureBC()
 {
-    return ic.TempBC;
+    return this->TempBC;
 }
 real Parameter::getViscosity()
 {
-    return ic.vis;
+    return this->vis;
 }
 real Parameter::getVelocity()
 {
-    return ic.u0;
+    return this->u0;
 }
 real Parameter::getRealX()
 {
-    return ic.RealX;
+    return this->RealX;
 }
 real Parameter::getRealY()
 {
-    return ic.RealY;
+    return this->RealY;
 }
 unsigned int Parameter::getPressInID()
 {
-    return ic.PressInID;
+    return this->PressInID;
 }
 unsigned int Parameter::getPressOutID()
 {
-    return ic.PressOutID;
+    return this->PressOutID;
 }
 unsigned int Parameter::getPressInZ()
 {
-    return ic.PressInZ;
+    return this->PressInZ;
 }
 unsigned int Parameter::getPressOutZ()
 {
-    return ic.PressOutZ;
+    return this->PressOutZ;
 }
 real Parameter::getOutflowPressureCorrectionFactor()
 {
-    return ic.outflowPressureCorrectionFactor;
+    return this->outflowPressureCorrectionFactor;
 }
 int Parameter::getMaxDev()
 {
-    return ic.maxdev;
+    return this->maxdev;
 }
 int Parameter::getMyProcessID()
 {
-    return ic.myProcessId;
+    return this->myProcessId;
 }
 int Parameter::getNumprocs()
 {
-    return ic.numprocs;
+    return this->numprocs;
 }
 std::vector<uint> Parameter::getDevices()
 {
-    return ic.devices;
+    return this->devices;
 }
 std::string Parameter::getGeometryFileC()
 {
-    return ic.geometryFileC;
+    return this->geometryFileC;
 }
 std::string Parameter::getGeometryFileM()
 {
-    return ic.geometryFileM;
+    return this->geometryFileM;
 }
 std::string Parameter::getGeometryFileF()
 {
-    return ic.geometryFileF;
+    return this->geometryFileF;
 }
 real Parameter::getRe()
 {
-    return ic.Re;
+    return this->Re;
 }
 real Parameter::getFactorPressBC()
 {
-    return ic.factorPressBC;
+    return this->factorPressBC;
 }
 std::vector<int> Parameter::getGridX()
 {
-    return ic.GridX;
+    return this->GridX;
 }
 std::vector<int> Parameter::getGridY()
 {
-    return ic.GridY;
+    return this->GridY;
 }
 std::vector<int> Parameter::getGridZ()
 {
-    return ic.GridZ;
+    return this->GridZ;
 }
 std::vector<int> Parameter::getDistX()
 {
-    return ic.DistX;
+    return this->DistX;
 }
 std::vector<int> Parameter::getDistY()
 {
-    return ic.DistY;
+    return this->DistY;
 }
 std::vector<int> Parameter::getDistZ()
 {
-    return ic.DistZ;
+    return this->DistZ;
 }
 std::vector<real> Parameter::getScaleLBMtoSI()
 {
-    return ic.scaleLBMtoSI;
+    return this->scaleLBMtoSI;
 }
 std::vector<real> Parameter::getTranslateLBMtoSI()
 {
-    return ic.translateLBMtoSI;
+    return this->translateLBMtoSI;
 }
 std::vector<real> Parameter::getMinCoordX()
 {
-    return ic.minCoordX;
+    return this->minCoordX;
 }
 std::vector<real> Parameter::getMinCoordY()
 {
-    return ic.minCoordY;
+    return this->minCoordY;
 }
 std::vector<real> Parameter::getMinCoordZ()
 {
-    return ic.minCoordZ;
+    return this->minCoordZ;
 }
 std::vector<real> Parameter::getMaxCoordX()
 {
-    return ic.maxCoordX;
+    return this->maxCoordX;
 }
 std::vector<real> Parameter::getMaxCoordY()
 {
-    return ic.maxCoordY;
+    return this->maxCoordY;
 }
 std::vector<real> Parameter::getMaxCoordZ()
 {
-    return ic.maxCoordZ;
+    return this->maxCoordZ;
 }
 TempforBoundaryConditions *Parameter::getTempH()
 {
@@ -2064,247 +2077,243 @@ TempPressforBoundaryConditions *Parameter::getTempPressD()
 //}
 std::string Parameter::getkFull()
 {
-    return ic.kFull;
+    return this->kFull;
 }
 std::string Parameter::getgeoFull()
 {
-    return ic.geoFull;
+    return this->geoFull;
 }
 std::string Parameter::getgeoVec()
 {
-    return ic.geoVec;
+    return this->geoVec;
 }
 std::string Parameter::getcoordX()
 {
-    return ic.coordX;
+    return this->coordX;
 }
 std::string Parameter::getcoordY()
 {
-    return ic.coordY;
+    return this->coordY;
 }
 std::string Parameter::getcoordZ()
 {
-    return ic.coordZ;
+    return this->coordZ;
 }
 std::string Parameter::getneighborX()
 {
-    return ic.neighborX;
+    return this->neighborX;
 }
 std::string Parameter::getneighborY()
 {
-    return ic.neighborY;
+    return this->neighborY;
 }
 std::string Parameter::getneighborZ()
 {
-    return ic.neighborZ;
+    return this->neighborZ;
 }
 std::string Parameter::getneighborWSB()
 {
-    return ic.neighborWSB;
+    return this->neighborWSB;
 }
 std::string Parameter::getscaleCFC()
 {
-    return ic.scaleCFC;
+    return this->scaleCFC;
 }
 std::string Parameter::getscaleCFF()
 {
-    return ic.scaleCFF;
+    return this->scaleCFF;
 }
 std::string Parameter::getscaleFCC()
 {
-    return ic.scaleFCC;
+    return this->scaleFCC;
 }
 std::string Parameter::getscaleFCF()
 {
-    return ic.scaleFCF;
+    return this->scaleFCF;
 }
 std::string Parameter::getscaleOffsetCF()
 {
-    return ic.scaleOffsetCF;
+    return this->scaleOffsetCF;
 }
 std::string Parameter::getscaleOffsetFC()
 {
-    return ic.scaleOffsetFC;
+    return this->scaleOffsetFC;
 }
 std::string Parameter::getgeomBoundaryBcQs()
 {
-    return ic.geomBoundaryBcQs;
+    return this->geomBoundaryBcQs;
 }
 std::string Parameter::getgeomBoundaryBcValues()
 {
-    return ic.geomBoundaryBcValues;
+    return this->geomBoundaryBcValues;
 }
 std::string Parameter::getnoSlipBcPos()
 {
-    return ic.noSlipBcPos;
+    return this->noSlipBcPos;
 }
 std::string Parameter::getnoSlipBcQs()
 {
-    return ic.noSlipBcQs;
+    return this->noSlipBcQs;
 }
 std::string Parameter::getnoSlipBcValue()
 {
-    return ic.noSlipBcValue;
+    return this->noSlipBcValue;
 }
 std::string Parameter::getnoSlipBcValues()
 {
-    return ic.noSlipBcValues;
+    return this->noSlipBcValues;
 }
 std::string Parameter::getslipBcPos()
 {
-    return ic.slipBcPos;
+    return this->slipBcPos;
 }
 std::string Parameter::getslipBcQs()
 {
-    return ic.slipBcQs;
+    return this->slipBcQs;
 }
 std::string Parameter::getslipBcValue()
 {
-    return ic.slipBcValue;
+    return this->slipBcValue;
 }
 std::string Parameter::getpressBcPos()
 {
-    return ic.pressBcPos;
+    return this->pressBcPos;
 }
 std::string Parameter::getpressBcQs()
 {
-    return ic.pressBcQs;
+    return this->pressBcQs;
 }
 std::string Parameter::getpressBcValue()
 {
-    return ic.pressBcValue;
+    return this->pressBcValue;
 }
 std::string Parameter::getpressBcValues()
 {
-    return ic.pressBcValues;
+    return this->pressBcValues;
 }
 std::string Parameter::getvelBcQs()
 {
-    return ic.velBcQs;
+    return this->velBcQs;
 }
 std::string Parameter::getvelBcValues()
 {
-    return ic.velBcValues;
+    return this->velBcValues;
 }
 std::string Parameter::getinletBcQs()
 {
-    return ic.inletBcQs;
+    return this->inletBcQs;
 }
 std::string Parameter::getinletBcValues()
 {
-    return ic.inletBcValues;
+    return this->inletBcValues;
 }
 std::string Parameter::getoutletBcQs()
 {
-    return ic.outletBcQs;
+    return this->outletBcQs;
 }
 std::string Parameter::getoutletBcValues()
 {
-    return ic.outletBcValues;
+    return this->outletBcValues;
 }
 std::string Parameter::gettopBcQs()
 {
-    return ic.topBcQs;
+    return this->topBcQs;
 }
 std::string Parameter::gettopBcValues()
 {
-    return ic.topBcValues;
+    return this->topBcValues;
 }
 std::string Parameter::getbottomBcQs()
 {
-    return ic.bottomBcQs;
+    return this->bottomBcQs;
 }
 std::string Parameter::getbottomBcValues()
 {
-    return ic.bottomBcValues;
+    return this->bottomBcValues;
 }
 std::string Parameter::getfrontBcQs()
 {
-    return ic.frontBcQs;
+    return this->frontBcQs;
 }
 std::string Parameter::getfrontBcValues()
 {
-    return ic.frontBcValues;
+    return this->frontBcValues;
 }
 std::string Parameter::getbackBcQs()
 {
-    return ic.backBcQs;
+    return this->backBcQs;
 }
 std::string Parameter::getbackBcValues()
 {
-    return ic.backBcValues;
+    return this->backBcValues;
 }
 std::string Parameter::getwallBcQs()
 {
-    return ic.wallBcQs;
+    return this->wallBcQs;
 }
 std::string Parameter::getwallBcValues()
 {
-    return ic.wallBcValues;
+    return this->wallBcValues;
 }
 std::string Parameter::getperiodicBcQs()
 {
-    return ic.periodicBcQs;
+    return this->periodicBcQs;
 }
 std::string Parameter::getperiodicBcValues()
 {
-    return ic.periodicBcValues;
+    return this->periodicBcValues;
 }
 std::string Parameter::getpropellerQs()
 {
-    return ic.propellerQs;
+    return this->propellerQs;
 }
 std::string Parameter::getpropellerValues()
 {
-    return ic.propellerValues;
+    return this->propellerValues;
 }
 std::string Parameter::getpropellerCylinder()
 {
-    return ic.propellerCylinder;
+    return this->propellerCylinder;
 }
 std::string Parameter::getmeasurePoints()
 {
-    return ic.measurePoints;
+    return this->measurePoints;
 }
 std::string Parameter::getLBMvsSI()
 {
-    return ic.LBMvsSI;
+    return this->LBMvsSI;
 }
 std::string Parameter::getnumberNodes()
 {
-    return ic.numberNodes;
+    return this->numberNodes;
 }
 std::string Parameter::getcpTop()
 {
-    return ic.cpTop;
+    return this->cpTop;
 }
 std::string Parameter::getcpBottom()
 {
-    return ic.cpBottom;
+    return this->cpBottom;
 }
 std::string Parameter::getcpBottom2()
 {
-    return ic.cpBottom2;
+    return this->cpBottom2;
 }
 std::string Parameter::getConcentration()
 {
-    return ic.concentration;
-}
-std::string Parameter::getStreetVelocityFilePath()
-{
-    return ic.streetVelocity;
+    return this->concentration;
 }
 real Parameter::getclockCycleForMP()
 {
-    return ic.clockCycleForMP;
+    return this->clockCycleForMP;
 }
 unsigned int Parameter::getTimeDoCheckPoint()
 {
-    return ic.tDoCheckPoint;
+    return this->tDoCheckPoint;
 }
 unsigned int Parameter::getTimeDoRestart()
 {
-    return ic.tDoRestart;
+    return this->tDoRestart;
 }
 
 //=======================================================================================
@@ -2328,63 +2337,59 @@ unsigned int Parameter::getTimeStep(int level, unsigned int t, bool isPostCollis
 
 bool Parameter::getDoCheckPoint()
 {
-    return ic.doCheckPoint;
+    return this->doCheckPoint;
 }
 bool Parameter::getDoRestart()
 {
-    return ic.doRestart;
+    return this->doRestart;
 }
 bool Parameter::getIsGeo()
 {
-    return ic.isGeo;
+    return this->isGeo;
 }
 bool Parameter::getIsGeoNormal()
 {
-    return ic.isGeoNormal;
+    return this->isGeoNormal;
 }
 bool Parameter::getIsInflowNormal()
 {
-    return ic.isInflowNormal;
+    return this->isInflowNormal;
 }
 bool Parameter::getIsOutflowNormal()
 {
-    return ic.isOutflowNormal;
+    return this->isOutflowNormal;
 }
 bool Parameter::getIsCp()
 {
-    return ic.isCp;
+    return this->isCp;
 }
 bool Parameter::getConcFile()
 {
-    return ic.isConc;
-}
-bool Parameter::isStreetVelocityFile()
-{
-    return ic.streetVelocityFile;
+    return this->isConc;
 }
 bool Parameter::getUseMeasurePoints()
 {
-    return ic.isMeasurePoints;
+    return this->isMeasurePoints;
 }
 bool Parameter::getUseWale()
 {
-    return ic.isWale;
+    return this->isWale;
 }
 TurbulenceModel Parameter::getTurbulenceModel()
 {
-    return ic.turbulenceModel;
+    return this->turbulenceModel;
 }
 bool Parameter::getUseTurbulentViscosity()
 {
-    return ic.isTurbulentViscosity;
+    return this->isTurbulentViscosity;
 }
 real Parameter::getSGSConstant()
 {
-    return ic.SGSConstant;
+    return this->SGSConstant;
 }
 bool Parameter::getHasWallModelMonitor()
 {
-    return ic.hasWallModelMonitor;
+    return this->hasWallModelMonitor;
 }
 std::vector<SPtr<PreCollisionInteractor>> Parameter::getActuators()
 {
@@ -2396,11 +2401,11 @@ std::vector<SPtr<PreCollisionInteractor>> Parameter::getProbes()
 }
 bool Parameter::getUseInitNeq()
 {
-    return ic.isInitNeq;
+    return this->isInitNeq;
 }
 bool Parameter::getSimulatePorousMedia()
 {
-    return ic.simulatePorousMedia;
+    return this->simulatePorousMedia;
 }
 
 bool Parameter::getIsF3()
@@ -2415,23 +2420,23 @@ bool Parameter::getIsBodyForce()
 
 bool Parameter::getIsGeometryValues()
 {
-    return ic.GeometryValues;
+    return this->GeometryValues;
 }
 bool Parameter::getCalc2ndOrderMoments()
 {
-    return ic.is2ndOrderMoments;
+    return this->is2ndOrderMoments;
 }
 bool Parameter::getCalc3rdOrderMoments()
 {
-    return ic.is3rdOrderMoments;
+    return this->is3rdOrderMoments;
 }
 bool Parameter::getCalcHighOrderMoments()
 {
-    return ic.isHighOrderMoments;
+    return this->isHighOrderMoments;
 }
 bool Parameter::getIsProp()
 {
-    return ic.isProp;
+    return this->isProp;
 }
 bool Parameter::overWritingRestart(uint t)
 {
@@ -2439,7 +2444,7 @@ bool Parameter::overWritingRestart(uint t)
 }
 unsigned int Parameter::getTimestepForMP()
 {
-    return ic.timeStepForMP;
+    return this->timeStepForMP;
 }
 unsigned int Parameter::getTimestepOfCoarseLevel()
 {
@@ -2542,39 +2547,39 @@ bool Parameter::getIsNeighborZ()
 }
 std::string Parameter::getgeomBoundaryNormalX()
 {
-    return ic.geomNormalX;
+    return this->geomNormalX;
 }
 std::string Parameter::getgeomBoundaryNormalY()
 {
-    return ic.geomNormalY;
+    return this->geomNormalY;
 }
 std::string Parameter::getgeomBoundaryNormalZ()
 {
-    return ic.geomNormalZ;
+    return this->geomNormalZ;
 }
 std::string Parameter::getInflowBoundaryNormalX()
 {
-    return ic.inflowNormalX;
+    return this->inflowNormalX;
 }
 std::string Parameter::getInflowBoundaryNormalY()
 {
-    return ic.inflowNormalY;
+    return this->inflowNormalY;
 }
 std::string Parameter::getInflowBoundaryNormalZ()
 {
-    return ic.inflowNormalZ;
+    return this->inflowNormalZ;
 }
 std::string Parameter::getOutflowBoundaryNormalX()
 {
-    return ic.outflowNormalX;
+    return this->outflowNormalX;
 }
 std::string Parameter::getOutflowBoundaryNormalY()
 {
-    return ic.outflowNormalY;
+    return this->outflowNormalY;
 }
 std::string Parameter::getOutflowBoundaryNormalZ()
 {
-    return ic.outflowNormalZ;
+    return this->outflowNormalZ;
 }
 curandState *Parameter::getRandomState()
 {
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 86b7bc2a058d69ba878d4445953a4dc56e524027..5944cf66caed4f680ff0480c7b7c39ff7d237aab 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -67,188 +67,194 @@ struct LBMSimulationParameter {
     //////////////////////////////////////////////////////////////////////////
     //! \brief stores the number of threads per GPU block
     uint numberofthreads;
-
-    // distributions///////////
-    // Distributions19 d0;
-    Distributions27 d0;  // DEPRECATED: distribution functions for full matrix (not sparse)
     //! \brief store all distribution functions for the D3Q27
     Distributions27 distributions;
-
-    // distributions F3////////
-    Distributions6 g6;
-
-    // advection diffusion //////////////////
-    //! \brief store all distribution functions for the D3Q7 advection diffusion field
-    Distributions7 distributionsAD7;
-    //! \brief store all distribution functions for the D3Q27 advection diffusion field
-    Distributions27 distributionsAD27;
-    //! \brief stores a field of concentration values
-    real *Conc, *Conc_Full;
-    //! \brief stores the diffusivity
-    real diffusivity;
-    //! \brief stores the value for omega (for the diffusivity)
-    real omegaDiffusivity;
-    // BC NoSlip
-    TempforBoundaryConditions Temp;
-    // BC Velocity
-    TempVelforBoundaryConditions TempVel;
-    // BC Pressure
-    TempPressforBoundaryConditions TempPress;
-    // Plane Conc
-    real *ConcPlaneIn, *ConcPlaneOut1, *ConcPlaneOut2;
-    std::vector<double> PlaneConcVectorIn, PlaneConcVectorOut1, PlaneConcVectorOut2;
-
-    // trafo///////////////////
-    real mTtoWx, mTtoWy, mTtoWz;
-    real cTtoWx, cTtoWy, cTtoWz;
-
-    // MGstrafo////////////////
-    real cStartx, cStarty, cStartz;
-    real cFx, cFy, cFz;
-
-    // typeOfGridNode (formerly known as "geo") /////////////////////
-    int *geo; // DEPRECATED: typeOfGridNode for full matrix (not sparse)
+    //////////////////////////////////////////////////////////////////////////
     //! \brief stores the type for every lattice node (f.e. fluid node)
-    unsigned int *typeOfGridNode;
-
-    // k///////////////////////
-    unsigned int *k; // DEPRECATED: index for full matrix
-
-    // neighbor///////////////////////////////////////////////////////////////
+    uint *typeOfGridNode;
+    //////////////////////////////////////////////////////////////////////////
     //! \brief store the neighbors in +X, +Y, +Z, and in diagonal negative direction
     //! \brief this information is important because we use an indirect addressing scheme
     uint *neighborX, *neighborY, *neighborZ, *neighborInverse;
-
-    // coordinates////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////
     //! \brief store the coordinates for every lattice node
     real *coordinateX, *coordinateY, *coordinateZ;
-
-    // body forces////////////
-    real *forceX_SP, *forceY_SP, *forceZ_SP;
-
-    // vel parab///////////////
-    real *vParab;
-
-    // turbulent viscosity ///
-    real *turbViscosity;
-    real *gSij, *gSDij, *gDxvx, *gDyvx, *gDzvx, *gDxvy, *gDyvy, *gDzvy, *gDxvz, *gDyvz, *gDzvz; // DebugInformation
-
-    // turbulence intensity //
-    real *vx_mean, *vy_mean, *vz_mean;       // means
-    real *vxx, *vyy, *vzz, *vxy, *vxz, *vyz; // fluctuations
-    std::vector<real> turbulenceIntensity;
-
-    // macroscopic values//////
-    // real *vx, *vy, *vz, *rho;  // DEPRECATED: macroscopic values for full matrix
+    //////////////////////////////////////////////////////////////////////////
     //! \brief store the macroscopic values (velocity, density, pressure)
     //! \brief for every lattice node
     real *velocityX, *velocityY, *velocityZ, *rho, *pressure;
     //! \brief stores the value for omega
     real omega;
-    //! \brief stores the value for viscosity (on level 0)
-    real vis;
+    //! \brief stores the value for viscosity
+    real viscosity;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the number of nodes (based on indirect addressing scheme)
+    unsigned long long numberOfNodes;
+    //! \brief stores the size of the memory consumption for real/int values of the arrays (e.g. coordinates, velocity)
+    unsigned long long memSizeRealLBnodes, memSizeLonglongLBnodes;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the slip boundary condition data
+    QforBoundaryConditions slipBC;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the no slip boundary condition data
+    QforBoundaryConditions noSlipBC;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the velocity boundary condition data
+    QforBoundaryConditions velocityBC;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the geometry boundary condition data
+    QforBoundaryConditions geometryBC;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the pressure boundary condition data
+    QforBoundaryConditions pressureBC;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the outflow boundary condition data
+    QforBoundaryConditions outflowBC;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the stress boundary condition data
+    QforBoundaryConditions stressBC;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the precursor boundary condition data
+    QforPrecursorBoundaryConditions precursorBC;
 
-    // derivations for iso test
-    real *dxxUx, *dyyUy, *dzzUz;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief sets a uniform forcing on each fluid node in all three spatial dimensions
+    real *forcing;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores parameters for a wall model
+    WallModelParameters wallModel;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief allows reading values for a boundary condition from a file
+    std::vector<SPtr<TransientBCInputFileReader>> transientBCInputFileReader;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief can be used for pressure correction at outflow boundary condition
+    real outflowPressureCorrectionFactor;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief store the values of body forces for all 3 dimensions
+    real *forceX_SP, *forceY_SP, *forceZ_SP;
 
-    // median-macro-values/////
-    real *vx_SP_Med, *vy_SP_Med, *vz_SP_Med, *rho_SP_Med, *press_SP_Med;
-    real *vx_SP_Med_Out, *vy_SP_Med_Out, *vz_SP_Med_Out, *rho_SP_Med_Out, *press_SP_Med_Out;
-    // Advection-Diffusion
-    real *Conc_Med, *Conc_Med_Out;
 
-    // grid////////////////////
-    unsigned int nx, ny, nz;
-    unsigned int gridNX, gridNY, gridNZ;
+    //////////////////////////////////////////////////////////////////////////
+    // Advection Diffusion
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the diffusivity
+    real diffusivity;
+    //! \brief stores the value for omega (for the diffusivity)
+    real omegaDiffusivity;
+    //! \brief stores a field of concentration values
+    real *concentration;
+    //! \brief store all distribution functions for the D3Q27 advection diffusion field
+    Distributions27 distributionsAD;
+    //////////////////////////////////////////////////////////////////////////
 
-    // size of matrix//////////
-    unsigned int size_Mat;
-    unsigned int sizePlaneXY, sizePlaneYZ, sizePlaneXZ;
 
-    // size of sparse matrix//////////
-    //! \brief stores the number of nodes (based on indirect addressing scheme)
-    unsigned int numberOfNodes;
-    unsigned int size_Array_SP;
+    //////////////////////////////////////////////////////////////////////////
+    // Grid Refinement
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the base-node-indices of coarse and fine refinement cells
+    InterpolationCells coarseToFine;
+    InterpolationCells fineToCoarse;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief distinguish between bulk and border interpolation cells (necessary for communication hiding)
+    InterpolationCells fineToCoarseBorder;
+    InterpolationCells fineToCoarseBulk;
+    InterpolationCells coarseToFineBorder;
+    InterpolationCells coarseToFineBulk;
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores location of neighboring cell (necessary for refinement into the wall)
+    InterpolationCellNeighbor neighborCoarseToFine;
+    InterpolationCellNeighbor neighborCoarseToFineBulk;
+    InterpolationCellNeighbor neighborFineToCoarse;
+    InterpolationCellNeighbor neighborFineToCoarseBulk;
+    //////////////////////////////////////////////////////////////////////////
 
-    // size of Plane btw. 2 GPUs//////
-    unsigned int sizePlaneSB, sizePlaneRB, startB, endB;
-    unsigned int sizePlaneST, sizePlaneRT, startT, endT;
-    bool isSetSendB, isSetRecvB, isSetSendT, isSetRecvT;
-    int *SendT, *SendB, *RecvT, *RecvB;
 
-    // size of Plane for PressMess
-    unsigned int sizePlanePress, startP;
-    unsigned int sizePlanePressIN, startPIN;
-    unsigned int sizePlanePressOUT, startPOUT;
-    bool isSetPress;
+    //////////////////////////////////////////////////////////////////////////
+    // Inter-GPU-Communication
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief stores the send and receive process neighbors used for the communication between GPUs
+    // 3D domain decomposition
+    std::vector<ProcessNeighbor27> sendProcessNeighborX;
+    std::vector<ProcessNeighbor27> sendProcessNeighborY;
+    std::vector<ProcessNeighbor27> sendProcessNeighborZ;
+    std::vector<ProcessNeighbor27> recvProcessNeighborX;
+    std::vector<ProcessNeighbor27> recvProcessNeighborY;
+    std::vector<ProcessNeighbor27> recvProcessNeighborZ;
+
+    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCX;
+    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCY;
+    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCZ;
+    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCX;
+    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCY;
+    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCZ;
+    ///////////////////////////////////////////////////////
+    // 3D domain decomposition convection diffusion
+    std::vector<ProcessNeighbor27> sendProcessNeighborADX;
+    std::vector<ProcessNeighbor27> sendProcessNeighborADY;
+    std::vector<ProcessNeighbor27> sendProcessNeighborADZ;
+    std::vector<ProcessNeighbor27> recvProcessNeighborADX;
+    std::vector<ProcessNeighbor27> recvProcessNeighborADY;
+    std::vector<ProcessNeighbor27> recvProcessNeighborADZ;
+    ///////////////////////////////////////////////////////
+    // 3D domain decomposition F3
+    std::vector<ProcessNeighborF3> sendProcessNeighborF3X;
+    std::vector<ProcessNeighborF3> sendProcessNeighborF3Y;
+    std::vector<ProcessNeighborF3> sendProcessNeighborF3Z;
+    std::vector<ProcessNeighborF3> recvProcessNeighborF3X;
+    std::vector<ProcessNeighborF3> recvProcessNeighborF3Y;
+    std::vector<ProcessNeighborF3> recvProcessNeighborF3Z;
+    ////////////////////////////////////////////////////////////////////////////
+    // 3D domain decomposition: position (index in array) of corner nodes in ProcessNeighbor27
+    struct EdgeNodePositions {
+        int indexOfProcessNeighborRecv;
+        int indexInRecvBuffer;
+        int indexOfProcessNeighborSend;
+        int indexInSendBuffer;
+        EdgeNodePositions(int indexOfProcessNeighborRecv, int indexInRecvBuffer, int indexOfProcessNeighborSend,
+                          int indexInSendBuffer)
+            : indexOfProcessNeighborRecv(indexOfProcessNeighborRecv), indexInRecvBuffer(indexInRecvBuffer),
+              indexOfProcessNeighborSend(indexOfProcessNeighborSend), indexInSendBuffer(indexInSendBuffer)
+        {
+        }
+    };
+    std::vector<EdgeNodePositions> edgeNodesXtoY;
+    std::vector<EdgeNodePositions> edgeNodesXtoZ;
+    std::vector<EdgeNodePositions> edgeNodesYtoZ;
+
+    ///////////////////////////////////////////////////////
+    std::map<CollisionTemplate, uint*>    taggedFluidNodeIndices = {{CollisionTemplate::Default,        nullptr},
+                                                                    {CollisionTemplate::SubDomainBorder,nullptr},
+                                                                    {CollisionTemplate::WriteMacroVars, nullptr},
+                                                                    {CollisionTemplate::ApplyBodyForce, nullptr},
+                                                                    {CollisionTemplate::AllFeatures,    nullptr}};
+    std::map<CollisionTemplate, uint >  numberOfTaggedFluidNodes = {{CollisionTemplate::Default,        0},
+                                                                    {CollisionTemplate::SubDomainBorder,0},
+                                                                    {CollisionTemplate::WriteMacroVars, 0},
+                                                                    {CollisionTemplate::ApplyBodyForce, 0},
+                                                                    {CollisionTemplate::AllFeatures,    0}};
+
+    std::vector<CollisionTemplate> allocatedBulkFluidNodeTags = {};
 
-    // memsizeSP/////////////////
-    //! \brief stores the size of the memory consumption for real/int values of the arrays (e.g. coordinates, velocity)
-    unsigned int mem_size_real_SP;
-    unsigned int mem_size_int_SP;
 
-    // memsize/////////////////
-    unsigned int mem_size_real;
-    unsigned int mem_size_int;
-    unsigned int mem_size_bool;
-    unsigned int mem_size_real_yz;
 
-    // print///////////////////
-    unsigned int startz, endz;
-    real Lx, Ly, Lz, dx;
-    real distX, distY, distZ;
 
-    // interface////////////////
-    bool need_interface[6];
-    unsigned int XdistKn, YdistKn, ZdistKn;
-    InterpolationCellCF intCF;
-    InterpolationCellFC intFC;
-    unsigned int K_CF;
-    unsigned int K_FC;
-    unsigned int mem_size_kCF;
-    unsigned int mem_size_kFC;
-
-    InterpolationCellFC intFCBorder;
-    InterpolationCellFC intFCBulk;
-    InterpolationCellCF intCFBorder;
-    InterpolationCellCF intCFBulk;
-
-    // offset//////////////////
-    OffsetCF offCF;
-    OffsetCF offCFBulk;
-    OffsetFC offFC;
-    OffsetFC offFCBulk;
-    unsigned int mem_size_kCF_off;
-    unsigned int mem_size_kFC_off;
-    
-    //! \brief stores the boundary condition data
-    QforBoundaryConditions noSlipBC, velocityBC, outflowBC, slipBC, stressBC, pressureBC;
-    //! \brief number of lattice nodes for the boundary conditions
-    unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead, numberOfPrecursorBCnodesRead;
 
-    QforBoundaryConditions QpressX0, QpressX1, QpressY0, QpressY1, QpressZ0, QpressZ1; // DEPRECATED
-    QforBoundaryConditions propellerBC;
-    QforBoundaryConditions geometryBC;
-    QforPrecursorBoundaryConditions precursorBC;
-    QforBoundaryConditions geometryBCnormalX, geometryBCnormalY, geometryBCnormalZ;
-    QforBoundaryConditions inflowBCnormalX, inflowBCnormalY, inflowBCnormalZ;
-    QforBoundaryConditions outflowBCnormalX, outflowBCnormalY, outflowBCnormalZ;
-    QforBoundaryConditions QInlet, QOutlet, QPeriodic; // DEPRECATED
-    unsigned int kInletQread, kOutletQread;  // DEPRECATED
 
-    WallModelParameters wallModel;
-    std::vector<SPtr<TransientBCInputFileReader>> transientBCInputFileReader;
-    real outflowPressureCorrectionFactor;
 
-    // testRoundoffError
-    Distributions27 kDistTestRE;
 
     //////////////////////////////////////////////////////////////////////////
-    // velocities to fit the force
-    real *VxForce, *VyForce, *VzForce;
+    // ADD IN FUTURE RELEASE
     //////////////////////////////////////////////////////////////////////////
-    //! \brief sets the forcing uniform on every fluid node in all three space dimensions
-    real *forcing;
+
+    // distributions F3////////
+    Distributions6 g6;
+
+    // BC NoSlip
+    TempforBoundaryConditions Temp;
+    // BC Velocity
+    TempVelforBoundaryConditions TempVel;
+    // BC Pressure
+    TempPressforBoundaryConditions TempPress;
 
     // Measure Points/////////
     std::vector<MeasurePoints> MP;
@@ -298,94 +304,152 @@ struct LBMSimulationParameter {
     unsigned int numberOfPointsCpBottom2;
     std::vector<std::vector<double>> cpBottom2;
 
-    // Concentration////////
+    //////////////////////////////////////////////////////////////////////////
+    //! \brief velocities to fit the force
+    real *VxForce, *VyForce, *VzForce;
+
+    //! \brief stores indices for the concentration field
     int *concIndex;
-    real *concentration;
+    //    real *concentration;
     unsigned int numberOfPointsConc;
+    //! \brief store all distribution functions for the D3Q7 advection diffusion field
+    Distributions7 distributionsAD7;
+    // Plane Conc
+    real *ConcPlaneIn, *ConcPlaneOut1, *ConcPlaneOut2;
+    std::vector<double> PlaneConcVectorIn, PlaneConcVectorOut1, PlaneConcVectorOut2;
+
+    // turbulent viscosity ///
+    real *turbViscosity;
+    real *gSij, *gSDij, *gDxvx, *gDyvx, *gDzvx, *gDxvy, *gDyvy, *gDzvy, *gDxvz, *gDyvz, *gDzvz; // DebugInformation
+
+    // turbulence intensity //
+    real *vx_mean, *vy_mean, *vz_mean;       // means
+    real *vxx, *vyy, *vzz, *vxy, *vxz, *vyz; // fluctuations
+    std::vector<real> turbulenceIntensity;
+
+    // median-macro-values/////
+    real *vx_SP_Med, *vy_SP_Med, *vz_SP_Med, *rho_SP_Med, *press_SP_Med;
+    real *vx_SP_Med_Out, *vy_SP_Med_Out, *vz_SP_Med_Out, *rho_SP_Med_Out, *press_SP_Med_Out;
+    // Advection-Diffusion
+    real *Conc_Med, *Conc_Med_Out;
+
+
+
+
+
 
-    // street X and Y velocity fractions///////
-    real *streetFractionXvelocity;
-    real *streetFractionYvelocity;
-    int *naschVelocity;
-    uint numberOfStreetNodes;
+
+
+
+
+
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // DEPRECATED - planned to be taken out permanently
+    //////////////////////////////////////////////////////////////////////////
+
+    unsigned int size_Array_SP; //?? Deprecated
+
+    // distributions///////////
+    // Distributions19 d0;
+    Distributions27 d0;  // DEPRECATED: distribution functions for full matrix (not sparse)
+
+    // typeOfGridNode (formerly known as "geo") /////////////////////
+    int *geo; // DEPRECATED: typeOfGridNode for full matrix (not sparse)
+
+    // k///////////////////////
+    unsigned int *k; // DEPRECATED: index for full matrix
+
+    // memsize/////////////////
+    //unsigned int mem_size_real_yz;
+    //unsigned int mem_size_bool;
+    //unsigned int mem_size_int;
+    //unsigned int mem_size_real;
+
+    QforBoundaryConditions QpressX0, QpressX1, QpressY0, QpressY1, QpressZ0, QpressZ1; // DEPRECATED  BCs that are not used any more
+    QforBoundaryConditions QInlet, QOutlet, QPeriodic; // DEPRECATED BCs that are not used any more
+    unsigned int kInletQread, kOutletQread;            // DEPRECATED
+
+    QforBoundaryConditions propellerBC;                                                 // DEPRECATED
+    QforBoundaryConditions geometryBCnormalX, geometryBCnormalY, geometryBCnormalZ;     // DEPRECATED
+    QforBoundaryConditions inflowBCnormalX, inflowBCnormalY, inflowBCnormalZ;           // DEPRECATED
+    QforBoundaryConditions outflowBCnormalX, outflowBCnormalY, outflowBCnormalZ;        // DEPRECATED
+
+    unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, // DEPRECATED
+    numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead, numberOfPrecursorBCnodesRead; // DEPRECATED
+
+    //! \brief stores a full matrix field of concentration values
+    real *Conc_Full;
+
+
+
+    // trafo///////////////////
+    real mTtoWx, mTtoWy, mTtoWz;
+    real cTtoWx, cTtoWy, cTtoWz;
+
+    // MGstrafo////////////////
+    real cStartx, cStarty, cStartz;
+    real cFx, cFy, cFz;
+
+    // interface////////////////
+    bool need_interface[6];
+    unsigned int XdistKn, YdistKn, ZdistKn;
+
+    // vel parab///////////////
+    real *vParab;
+
+    // macroscopic values//////
+    // real *vx, *vy, *vz, *rho;  // DEPRECATED: macroscopic values for full matrix
+
+    // derivations for iso test
+    real *dxxUx, *dyyUy, *dzzUz;
+
+    // grid////////////////////
+    unsigned int nx, ny, nz;
+    unsigned int gridNX, gridNY, gridNZ;
+
+    // size of matrix//////////
+    unsigned int size_Mat;
+    unsigned int sizePlaneXY, sizePlaneYZ, sizePlaneXZ;
+
+    // size of Plane btw. 2 GPUs//////
+    unsigned int sizePlaneSB, sizePlaneRB, startB, endB;
+    unsigned int sizePlaneST, sizePlaneRT, startT, endT;
+    bool isSetSendB, isSetRecvB, isSetSendT, isSetRecvT;
+    int *SendT, *SendB, *RecvT, *RecvB;
+
+    // size of Plane for PressMess
+    unsigned int sizePlanePress, startP;
+    unsigned int sizePlanePressIN, startPIN;
+    unsigned int sizePlanePressOUT, startPOUT;
+    bool isSetPress;
 
     // deltaPhi
     real deltaPhi;
 
-    ////////////////////////////////////////////////////////////////////////////
     // particles
     PathLineParticles plp;
-    ////////////////////////////////////////////////////////////////////////////
 
     ////////////////////////////////////////////////////////////////////////////
     // 1D domain decomposition
     std::vector<ProcessNeighbor27> sendProcessNeighbor;
     std::vector<ProcessNeighbor27> recvProcessNeighbor;
-    ///////////////////////////////////////////////////////
-    // 3D domain decomposition
-    std::vector<ProcessNeighbor27> sendProcessNeighborX;
-    std::vector<ProcessNeighbor27> sendProcessNeighborY;
-    std::vector<ProcessNeighbor27> sendProcessNeighborZ;
-    std::vector<ProcessNeighbor27> recvProcessNeighborX;
-    std::vector<ProcessNeighbor27> recvProcessNeighborY;
-    std::vector<ProcessNeighbor27> recvProcessNeighborZ;
 
-    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCX;
-    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCY;
-    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCZ;
-    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCX;
-    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCY;
-    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCZ;
-    ///////////////////////////////////////////////////////
-    // 3D domain decomposition convection diffusion
-    std::vector<ProcessNeighbor27> sendProcessNeighborADX;
-    std::vector<ProcessNeighbor27> sendProcessNeighborADY;
-    std::vector<ProcessNeighbor27> sendProcessNeighborADZ;
-    std::vector<ProcessNeighbor27> recvProcessNeighborADX;
-    std::vector<ProcessNeighbor27> recvProcessNeighborADY;
-    std::vector<ProcessNeighbor27> recvProcessNeighborADZ;
-    ///////////////////////////////////////////////////////
-    // 3D domain decomposition F3
-    std::vector<ProcessNeighborF3> sendProcessNeighborF3X;
-    std::vector<ProcessNeighborF3> sendProcessNeighborF3Y;
-    std::vector<ProcessNeighborF3> sendProcessNeighborF3Z;
-    std::vector<ProcessNeighborF3> recvProcessNeighborF3X;
-    std::vector<ProcessNeighborF3> recvProcessNeighborF3Y;
-    std::vector<ProcessNeighborF3> recvProcessNeighborF3Z;
-    ////////////////////////////////////////////////////////////////////////////
-    // 3D domain decomposition: position (index in array) of corner nodes in ProcessNeighbor27
-    struct EdgeNodePositions {
-        int indexOfProcessNeighborRecv;
-        int indexInRecvBuffer;
-        int indexOfProcessNeighborSend;
-        int indexInSendBuffer;
-        EdgeNodePositions(int indexOfProcessNeighborRecv, int indexInRecvBuffer, int indexOfProcessNeighborSend,
-                          int indexInSendBuffer)
-            : indexOfProcessNeighborRecv(indexOfProcessNeighborRecv), indexInRecvBuffer(indexInRecvBuffer),
-              indexOfProcessNeighborSend(indexOfProcessNeighborSend), indexInSendBuffer(indexInSendBuffer)
-        {
-        }
-    };
-    std::vector<EdgeNodePositions> edgeNodesXtoY;
-    std::vector<EdgeNodePositions> edgeNodesXtoZ;
-    std::vector<EdgeNodePositions> edgeNodesYtoZ;
+    // print///////////////////
+    unsigned int startz, endz;
+    real Lx, Ly, Lz, dx;
+    real distX, distY, distZ;
+
+    // testRoundoffError
+    Distributions27 kDistTestRE;
 
-    ///////////////////////////////////////////////////////
-    std::map<CollisionTemplate, uint*>    taggedFluidNodeIndices = {{CollisionTemplate::Default,        nullptr},
-                                                                    {CollisionTemplate::SubDomainBorder,nullptr},
-                                                                    {CollisionTemplate::WriteMacroVars, nullptr},
-                                                                    {CollisionTemplate::ApplyBodyForce, nullptr},
-                                                                    {CollisionTemplate::AllFeatures,    nullptr}};
-    std::map<CollisionTemplate, uint >  numberOfTaggedFluidNodes = {{CollisionTemplate::Default,        0},
-                                                                    {CollisionTemplate::SubDomainBorder,0},
-                                                                    {CollisionTemplate::WriteMacroVars, 0},
-                                                                    {CollisionTemplate::ApplyBodyForce, 0},
-                                                                    {CollisionTemplate::AllFeatures,    0}};
 
-    std::vector<CollisionTemplate> allocatedBulkFluidNodeTags = {};
 
+    //////////////////////////////////////////////////////////////////////////
 };
 
+
 //! \brief Class for LBM-parameter management
 class VIRTUALFLUIDS_GPU_EXPORT Parameter
 {
@@ -514,7 +578,6 @@ public:
     void setcpBottom(std::string cpBottom);
     void setcpBottom2(std::string cpBottom2);
     void setConcentration(std::string concFile);
-    void setStreetVelocity(std::string streetVelocity);
     void setPrintFiles(bool printfiles);
     void setReadGeo(bool readGeo);
     void setTemperatureInit(real Temp);
@@ -536,7 +599,6 @@ public:
     void setIsProp(bool isProp);
     void setIsCp(bool isCp);
     void setConcFile(bool concFile);
-    void setStreetVelocityFile(bool streetVelocityFile);
     void setUseMeasurePoints(bool useMeasurePoints);
     void setUseWale(bool useWale);
     void setTurbulenceModel(TurbulenceModel turbulenceModel);
@@ -734,15 +796,14 @@ public:
     std::string getcpBottom();
     std::string getcpBottom2();
     std::string getConcentration();
-    std::string getStreetVelocityFilePath();
     unsigned int getPressInID();
     unsigned int getPressOutID();
     unsigned int getPressInZ();
     unsigned int getPressOutZ();
-    unsigned int getMemSizereal(int level);
-    unsigned int getMemSizeInt(int level);
-    unsigned int getMemSizeBool(int level);
-    unsigned int getMemSizerealYZ(int level);
+//    unsigned int getMemSizereal(int level);    //DEPRECATED: related to full matrix
+//    unsigned int getMemSizeInt(int level);     //DEPRECATED: related to full matrix
+//    unsigned int getMemSizeBool(int level);    //DEPRECATED: related to full matrix
+//    unsigned int getMemSizerealYZ(int level);  //DEPRECATED: related to full matrix
     unsigned int getSizeMat(int level);
     unsigned int getTimestepStart();
     unsigned int getTimestepInit();
@@ -832,7 +893,6 @@ public:
     bool getCalc3rdOrderMoments();
     bool getCalcHighOrderMoments();
     bool getConcFile();
-    bool isStreetVelocityFile();
     bool getUseMeasurePoints();
     bool getUseWale();
     TurbulenceModel getTurbulenceModel();
@@ -912,7 +972,26 @@ private:
 
     void setPathAndFilename(std::string fname);
 
+    void checkParameterValidityCumulantK17() const;
+
 private:
+    real Re;
+    real factorPressBC{ 1.0 };
+    real Diffusivity{ 0.001 };
+    real Temp{ 0.0 };
+    real TempBC{ 1.0 };
+    real RealX{ 1.0 };
+    real RealY{ 1.0 };
+    real clockCycleForMP{ 1.0 };
+    real vis{ 0.001 };
+    real vis_ratio{ 1.0 };
+    real u0{ 0.01 };
+    real u0_ratio{ 1.0 };
+    real delta_rho{ 0.0 };
+    real delta_press{ 1.0 };
+    real SGSConstant{ 0.0 };
+    real outflowPressureCorrectionFactor{ 0.0 };
+
     bool compOn{ false };
     bool diffOn{ false };
     bool isF3{ false };
@@ -922,6 +1001,27 @@ private:
     bool calcPlaneConc{ false };
     bool calcVelocityAndFluctuations{ false };
     bool isBodyForce{ false };
+    bool printFiles{ false };
+    bool doRestart{ false };
+    bool doCheckPoint{ false };
+    bool readGeo{ false };
+    bool isGeo;
+    bool isProp;
+    bool isCp;
+    bool GeometryValues{ false };
+    bool is2ndOrderMoments{ false };
+    bool is3rdOrderMoments{ false };
+    bool isHighOrderMoments{ false };
+    bool calcMedian{ false };
+    bool isConc{ false };
+    bool isWale{ false };
+    bool isTurbulentViscosity{ false };
+    bool isMeasurePoints{ false };
+    bool isInitNeq{ false };
+    bool isGeoNormal, isInflowNormal, isOutflowNormal;
+    bool hasWallModelMonitor{ false };
+    bool simulatePorousMedia{ false };
+
     int diffMod{ 27 };
     //! \property maximum level of grid refinement
     int maxlevel{ 0 };
@@ -929,11 +1029,60 @@ private:
     int fine{ 0 };
     int factor_gridNZ{ 2 };
     int D3Qxx{ 27 };
-    InitCondition ic;
+    int numprocs{ 1 };
+    int myProcessId{ 0 };
+    int maxdev{ 1 };
+
     double memsizeGPU;
-    unsigned int limitOfNodesForVTK;
-    unsigned int outputCount;
-    unsigned int timestep;
+    
+    uint limitOfNodesForVTK;
+    uint outputCount;
+    uint timestep;
+    uint tDoCheckPoint{ 0 };
+    uint tDoRestart{ 0 };
+    uint tCalcMedStart{ 0 };
+    uint tCalcMedEnd{ 10 };
+    uint tend{ 10 };
+    uint tout{ 1 };
+    uint tStartOut{ 0 };
+    uint PressInID{ 0 };
+    uint PressOutID{ 0 };
+    uint PressInZ{ 1 };
+    uint PressOutZ{ 2 };
+    uint timeStepForMP{ 10 };
+
+    std::vector<uint> devices{ 0, 1 }; // default device IDs 0 and 1 (NOTE(review): maxdev defaults to 1 - confirm intended default)
+    std::vector<int> GridX, GridY, GridZ, DistX, DistY, DistZ;
+    std::vector<real> scaleLBMtoSI, translateLBMtoSI;
+    std::vector<real> minCoordX, minCoordY, minCoordZ, maxCoordX, maxCoordY, maxCoordZ;
+
+    std::string fname{ "output/simulation" };
+    std::string oPath{ "output/" };
+    std::string gridPath{ "grid/" };
+    std::string oPrefix{ "simulation" };
+    std::string geometryFileC, geometryFileM, geometryFileF;
+    std::string kFull, geoFull, geoVec, coordX, coordY, coordZ, neighborX, neighborY, neighborZ, neighborWSB, scaleCFC, scaleCFF, scaleFCC, scaleFCF, scaleOffsetCF, scaleOffsetFC;
+    std::string noSlipBcPos, noSlipBcQs, noSlipBcValue;
+    std::string slipBcPos, slipBcQs, slipBcValue;
+    std::string pressBcPos, pressBcQs, pressBcValue;
+    std::string geomBoundaryBcQs, velBcQs;
+    std::string geomBoundaryBcValues, velBcValues, pressBcValues, noSlipBcValues;
+    std::string propellerCylinder, propellerValues, propellerQs, measurePoints;
+    std::string inletBcQs, inletBcValues;
+    std::string outletBcQs, outletBcValues;
+    std::string topBcQs, topBcValues;
+    std::string bottomBcQs, bottomBcValues;
+    std::string frontBcQs, frontBcValues;
+    std::string backBcQs, backBcValues;
+    std::string wallBcQs, wallBcValues;
+    std::string periodicBcQs, periodicBcValues;
+    std::string numberNodes, LBMvsSI;
+    std::string cpTop, cpBottom, cpBottom2;
+    std::string concentration;
+    std::string geomNormalX, geomNormalY, geomNormalZ, inflowNormalX, inflowNormalY, inflowNormalZ, outflowNormalX, outflowNormalY, outflowNormalZ;
+    
+    TurbulenceModel turbulenceModel{ TurbulenceModel::None };
+
 
     // Kernel
     std::string mainKernel{ "CumulantK17CompChim" };
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
index 4025acf7acad362e9f0f3702cb897b9c1b6dbf3b..b86d5657922deb9dca6e89574efc72766bcd16ce 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
@@ -1,4 +1,3 @@
-#include <gmock/gmock.h>
 #include "basics/tests/testUtilities.h"
 
 #include <filesystem>
@@ -8,7 +7,6 @@
 #include "Parameter.h"
 #include "basics/config/ConfigurationFile.h"
 
-
 TEST(ParameterTest, passingEmptyFileWithoutPath_ShouldNotThrow)
 {
     // assuming that the config files is stored parallel to this file.
@@ -37,7 +35,9 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor)
 
     // test optional parameter
     EXPECT_THAT(para.getOutputPath(), testing::Eq("/output/path/"));
-    EXPECT_THAT(para.getGridPath(), testing::Eq("/path/to/grid/")); // ... all grid files (e.g. multi-gpu/ multi-level) could be tested as well
+    EXPECT_THAT(
+        para.getGridPath(),
+        testing::Eq("/path/to/grid/")); // ... all grid files (e.g. multi-gpu/ multi-level) could be tested as well
     EXPECT_THAT(para.getgeoVec(), testing::Eq("/path/to/grid/geoVec.dat"));
     EXPECT_THAT(para.getMaxDev(), testing::Eq(2));
     EXPECT_THAT(para.getDevices(), testing::ElementsAreArray({ 2, 3 }));
@@ -53,7 +53,6 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor)
     EXPECT_THAT(para.getWriteVeloASCIIfiles(), testing::Eq(true));
     EXPECT_THAT(para.getCalcPlaneConc(), testing::Eq(true));
     EXPECT_THAT(para.getConcFile(), testing::Eq(true));
-    EXPECT_THAT(para.isStreetVelocityFile(), testing::Eq(true));
     EXPECT_THAT(para.getUseMeasurePoints(), testing::Eq(true));
     EXPECT_THAT(para.getUseWale(), testing::Eq(true));
     EXPECT_THAT(para.getUseInitNeq(), testing::Eq(true));
@@ -163,7 +162,7 @@ TEST(ParameterTest, setGridPathOverridesDefaultGridPath)
     Parameter para(2, 1);
     para.setGridPath("gridPathTest");
 
-    EXPECT_THAT( para.getGridPath(), testing::Eq("gridPathTest/1/"));
+    EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/1/"));
     EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/1/conc.dat"));
 }
 
@@ -177,9 +176,8 @@ TEST(ParameterTest, setGridPathOverridesConfigFile)
     auto para = Parameter(2, 0, &config);
     para.setGridPath("gridPathTest");
 
-    EXPECT_THAT( para.getGridPath(), testing::Eq("gridPathTest/0/"));
+    EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/0/"));
     EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/0/conc.dat"));
-
 }
 
 TEST(ParameterTest, userMissedSlash)
@@ -189,7 +187,6 @@ TEST(ParameterTest, userMissedSlash)
 
     EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/"));
     EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/conc.dat"));
-
 }
 
 TEST(ParameterTest, userMissedSlashMultiGPU)
@@ -199,4 +196,87 @@ TEST(ParameterTest, userMissedSlashMultiGPU)
 
     EXPECT_THAT(para.getGridPath(), testing::Eq("gridPathTest/0/"));
     EXPECT_THAT(para.getConcentration(), testing::Eq("gridPathTest/0/conc.dat"));
-}
\ No newline at end of file
+}
+
+class ParameterTestCumulantK17 : public testing::Test // fixture for the warning checks below; gtest creates a fresh instance (and thus a fresh `para`) per TEST_F
+{
+protected:
+    void SetUp() override // no shared setup: every test configures `para` itself
+    {
+    }
+    // Fetches the text captured since testing::internal::CaptureStdout() (this also ends the capture) and reports whether it contains "warning" (case-sensitive search).
+    bool stdoutContainsWarning()
+    {
+        std::string output = testing::internal::GetCapturedStdout(); // the calling test must have started a capture beforehand
+        return output.find("warning") != std::string::npos;
+    }
+    // Default-constructed parameter object that the tests configure and initialize.
+    Parameter para;
+};
+
+TEST_F(ParameterTestCumulantK17, CumulantK17_VelocityIsTooHigh_expectWarning)
+{
+    // LB velocity 0.11 combined with the "CumulantK17" kernel: initialization is expected to print a warning.
+    para.setVelocityLB(0.11);
+    para.setMainKernel("CumulantK17");
+    testing::internal::CaptureStdout(); // only output produced from here on is checked
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_TRUE(stdoutContainsWarning()); // the captured output must contain "warning"; this call also ends the capture
+}
+
+TEST_F(ParameterTestCumulantK17, CumulantK17_VelocityIsOk_expectNoWarning)
+{
+    para.setVelocityLB(0.09); // lower than the 0.11 used by the "velocity too high" test
+    para.setMainKernel("CumulantK17");
+    testing::internal::CaptureStdout(); // only output produced from here on is checked
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_FALSE(stdoutContainsWarning()); // the captured output must not contain "warning"; this call also ends the capture
+}
+
+TEST_F(ParameterTestCumulantK17, NotCumulantK17_VelocityIsTooHigh_expectNoWarning)
+{
+    para.setVelocityLB(42); // far above the 0.11 that is expected to warn when the kernel is "CumulantK17"
+    para.setMainKernel("K"); // a kernel name that is not "CumulantK17"
+    testing::internal::CaptureStdout(); // only output produced from here on is checked
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_FALSE(stdoutContainsWarning()); // the captured output must not contain "warning"; this call also ends the capture
+}
+
+TEST_F(ParameterTestCumulantK17, CumulantK17_ViscosityIsTooHigh_expectWarning)
+{
+    para.setViscosityLB(0.024); // slightly above the 0.023 used by the "viscosity ok" test
+    para.setMainKernel("CumulantK17");
+    testing::internal::CaptureStdout(); // only output produced from here on is checked
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_TRUE(stdoutContainsWarning()); // the captured output must contain "warning"; this call also ends the capture
+}
+
+TEST_F(ParameterTestCumulantK17, CumulantK17_ViscosityIsOk_expectNoWarning)
+{
+    para.setViscosityLB(0.023); // slightly below the 0.024 used by the "viscosity too high" test
+    para.setMainKernel("CumulantK17");
+    testing::internal::CaptureStdout(); // only output produced from here on is checked
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_FALSE(stdoutContainsWarning()); // the captured output must not contain "warning"; this call also ends the capture
+}
+
+TEST_F(ParameterTestCumulantK17, NotCumulantK17_ViscosityIsTooHigh_expectNoWarning)
+{
+    para.setViscosityLB(10); // far above the 0.024 that is expected to warn when the kernel is "CumulantK17"
+    para.setMainKernel("K"); // a kernel name that is not "CumulantK17"
+    testing::internal::CaptureStdout(); // only output produced from here on is checked
+
+    para.initLBMSimulationParameter();
+
+    EXPECT_FALSE(stdoutContainsWarning()); // the captured output must not contain "warning"; this call also ends the capture
+}
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/parameterTest.cfg b/src/gpu/VirtualFluids_GPU/Parameter/parameterTest.cfg
index af9cb2851acbb56c2235c66ddf96bab3dac0d39f..097c6e6ccbec37c0f30de45ba444f33ff756cbdb 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/parameterTest.cfg
+++ b/src/gpu/VirtualFluids_GPU/Parameter/parameterTest.cfg
@@ -17,7 +17,6 @@ calcDrafLift = true
 writeVeloASCIIfiles = true
 calcPlaneConc = true
 UseConcFile = true
-UseStreetVelocityFile = true
 UseMeasurePoints = true
 UseWale = true
 UseInitNeq = true
diff --git a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
index 7c710f50afb0ae07edd53ef9d68e294c7af54ac1..e0156e3fbae46282baeb1359c719a077f021cf6b 100644
--- a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
+++ b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
@@ -141,12 +141,12 @@ void initParticles(Parameter* para)
 			para->getParH(lev)->plp.coordZabsolut[i] = (real)zCoordVec[i]; 
 
 			// find IDs
-			for (unsigned int ii = 0; ii < para->getParH(lev)->numberOfNodes; ii++)
+			for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++)
 			{
-				if ((para->getParH(lev)->coordinateX[ii] <= para->getParH(lev)->plp.coordXabsolut[i]) &&
-					((para->getParH(lev)->plp.coordXabsolut[i] - para->getParH(lev)->coordinateX[ii]) <= dx))
+				if ((para->getParH(lev)->coordinateX[index] <= para->getParH(lev)->plp.coordXabsolut[i]) &&
+					((para->getParH(lev)->plp.coordXabsolut[i] - para->getParH(lev)->coordinateX[index]) <= dx))
 				{
-					tempID.push_back(ii);
+					tempID.push_back((int)index);
 				}
 			}
 
@@ -455,7 +455,7 @@ void rearrangeGeometry(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 		int counter2 = 0;
 		//////////////////////////////////////////////////////////////////////////
 		//redefine fluid nodes
-		for (uint index = 0; index < para->getParH(lev)->numberOfNodes; index++)
+		for (size_t index = 0; index < para->getParH(lev)->numberOfNodes; index++)
 		{
 			if (para->getParH(lev)->typeOfGridNode[index] == GEO_FLUID_OLD)
 			{
diff --git a/src/gpu/VirtualFluids_GPU/Particles/Particles.h b/src/gpu/VirtualFluids_GPU/Particles/Particles.h
index 7a6d003a08ef7f6517b6259c2c1b895676c6d80b..805817e2f7c6c64b27d60109ad8b6ee2c60cf5cc 100644
--- a/src/gpu/VirtualFluids_GPU/Particles/Particles.h
+++ b/src/gpu/VirtualFluids_GPU/Particles/Particles.h
@@ -4,7 +4,7 @@
 #include "LBM/LB.h"
 #include "GPU/GPU_Interface.h"
 #include "GPU/CudaMemoryManager.h"
-#include "Core/StringUtilities/StringUtil.h"
+#include "StringUtilities/StringUtil.h"
 #include "Parameter/Parameter.h"
 
 //void calcDragLift(Parameter* para, int lev);
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
index 626e3bc0b745a8f8b029afb38028c81bd8d659e7..bcdd63657d13cd8a9dcef3372fe02760a337b057 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
@@ -1,21 +1,53 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ActuatorFarm.cu
+//! \ingroup PreCollisionInteractor
+//! \author Henrik Asmuth, Henry Korb
+//======================================================================================
 #include "ActuatorFarm.h"
 
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 
-#include <cuda/CudaGrid.h>
+#include "cuda/CudaGrid.h"
 #include "VirtualFluids_GPU/GPU/GeometryUtils.h"
-#include "VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 #include "Parameter/Parameter.h"
 #include "Parameter/CudaStreamManager.h"
 #include "DataStructureInitializer/GridProvider.h"
 #include "GPU/CudaMemoryManager.h"
-#include <lbm/constants/NumericConstants.h>
+#include "basics/constants/NumericConstants.h"
 #include <logger/Logger.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 
 __host__ __device__ __inline__ uint calcNode(uint bladeNode, uint numberOfBladeNodes, uint blade, uint numberOfBlades, uint turbine, uint numberOfTurbines)
@@ -30,7 +62,7 @@ __host__ __device__ __inline__ void calcTurbineBladeAndBladeNode(uint node, uint
     uint x_off = turbine*numberOfBladeNodes*numberOfBlades;
     blade = (node - x_off)/numberOfBlades;
     uint y_off = numberOfBladeNodes*blade+x_off;
-    bladeNode = (node - y_off)/numberOfBladeNodes;
+    bladeNode = (node - y_off);
 }
 
 __host__ __device__ __forceinline__ real distSqrd(real distX, real distY, real distZ)
@@ -46,9 +78,9 @@ void swapArrays(real* &arr1, real* &arr2)
 }
 
 __host__ __device__ __inline__ void rotateFromBladeToGlobal(
-                            real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, 
+                            real bladeCoordX_BF, real bladeCoordY_BF, real bladeCoordZ_BF, 
                             real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF,
-                            real& azimuth, real& yaw)
+                            real azimuth, real yaw)
 {
     real tmpX, tmpY, tmpZ;
 
@@ -59,8 +91,8 @@ __host__ __device__ __inline__ void rotateFromBladeToGlobal(
 
 __host__ __device__ __inline__ void rotateFromGlobalToBlade(
                             real& bladeCoordX_BF, real& bladeCoordY_BF, real& bladeCoordZ_BF, 
-                            real& bladeCoordX_GF, real& bladeCoordY_GF, real& bladeCoordZ_GF,
-                            real& azimuth, real& yaw)
+                            real bladeCoordX_GF, real bladeCoordY_GF, real bladeCoordZ_GF,
+                            real azimuth, real yaw)
 {
     real tmpX, tmpY, tmpZ;
 
@@ -79,17 +111,20 @@ __global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real
                                       uint* bladeIndices, real velocityRatio, real invDeltaX)
 {
 
-    const uint node =  vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = vf::gpu::getNodeIndex();
 
-    if(node>=numberOfBladeNodes*numberOfBlades*numberOfTurbines) return;
+    if(nodeIndex>=numberOfBladeNodes*numberOfBlades*numberOfTurbines) return;
 
     uint turbine, bladeNode, blade;
 
-    calcTurbineBladeAndBladeNode(node, bladeNode, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines);
+    calcTurbineBladeAndBladeNode(nodeIndex, bladeNode, numberOfBladeNodes, blade, numberOfBlades, turbine, numberOfTurbines);
 
-    real bladeCoordX_BF = bladeCoordsX[node];
-    real bladeCoordY_BF = bladeCoordsY[node];
-    real bladeCoordZ_BF = bladeCoordsZ[node];
+    real bladeCoordX_BF = bladeCoordsX[nodeIndex];
+    real bladeCoordY_BF = bladeCoordsY[nodeIndex];
+    real bladeCoordZ_BF = bladeCoordsZ[nodeIndex];
 
     real bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF;
 
@@ -108,12 +143,12 @@ __global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real
     uint k, ke, kn, kt;
     uint kne, kte, ktn, ktne;
 
-    k = findNearestCellBSW(bladeIndices[node], 
+    k = findNearestCellBSW(bladeIndices[nodeIndex], 
                            gridCoordsX, gridCoordsY, gridCoordsZ, 
                            bladeCoordX_GF, bladeCoordY_GF, bladeCoordZ_GF, 
                            neighborsX, neighborsY, neighborsZ, neighborsWSB);
         
-    bladeIndices[node] = k;
+    bladeIndices[nodeIndex] = k;
 
     getNeighborIndicesOfBSW(k, ke, kn, kt, kne, kte, ktn, ktne, neighborsX, neighborsY, neighborsZ);
 
@@ -135,9 +170,9 @@ __global__ void interpolateVelocities(real* gridCoordsX, real* gridCoordsY, real
                             bladeVelX_GF, bladeVelY_GF, bladeVelZ_GF, 
                             localAzimuth, yaw);
 
-    bladeVelocitiesX[node] = bladeVelX_BF;
-    bladeVelocitiesY[node] = bladeVelY_BF+omegas[turbine]*bladeCoordZ_BF;
-    bladeVelocitiesZ[node] = bladeVelZ_BF;
+    bladeVelocitiesX[nodeIndex] = bladeVelX_BF;
+    bladeVelocitiesY[nodeIndex] = bladeVelY_BF+omegas[turbine]*bladeCoordZ_BF;
+    bladeVelocitiesZ[nodeIndex] = bladeVelZ_BF;
 }
 
 
@@ -270,11 +305,11 @@ void ActuatorFarm::init(Parameter* para, GridProvider* gridProvider, CudaMemoryM
     if(!para->getIsBodyForce()) throw std::runtime_error("try to allocate ActuatorFarm but BodyForce is not set in Parameter.");
     this->forceRatio = para->getForceRatio();
     this->initTurbineGeometries(cudaMemoryManager);
-    this->initBladeCoords(cudaMemoryManager);    
+    this->initBladeCoords(cudaMemoryManager);
     this->initBladeIndices(para, cudaMemoryManager);
     this->initBladeVelocities(cudaMemoryManager);
-    this->initBladeForces(cudaMemoryManager);    
-    this->initBoundingSpheres(para, cudaMemoryManager);  
+    this->initBladeForces(cudaMemoryManager);
+    this->initBoundingSpheres(para, cudaMemoryManager);
     this->streamIndex = 0;
 }
 
@@ -286,7 +321,7 @@ void ActuatorFarm::interact(Parameter* para, CudaMemoryManager* cudaMemoryManage
 
     if(useHostArrays) cudaMemoryManager->cudaCopyBladeCoordsHtoD(this);
 
-    vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->numberOfNodes);
+    vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, this->numberOfGridNodes);
 
     interpolateVelocities<<< bladeGrid.grid, bladeGrid.threads, 0, stream >>>(
         para->getParD(this->level)->coordinateX, para->getParD(this->level)->coordinateY, para->getParD(this->level)->coordinateZ,        
@@ -343,6 +378,7 @@ void ActuatorFarm::calcForcesEllipticWing()
     real Cd = c0o1;
     real c0 = 20*c1o10;
     real c, Cn, Ct;
+
     for(uint turbine=0; turbine<this->numberOfTurbines; turbine++)
     {
         real diameter = this->diametersH[turbine];
@@ -388,7 +424,7 @@ void ActuatorFarm::getTaggedFluidNodes(Parameter *para, GridProvider* gridProvid
 void ActuatorFarm::initTurbineGeometries(CudaMemoryManager* cudaMemoryManager)
 {
     this->numberOfTurbines = uint(this->preInitDiameters.size());
-    this->numberOfNodes = numberOfTurbines*numberOfBladeNodes*numberOfBlades;
+    this->numberOfGridNodes = numberOfTurbines*numberOfBladeNodes*numberOfBlades;
 
     cudaMemoryManager->cudaAllocBladeGeometries(this);
     cudaMemoryManager->cudaAllocBladeOrientations(this);
@@ -444,9 +480,9 @@ void ActuatorFarm::initBladeVelocities(CudaMemoryManager* cudaMemoryManager)
 {   
     cudaMemoryManager->cudaAllocBladeVelocities(this);
 
-    std::fill_n(this->bladeVelocitiesXH, this->numberOfNodes, c0o1);
-    std::fill_n(this->bladeVelocitiesYH, this->numberOfNodes, c0o1);
-    std::fill_n(this->bladeVelocitiesZH, this->numberOfNodes, c0o1);
+    std::fill_n(this->bladeVelocitiesXH, this->numberOfGridNodes, c0o1);
+    std::fill_n(this->bladeVelocitiesYH, this->numberOfGridNodes, c0o1);
+    std::fill_n(this->bladeVelocitiesZH, this->numberOfGridNodes, c0o1);
 
     cudaMemoryManager->cudaCopyBladeVelocitiesHtoD(this);
     swapArrays(this->bladeVelocitiesXDCurrentTimestep, this->bladeVelocitiesXDPreviousTimestep);
@@ -459,9 +495,9 @@ void ActuatorFarm::initBladeForces(CudaMemoryManager* cudaMemoryManager)
 {   
     cudaMemoryManager->cudaAllocBladeForces(this);
 
-    std::fill_n(this->bladeForcesXH, this->numberOfNodes, c0o1);
-    std::fill_n(this->bladeForcesYH, this->numberOfNodes, c0o1);
-    std::fill_n(this->bladeForcesZH, this->numberOfNodes, c0o1);
+    std::fill_n(this->bladeForcesXH, this->numberOfGridNodes, c0o1);
+    std::fill_n(this->bladeForcesYH, this->numberOfGridNodes, c0o1);
+    std::fill_n(this->bladeForcesZH, this->numberOfGridNodes, c0o1);
 
     cudaMemoryManager->cudaCopyBladeForcesHtoD(this);
     swapArrays(this->bladeForcesXDCurrentTimestep, this->bladeForcesXDPreviousTimestep);
@@ -474,7 +510,7 @@ void ActuatorFarm::initBladeIndices(Parameter* para, CudaMemoryManager* cudaMemo
 {   
     cudaMemoryManager->cudaAllocBladeIndices(this);
 
-    std::fill_n(this->bladeIndicesH, this->numberOfNodes, 1);
+    std::fill_n(this->bladeIndicesH, this->numberOfGridNodes, 1);
 
     cudaMemoryManager->cudaCopyBladeIndicesHtoD(this);
 }
@@ -496,14 +532,14 @@ void ActuatorFarm::initBoundingSpheres(Parameter* para, CudaMemoryManager* cudaM
         uint minimumNumberOfNodesPerSphere = (uint)(c4o3*cPi*pow(sphereRadius-this->deltaX, c3o1)/pow(this->deltaX, c3o1));
         uint nodesInThisSphere = 0;
 
-        for (uint j = 1; j <= para->getParH(this->level)->numberOfNodes; j++)
+        for (size_t pos = 1; pos <= para->getParH(this->level)->numberOfNodes; pos++)
         {
-            const real distX = para->getParH(this->level)->coordinateX[j]-posX;
-            const real distY = para->getParH(this->level)->coordinateY[j]-posY;
-            const real distZ = para->getParH(this->level)->coordinateZ[j]-posZ;
+            const real distX = para->getParH(this->level)->coordinateX[pos]-posX;
+            const real distY = para->getParH(this->level)->coordinateY[pos]-posY;
+            const real distZ = para->getParH(this->level)->coordinateZ[pos]-posZ;
             if(distSqrd(distX,distY,distZ) < sphereRadiusSqrd) 
             {
-                nodesInSpheres.push_back(j);
+                nodesInSpheres.push_back((int)pos);
                 nodesInThisSphere++;
             }
         }
@@ -539,23 +575,23 @@ void ActuatorFarm::setAllYaws(real* _yaws)
 
 void ActuatorFarm::setAllBladeCoords(real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ)
 { 
-    std::copy_n(_bladeCoordsX, this->numberOfNodes, this->bladeCoordsXH);
-    std::copy_n(_bladeCoordsY, this->numberOfNodes, this->bladeCoordsYH);
-    std::copy_n(_bladeCoordsZ, this->numberOfNodes, this->bladeCoordsZH);
+    std::copy_n(_bladeCoordsX, this->numberOfGridNodes, this->bladeCoordsXH);
+    std::copy_n(_bladeCoordsY, this->numberOfGridNodes, this->bladeCoordsYH);
+    std::copy_n(_bladeCoordsZ, this->numberOfGridNodes, this->bladeCoordsZH);
 }
 
 void ActuatorFarm::setAllBladeVelocities(real* _bladeVelocitiesX, real* _bladeVelocitiesY, real* _bladeVelocitiesZ)
 { 
-    std::copy_n(_bladeVelocitiesX, this->numberOfNodes, this->bladeVelocitiesXH);
-    std::copy_n(_bladeVelocitiesY, this->numberOfNodes, this->bladeVelocitiesYH);
-    std::copy_n(_bladeVelocitiesZ, this->numberOfNodes, this->bladeVelocitiesZH);
+    std::copy_n(_bladeVelocitiesX, this->numberOfGridNodes, this->bladeVelocitiesXH);
+    std::copy_n(_bladeVelocitiesY, this->numberOfGridNodes, this->bladeVelocitiesYH);
+    std::copy_n(_bladeVelocitiesZ, this->numberOfGridNodes, this->bladeVelocitiesZH);
 }
 
 void ActuatorFarm::setAllBladeForces(real* _bladeForcesX, real* _bladeForcesY, real* _bladeForcesZ)
 { 
-    std::copy_n(_bladeForcesX, this->numberOfNodes, this->bladeForcesXH);
-    std::copy_n(_bladeForcesY, this->numberOfNodes, this->bladeForcesYH);
-    std::copy_n(_bladeForcesZ, this->numberOfNodes, this->bladeForcesZH);
+    std::copy_n(_bladeForcesX, this->numberOfGridNodes, this->bladeForcesXH);
+    std::copy_n(_bladeForcesY, this->numberOfGridNodes, this->bladeForcesYH);
+    std::copy_n(_bladeForcesZ, this->numberOfGridNodes, this->bladeForcesZH);
 
 }void ActuatorFarm::setTurbineBladeCoords(uint turbine, real* _bladeCoordsX, real* _bladeCoordsY, real* _bladeCoordsZ)
 { 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h
index 8e21cdb6b21efd323f6723e21d6b28614109f1ec..67bf83691d19179984647cb808bb6c0592bb0bfb 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h
@@ -3,14 +3,14 @@
 
 #include "PreCollisionInteractor.h"
 #include "PointerDefinitions.h"
-#include "lbm/constants/NumericConstants.h"
+#include "basics/constants/NumericConstants.h"
 #include <stdexcept>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 class Parameter;
 class GridProvider;
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 class ActuatorFarm : public PreCollisionInteractor
 {
@@ -32,7 +32,7 @@ public:
         level(_level),
         useHostArrays(_useHostArrays),
         numberOfTurbines(0),
-        numberOfNodes(0),
+        numberOfGridNodes(0),
         PreCollisionInteractor()
     {
         this->deltaT = _deltaT*exp2(-this->level);
@@ -62,7 +62,7 @@ public:
     uint getNumberOfBladesPerTurbine(){ return this->numberOfBlades; };
 
     uint getNumberOfIndices(){ return this->numberOfIndices; };
-    uint getNumberOfNodes(){ return this->numberOfNodes; };
+    uint getNumberOfGridNodes(){ return this->numberOfGridNodes; };
 
     real* getAllAzimuths(){ return azimuthsH; };
     real* getAllOmegas(){ return omegasH; };
@@ -189,7 +189,7 @@ private:
     const real epsilon; // in m
     const int level;
     uint numberOfIndices;
-    uint numberOfNodes;
+    uint numberOfGridNodes;
     real forceRatio, factorGaussian, invEpsilonSqrd, invDeltaX;
     int streamIndex;
 };
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
index f9a87f613e7607301e59a7c1e67eb556418892e4..811045a32b18fd0f5d7f71be39b0dfec8982b352 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
@@ -4,7 +4,7 @@
 #include <string>
 #include <vector>
 
-#include "Core/DataTypes.h"
+#include "DataTypes.h"
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
index aacb1721ad90e9178b3c2ccf49a2aa9ec79d5504..99c60fd3d2aae2e796e0c95e624b9d5d33c30ef1 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
@@ -1,21 +1,52 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PrecursorWriter.cu
+//! \ingroup PreCollisionInteractor
+//! \author Henrik Asmuth, Henry Korb
+//======================================================================================
 #include "PrecursorWriter.h"
 #include "basics/writer/WbWriterVtkXmlImageBinary.h"
 
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
-#include <cuda/CudaGrid.h>
-#include "Kernel/Utilities/DistributionHelper.cuh"
+#include "cuda/CudaGrid.h"
+#include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-#include <Core/StringUtilities/StringUtil.h>
+#include "StringUtilities/StringUtil.h"
 
 #include "Parameter/Parameter.h"
 #include "DataStructureInitializer/GridProvider.h"
 #include "GPU/CudaMemoryManager.h"
 
 using namespace vf::lbm::dir;
-
-
+using namespace vf::gpu;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //TODO check everything for multiple level
@@ -52,13 +83,16 @@ __global__ void fillArrayVelocities(const uint numberOfPrecursorNodes,
 
 
 {
-    const uint node = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = vf::gpu::getNodeIndex();
 
-    if(node>=numberOfPrecursorNodes) return;
+    if(nodeIndex>=numberOfPrecursorNodes) return;
 
-    precursorData[linearIdx(0u, node, numberOfPrecursorNodes)] = vx[indices[node]]*velocityRatio;
-    precursorData[linearIdx(1u, node, numberOfPrecursorNodes)] = vy[indices[node]]*velocityRatio;
-    precursorData[linearIdx(2u, node, numberOfPrecursorNodes)] = vz[indices[node]]*velocityRatio;
+    precursorData[linearIdx(0u, nodeIndex, numberOfPrecursorNodes)] = vx[indices[nodeIndex]]*velocityRatio;
+    precursorData[linearIdx(1u, nodeIndex, numberOfPrecursorNodes)] = vy[indices[nodeIndex]]*velocityRatio;
+    precursorData[linearIdx(2u, nodeIndex, numberOfPrecursorNodes)] = vz[indices[nodeIndex]]*velocityRatio;
 }
 
 
@@ -71,15 +105,19 @@ __global__ void fillArrayDistributions( uint numberOfPrecursorNodes,
                                         bool isEvenTimestep,
                                         unsigned long numberOfLBnodes)
 {
-    const uint node = vf::gpu::getNodeIndex();
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned nodeIndex = vf::gpu::getNodeIndex();
 
-    if(node>=numberOfPrecursorNodes) return;
+    if(nodeIndex>=numberOfPrecursorNodes) return;
 
-    Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep);
+    Distributions27 dist;
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
     
     ////////////////////////////////////////////////////////////////////////////////
     // ! - Set neighbor indices (necessary for indirect addressing)
-    uint k_000 = indices[node];
+    uint k_000 = indices[nodeIndex];
     // uint k_M00 = neighborX[k_000];
     uint k_0M0 = neighborY[k_000];
     uint k_00M = neighborZ[k_000];
@@ -91,15 +129,15 @@ __global__ void fillArrayDistributions( uint numberOfPrecursorNodes,
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Get local distributions in PX directions
     //!
-    precursorData[linearIdx(PrecP00, node, numberOfPrecursorNodes)] = (dist.f[DIR_P00])[k_000];
-    precursorData[linearIdx(PrecPP0, node, numberOfPrecursorNodes)] = (dist.f[DIR_PP0])[k_000];
-    precursorData[linearIdx(PrecPM0, node, numberOfPrecursorNodes)] = (dist.f[DIR_PM0])[k_0M0];
-    precursorData[linearIdx(PrecP0P, node, numberOfPrecursorNodes)] = (dist.f[DIR_P0P])[k_000];
-    precursorData[linearIdx(PrecP0M, node, numberOfPrecursorNodes)] = (dist.f[DIR_P0M])[k_00M];
-    precursorData[linearIdx(PrecPPP, node, numberOfPrecursorNodes)] = (dist.f[DIR_PPP])[k_000];
-    precursorData[linearIdx(PrecPMP, node, numberOfPrecursorNodes)] = (dist.f[DIR_PMP])[k_0M0];
-    precursorData[linearIdx(PrecPPM, node, numberOfPrecursorNodes)] = (dist.f[DIR_PPM])[k_00M];
-    precursorData[linearIdx(PrecPMM, node, numberOfPrecursorNodes)] = (dist.f[DIR_PMM])[k_0MM];
+    precursorData[linearIdx(PrecP00, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P00])[k_000];
+    precursorData[linearIdx(PrecPP0, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PP0])[k_000];
+    precursorData[linearIdx(PrecPM0, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PM0])[k_0M0];
+    precursorData[linearIdx(PrecP0P, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P0P])[k_000];
+    precursorData[linearIdx(PrecP0M, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_P0M])[k_00M];
+    precursorData[linearIdx(PrecPPP, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PPP])[k_000];
+    precursorData[linearIdx(PrecPMP, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PMP])[k_0M0];
+    precursorData[linearIdx(PrecPPM, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PPM])[k_00M];
+    precursorData[linearIdx(PrecPMM, nodeIndex, numberOfPrecursorNodes)] = (dist.f[DIR_PMM])[k_0MM];
 }
 
 
@@ -128,12 +166,12 @@ void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemo
         std::vector<int> indicesOnPlane;
         std::vector<real> coordY, coordZ;
 
-        for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+        for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
         {
-            real pointCoordX = para->getParH(level)->coordinateX[j];
-            real pointCoordY = para->getParH(level)->coordinateY[j];
-            real pointCoordZ = para->getParH(level)->coordinateZ[j];
-            if( para->getParH(level)->typeOfGridNode[j] == GEO_FLUID &&
+            real pointCoordX = para->getParH(level)->coordinateX[pos];
+            real pointCoordY = para->getParH(level)->coordinateY[pos];
+            real pointCoordZ = para->getParH(level)->coordinateZ[pos];
+            if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID &&
                 pointCoordX < (dx+xPos) && pointCoordX >= xPos       &&
                 pointCoordY<=yMax && pointCoordY>=yMin               && 
                 pointCoordZ<=zMax && pointCoordZ>=zMin)
@@ -143,7 +181,7 @@ void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemo
 
                 lowestY = min(lowestY, pointCoordY);
                 lowestZ = min(lowestZ, pointCoordZ);
-                indicesOnGrid.push_back(j);    
+                indicesOnGrid.push_back((uint)pos);    
                 coordY.push_back(pointCoordY);            
                 coordZ.push_back(pointCoordZ);    
             }
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h
index 3bae63a339255f3f72196e20096f6019cdd7748d..264023b58ba6db46b50f6a85b334c530864a0b8f 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h
@@ -31,7 +31,7 @@
 //! \date 05/12/2022
 //! \brief Probe writing planes of data to be used as inflow data in successor simulation using PrecursorBC
 //!
-//! The probe writes out yz-planes at a specifc x position ( \param xPos ) of either velocity or distributions 
+//! The probe writes out yz-planes at a specific x position ( \param xPos ) of either velocity or distributions 
 //! that can be read by PrecursorBC as inflow data.
 //=======================================================================================
 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
index 4f4affd9fffdfd2d7a91a14051ff49b354473b6a..e89d392b5d4bf5983f9bb47642fef81d0f06cc89 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
@@ -236,7 +236,7 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider,
                                 }
 
     // Find all points along the normal direction
-    for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+    for(size_t j = 1; j < para->getParH(level)->numberOfNodes; j++ )
     {
         if(para->getParH(level)->typeOfGridNode[j] == GEO_FLUID)
         {   
@@ -251,16 +251,16 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider,
     std::sort(pointCoordsNormal->begin(), pointCoordsNormal->end());
     
     // Find all pointCoords in the first plane 
-    for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+    for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
     {
-        if( para->getParH(level)->typeOfGridNode[j] == GEO_FLUID && pointCoordsNormal_par[j] == pointCoordsNormal->at(0)) 
+        if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID && pointCoordsNormal_par[pos] == pointCoordsNormal->at(0)) 
         {
             //not needed in current state, might become relevant for two-point correlations
             // pointCoordsNormal->push_back( pointCoordsNormal_par[j] ); 
             // pointCoordsInplane1->push_back( pointCoordsInplane1_par[j] );
             // pointCoordsInplane2->push_back( pointCoordsInplane2_par[j] );
 
-            probeIndices_level.push_back(j);
+            probeIndices_level.push_back((int)pos);
         }
     }
 }
@@ -270,12 +270,20 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider,
 void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t_level, int level)
 {   
     // Compute macroscopic variables in entire domain
-    CalcMacCompSP27(para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
-                    para->getParD(level)->rho, para->getParD(level)->pressure, para->getParD(level)->typeOfGridNode,
-                    para->getParD(level)->neighborX, para->getParD(level)->neighborY,
-                    para->getParD(level)->neighborZ, para->getParD(level)->numberOfNodes,
-                    para->getParD(level)->numberofthreads, para->getParD(level)->distributions.f[0],
-                    para->getParD(level)->isEvenTimestep);
+    CalcMacCompSP27(
+        para->getParD(level)->velocityX, 
+        para->getParD(level)->velocityY, 
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->rho, 
+        para->getParD(level)->pressure, 
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->neighborX, 
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ, 
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->numberofthreads, 
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->isEvenTimestep);
     getLastCudaError("In PlanarAverageProbe Kernel CalcMacSP27 execution failed");
 
     // Definition of normal and inplane directions for moveIndices kernels
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
index 8e0bf1faf87a251d41f1b5deeda698c75d9706c6..f55045505bff0e3b5b0b1426be4e9e1a3832d088 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
@@ -76,11 +76,11 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
                             int level)
 {
     real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]);
-    for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+    for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
     {
-        real pointCoordX = para->getParH(level)->coordinateX[j];
-        real pointCoordY = para->getParH(level)->coordinateY[j];
-        real pointCoordZ = para->getParH(level)->coordinateZ[j];
+        real pointCoordX = para->getParH(level)->coordinateX[pos];
+        real pointCoordY = para->getParH(level)->coordinateY[pos];
+        real pointCoordZ = para->getParH(level)->coordinateZ[pos];
         real distX = pointCoordX - this->posX;
         real distY = pointCoordY - this->posY;
         real distZ = pointCoordZ - this->posZ;
@@ -88,7 +88,7 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
         if( distX <= this->deltaX && distY <= this->deltaY && distZ <= this->deltaZ &&
             distX >=0.f && distY >=0.f && distZ >=0.f)
         {
-            probeIndices_level.push_back(j);
+            probeIndices_level.push_back((int)pos);
             distX_level.push_back( distX/dx );
             distY_level.push_back( distY/dx );
             distZ_level.push_back( distZ/dx );
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
index 37beeed05e498028356cd89b416e5269c070de81..89e1f6b87687ed42c079415a5340f1d385c8d62c 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
@@ -75,20 +75,20 @@ void PointProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
 {
 
     real dx = abs(para->getParH(level)->coordinateX[1]-para->getParH(level)->coordinateX[para->getParH(level)->neighborX[1]]);
-    for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+    for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
     {    
         for(uint point=0; point<this->pointCoordsX.size(); point++)
         {
             real pointCoordX = this->pointCoordsX[point];
             real pointCoordY = this->pointCoordsY[point];
             real pointCoordZ = this->pointCoordsZ[point];
-            real distX = pointCoordX-para->getParH(level)->coordinateX[j];
-            real distY = pointCoordY-para->getParH(level)->coordinateY[j];
-            real distZ = pointCoordZ-para->getParH(level)->coordinateZ[j];
+            real distX = pointCoordX-para->getParH(level)->coordinateX[pos];
+            real distY = pointCoordY-para->getParH(level)->coordinateY[pos];
+            real distZ = pointCoordZ-para->getParH(level)->coordinateZ[pos];
             if( distX <=dx && distY <=dx && distZ <=dx &&
                 distX >0.f && distY >0.f && distZ >0.f)
             {
-                probeIndices_level.push_back(j);
+                probeIndices_level.push_back((int)pos);
                 distX_level.push_back( distX/dx );
                 distY_level.push_back( distY/dx );
                 distZ_level.push_back( distZ/dx );
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
index 03c18f5a9a2133bec244053113209abc70469a2a..a7a0e79c0bcbf0f7a9e13e879debfb378e23f69d 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
@@ -37,15 +37,15 @@
 #include <helper_cuda.h>
 
 #include "VirtualFluids_GPU/GPU/GeometryUtils.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 #include "basics/writer/WbWriterVtkXmlBinary.h"
-#include <Core/StringUtilities/StringUtil.h>
+#include <StringUtilities/StringUtil.h>
 
 #include "Parameter/Parameter.h"
 #include "DataStructureInitializer/GridProvider.h"
 #include "GPU/CudaMemoryManager.h"
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 
 __device__ void calculatePointwiseQuantities(uint n, real* quantityArray, bool* quantities, uint* quantityArrayOffsets, uint nPoints, uint node, real vx, real vy, real vz, real rho)
 {
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
index 81da15595baae55aa562bc77e24442a9258d992f..3341111c134ace7ca6ff64eeb7f87b38f8014656 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
@@ -171,11 +171,11 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std
     {
         if (!para->getIsBodyForce()) throw std::runtime_error("WallModelProbe::findPoints(): bodyforce not allocated!");
         // Find all fluid nodes
-        for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
+        for(size_t pos = 1; pos < para->getParH(level)->numberOfNodes; pos++ )
         {
-            if( para->getParH(level)->typeOfGridNode[j] == GEO_FLUID) 
+            if( para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) 
             {
-                probeIndices_level.push_back(j);
+                probeIndices_level.push_back((int)pos);
             }
         }
     }
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
index e43fb54a6b56b4d9a501269544cea000df31cdb7..1e70fc642a3fd7f6fca4ed90b9ff4ebc1bb437db 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
@@ -2,6 +2,7 @@
 
 #include "InitCompAD27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitCompAD27::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitCompAD27::getNewInstance(std::shared_p
 
 void InitCompAD27::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_Comp_AD_27 << < grid, threads >> >(	para->getParD(level)->neighborX,
-											para->getParD(level)->neighborY,
-											para->getParD(level)->neighborZ,
-											para->getParD(level)->typeOfGridNode,
-											para->getParD(level)->Conc,
-											para->getParD(level)->velocityX,
-											para->getParD(level)->velocityY,
-											para->getParD(level)->velocityZ,
-											para->getParD(level)->numberOfNodes,
-											para->getParD(level)->distributionsAD27.f[0],
-											para->getParD(level)->isEvenTimestep);
-	getLastCudaError("InitAD27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_Comp_AD_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->concentration,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributionsAD.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_Comp_AD_27 execution failed");
 }
 
 bool InitCompAD27::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
index d40e60c764054f8ac6c1793ea3e3573ed04a84fc..8fc9de61cfc20c5111a70ad544a9a26c5b3ea7b4 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 
 
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
index 8a53dff5c14adef69aa012bdf1d870d62a9749b2..f8fc6af00d93cc5a51da4a69d67b69b616f97140 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
@@ -2,6 +2,7 @@
 
 #include "InitCompAD7_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<InitCompAD7> InitCompAD7::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<InitCompAD7> InitCompAD7::getNewInstance(std::shared_ptr<Paramet
 
 void InitCompAD7::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_Comp_AD_7 << < grid, threads >> >(	para->getParD(level)->neighborX,
-										para->getParD(level)->neighborY,
-										para->getParD(level)->neighborZ,
-										para->getParD(level)->typeOfGridNode,
-										para->getParD(level)->Conc,
-										para->getParD(level)->velocityX,
-										para->getParD(level)->velocityY,
-										para->getParD(level)->velocityZ,
-										para->getParD(level)->numberOfNodes,
-										para->getParD(level)->distributionsAD7.f[0],
-										para->getParD(level)->isEvenTimestep);
-	getLastCudaError("InitAD7 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_Comp_AD_7 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->concentration,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributionsAD7.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_Comp_AD_7 execution failed");
 }
 
 bool InitCompAD7::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
index 38cd57fd48e02e410e1ae557088e023ffeadfc4e..bb3e6c97ddc387234252b59dc43143f115888a6a 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
index 23ec3e5293ec3a49bf632a720ab554d156dc9674..c4676f28f969e2db8ff7f1910ac784a1c0dab351 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
@@ -2,6 +2,7 @@
 
 #include "InitCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitCompSP27::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,27 +11,12 @@ std::shared_ptr<PreProcessorStrategy> InitCompSP27::getNewInstance(std::shared_p
 
 void InitCompSP27::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 
     if( ! para->getUseInitNeq() )
     {
-        LB_Init_Comp_SP_27 <<< grid, threads >>> (para->getParD(level)->neighborX,
+        LB_Init_Comp_SP_27 <<< grid.grid, grid.threads >>> (
+            para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
             para->getParD(level)->neighborZ,
             para->getParD(level)->typeOfGridNode,
@@ -41,11 +27,12 @@ void InitCompSP27::init(int level)
             para->getParD(level)->numberOfNodes,
             para->getParD(level)->distributions.f[0],
             para->getParD(level)->isEvenTimestep);
-        getLastCudaError("LBInitSP27 execution failed");
+        getLastCudaError("LB_Init_Comp_SP_27 execution failed");
     }
     else
     {
-        LB_Init_Comp_Neq_SP_27 <<< grid, threads >>> (para->getParD(level)->neighborX,
+        LB_Init_Comp_Neq_SP_27 <<< grid.grid, grid.threads >>> (
+            para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
             para->getParD(level)->neighborZ,
             para->getParD(level)->neighborInverse,
@@ -59,7 +46,7 @@ void InitCompSP27::init(int level)
             para->getParD(level)->omega,
             para->getParD(level)->isEvenTimestep);
         cudaDeviceSynchronize();
-        getLastCudaError("LBInitNeqSP27 execution failed");
+        getLastCudaError("LB_Init_Comp_Neq_SP_27 execution failed");
     }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
index dcc3b9a060a026accffdc6d24f338a6d23295d73..109ea18edf183b65106ce6ed86fea05d6b005d90 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
@@ -355,7 +355,7 @@ __global__ void LB_Init_Comp_Neq_SP_27( unsigned int* neighborX,
 
             //////////////////////////////////////////////////////////////////////////
 
-            // the following code is copy and pasted from VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
+            // the following code is copied and pasted from VirtualFluidsCore/Visitors/InitDistributionsBlockVisitor.cpp
             // i.e. Konstantins code
 
             real ax = dvx1dx;
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
index cb6b40b4371a206c6d1e031822338621c4907be1..14d6b725337aa8b9af279bf794ff1c0912516b64 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
@@ -2,6 +2,7 @@
 
 #include "InitF3_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitF3::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitF3::getNewInstance(std::shared_ptr<Par
 
 void InitF3::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_F3 << < grid, threads >> >(	para->getParD(level)->neighborX,
-										para->getParD(level)->neighborY,
-										para->getParD(level)->neighborZ,
-										para->getParD(level)->typeOfGridNode,
-										para->getParD(level)->rho,
-										para->getParD(level)->velocityX,
-										para->getParD(level)->velocityY,
-										para->getParD(level)->velocityZ,
-										para->getParD(level)->numberOfNodes,
-										para->getParD(level)->g6.g[0],
-										para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LBInitF3 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_F3 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->rho,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->g6.g[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_F3 execution failed");
 }
 
 bool InitF3::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
index 25af54e43ec213214615c2edc79d7996e4651c38..349bfda9824483bf08d09f267d5fc4b0f6a13ac1 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
index 419ae80b96be57f8dc9c4ebecaccac0d435f00e0..6a9b4cb31b1032f6921bddbe60d3cd570ef46b6d 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
@@ -2,6 +2,7 @@
 
 #include "InitIncompAD27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitIncompAD27::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitIncompAD27::getNewInstance(std::shared
 
 void InitIncompAD27::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_Incomp_AD_27 << < grid, threads >> >(	para->getParD(level)->neighborX,
-												para->getParD(level)->neighborY,
-												para->getParD(level)->neighborZ,
-												para->getParD(level)->typeOfGridNode,
-												para->getParD(level)->Conc,
-												para->getParD(level)->velocityX,
-												para->getParD(level)->velocityY,
-												para->getParD(level)->velocityZ,
-												para->getParD(level)->numberOfNodes,
-												para->getParD(level)->distributionsAD27.f[0],
-												para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LBInitIncompAD27 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_Incomp_AD_27 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->concentration,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributionsAD.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_Incomp_AD_27 execution failed");
 }
 
 bool InitIncompAD27::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
index 62d766aaa04b6f6349c6c4106e201f36898601ec..869169c525bf7f64a2c1ac9e1cf2d9678efdb28b 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
index 795cd0496a207e0861e35e4f310481950a037caf..b7c7d46a9ea8e3133b8240e27959b6b4d2ed0cf5 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
@@ -2,6 +2,7 @@
 
 #include "InitIncompAD7_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitIncompAD7::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitIncompAD7::getNewInstance(std::shared_
 
 void InitIncompAD7::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);	
-
-	LB_Init_Incomp_AD_7 << < grid, threads >> >(	para->getParD(level)->neighborX,
-												para->getParD(level)->neighborY,
-												para->getParD(level)->neighborZ,
-												para->getParD(level)->typeOfGridNode,
-												para->getParD(level)->Conc,
-												para->getParD(level)->velocityX,
-												para->getParD(level)->velocityY,
-												para->getParD(level)->velocityZ,
-												para->getParD(level)->numberOfNodes,
-												para->getParD(level)->distributionsAD27.f[0],
-												para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LBInitIncompAD7 execution failed");
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    LB_Init_Incomp_AD_7 <<< grid.grid, grid.threads >>>(
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->typeOfGridNode,
+        para->getParD(level)->concentration,
+        para->getParD(level)->velocityX,
+        para->getParD(level)->velocityY,
+        para->getParD(level)->velocityZ,
+        para->getParD(level)->numberOfNodes,
+        para->getParD(level)->distributionsAD.f[0],
+        para->getParD(level)->isEvenTimestep);
+    getLastCudaError("LB_Init_Incomp_AD_7 execution failed");
 }
 
 bool InitIncompAD7::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
index 94a4352d43dee67117f66eaf03536c5ea3e15edd..1fee2cbe232415d7435f5b60297799f2668cc01e 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
index 0538c7ab89eb750a40cfc47486dc0891d4493976..078ad24f24659bf10a3dc9ed90bfd62b5e021187 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
@@ -2,6 +2,7 @@
 
 #include "InitSP27_Device.cuh"
 #include "Parameter/Parameter.h"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<PreProcessorStrategy> InitSP27::getNewInstance(std::shared_ptr<Parameter> para)
 {
@@ -10,36 +11,21 @@ std::shared_ptr<PreProcessorStrategy> InitSP27::getNewInstance(std::shared_ptr<P
 
 void InitSP27::init(int level)
 {
-	int numberOfThreads = para->getParD(level)->numberofthreads;
-	int size_Mat = para->getParD(level)->numberOfNodes;
-
-	int Grid = (size_Mat / numberOfThreads) + 1;
-	int Grid1, Grid2;
-	if (Grid>512)
-	{
-		Grid1 = 512;
-		Grid2 = (Grid / Grid1) + 1;
-	}
-	else
-	{
-		Grid1 = 1;
-		Grid2 = Grid;
-	}
-	dim3 grid(Grid1, Grid2);
-	dim3 threads(numberOfThreads, 1, 1);
-
-	LB_Init_SP_27 << < grid, threads >> >(	para->getParD(level)->neighborX,
-										para->getParD(level)->neighborY,
-										para->getParD(level)->neighborZ,
-										para->getParD(level)->typeOfGridNode,
-										para->getParD(level)->rho,
-										para->getParD(level)->velocityX,
-										para->getParD(level)->velocityY,
-										para->getParD(level)->velocityZ,
-										para->getParD(level)->numberOfNodes,
-										para->getParD(level)->distributions.f[0],
-										para->getParD(level)->isEvenTimestep);
-	getLastCudaError("LBInitSP27 execution failed");
+    // Fetch this level's device parameter set once; every kernel argument below is read from it.
+    const auto &deviceParameter = para->getParD(level);
+    const vf::cuda::CudaGrid launchGrid(deviceParameter->numberofthreads, deviceParameter->numberOfNodes);
+
+    LB_Init_SP_27<<<launchGrid.grid, launchGrid.threads>>>(
+        deviceParameter->neighborX, deviceParameter->neighborY, deviceParameter->neighborZ,
+        deviceParameter->typeOfGridNode,
+        deviceParameter->rho,
+        deviceParameter->velocityX, deviceParameter->velocityY, deviceParameter->velocityZ,
+        deviceParameter->numberOfNodes,
+        deviceParameter->distributions.f[0],
+        deviceParameter->isEvenTimestep);
+
+    // Error check after the launch; the message names the kernel that was started.
+    getLastCudaError("LB_Init_SP_27 execution failed");
 }
 
 bool InitSP27::checkParameter()
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
index b58935feb0bf276a2d8da3f36efbb1fb0ab9d13f..87abb17176942594280fae7b7592f31303ba746d 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
@@ -1,8 +1,8 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-using namespace vf::lbm::constant;
+using namespace vf::basics::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp b/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp
index a38535f7bdff0d87a5af74a69f0ed8255c647382..15813b7967a84e45f44eb4d286c41aa99c4ff343 100644
--- a/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp
+++ b/src/gpu/VirtualFluids_GPU/Restart/RestartObject.cpp
@@ -9,12 +9,12 @@ void RestartObject::deserialize(const std::string &filename, std::shared_ptr<Par
 {
     deserialize_internal(filename);
 
-    for (int j = para->getCoarse(); j <= para->getFine(); j++) {
+    for (int index1 = para->getCoarse(); index1 <= para->getFine(); index1++) {
         std::vector<real> vec;
         fs.push_back(vec);
 
-        for (unsigned int i = 0; i < (para->getD3Qxx() * para->getParH(j)->numberOfNodes); i++) {
-            para->getParH(j)->distributions.f[0][i] = fs[j][i];
+        for (size_t index2 = 0; index2 < (para->getD3Qxx() * para->getParH(index1)->numberOfNodes); index2++) {
+            para->getParH(index1)->distributions.f[0][index2] = fs[index1][index2];
         }
     }
 }
@@ -24,15 +24,15 @@ void RestartObject::serialize(const std::string &filename, const std::shared_ptr
     if (fs.size() > 0) {
         clear(para);
     }
-    for (int j = para->getCoarse(); j <= para->getFine(); j++) {
+    for (int index1 = para->getCoarse(); index1 <= para->getFine(); index1++) {
         std::vector<real> vec;
         fs.push_back(vec);
 
-        for (unsigned int i = 0; i < (para->getD3Qxx() * para->getParH(j)->numberOfNodes); i++) {
-            if (UbMath::isNaN(para->getParH(j)->distributions.f[0][i])) {
-                fs[j].push_back((real)0.0);
+        for (size_t index2 = 0; index2 < (para->getD3Qxx() * para->getParH(index1)->numberOfNodes); index2++) {
+            if (UbMath::isNaN(para->getParH(index1)->distributions.f[0][index2])) {
+                fs[index1].push_back((real)0.0);
             } else {
-                fs[j].push_back(para->getParH(j)->distributions.f[0][i]);
+                fs[index1].push_back(para->getParH(index1)->distributions.f[0][index2]);
             }
         }
     }
diff --git a/src/gpu/VirtualFluids_GPU/Restart/RestartObject.h b/src/gpu/VirtualFluids_GPU/Restart/RestartObject.h
index 3d6db6e3a7aad82b2656d917a2b25b26c4ff4d92..963c6549fd41c80c77edc9a205679e8cbc627a1e 100644
--- a/src/gpu/VirtualFluids_GPU/Restart/RestartObject.h
+++ b/src/gpu/VirtualFluids_GPU/Restart/RestartObject.h
@@ -5,7 +5,7 @@
 #include <string>
 #include <vector>
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
 class Parameter;
 
diff --git a/src/gpu/VirtualFluids_GPU/Temperature/FindTemperature.cpp b/src/gpu/VirtualFluids_GPU/Temperature/FindTemperature.cpp
index 7b42b22cbf66b248bb1ae6681207eef1da22fa97..a4f6a01f83bd914e6ba7123a10387a463d90e84d 100644
--- a/src/gpu/VirtualFluids_GPU/Temperature/FindTemperature.cpp
+++ b/src/gpu/VirtualFluids_GPU/Temperature/FindTemperature.cpp
@@ -37,7 +37,7 @@ void initTemperatur(Parameter* para, CudaMemoryManager* cudaMemoryManager, int l
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      CalcMacThS7(para->getParD(lev)->Conc, 
+      CalcMacThS7(para->getParD(lev)->concentration, 
                   para->getParD(lev)->typeOfGridNode,       
                   para->getParD(lev)->neighborX, 
                   para->getParD(lev)->neighborY, 
@@ -68,13 +68,13 @@ void initTemperatur(Parameter* para, CudaMemoryManager* cudaMemoryManager, int l
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       CalcConcentration27(
                      para->getParD(lev)->numberofthreads,
-                     para->getParD(lev)->Conc,
+                     para->getParD(lev)->concentration,
                      para->getParD(lev)->typeOfGridNode,
                      para->getParD(lev)->neighborX,
                      para->getParD(lev)->neighborY,
                      para->getParD(lev)->neighborZ,
                      para->getParD(lev)->numberOfNodes,
-                     para->getParD(lev)->distributionsAD27.f[0],
+                     para->getParD(lev)->distributionsAD.f[0],
                      para->getParD(lev)->isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    }
diff --git a/src/lbm/BGK.cpp b/src/lbm/BGK.cpp
index fa3af6777a0492687768dd4945cbf1e9b186f514..5d1993c872b1aefea801e6e2d2c602a82ecd9281 100644
--- a/src/lbm/BGK.cpp
+++ b/src/lbm/BGK.cpp
@@ -1,20 +1,17 @@
 #include "BGK.h"
 
 
-#include <basics/Core/DataTypes.h>
-#include <basics/Core/RealConstants.h>
+#include <basics/DataTypes.h>
 
 #include "constants/NumericConstants.h"
 #include "constants/D3Q27.h"
 
 #include "MacroscopicQuantities.h"
 
-namespace vf
-{
-namespace lbm
+namespace vf::lbm
 {
 
-using namespace constant;
+using namespace vf::basics::constant;
 
 
 
@@ -61,7 +58,7 @@ __host__ __device__ void bgk(KernelParameter parameter)
     //! - Acquire macroscopic quantities
     const real drho = getDensity(distribution.f);
     const real rho = c1o1 + drho;
-    const real OOrho = constant::c1o1 / (constant::c1o1 + drho);    
+    const real OOrho = c1o1 / (c1o1 + drho);    
 
     const real vvx = getIncompressibleVelocityX1(distribution.f) * OOrho;
     const real vvy = getIncompressibleVelocityX2(distribution.f) * OOrho;
@@ -136,5 +133,4 @@ __host__ __device__ void bgk(KernelParameter parameter)
 
 
 }
-}
 
diff --git a/src/lbm/BGK.h b/src/lbm/BGK.h
index 2c82f5bd445ee008954add02fd0d6d6093364e90..6cde85013dd92472022bbf7b93bc73e7940049a1 100644
--- a/src/lbm/BGK.h
+++ b/src/lbm/BGK.h
@@ -8,7 +8,7 @@
 #define __device__
 #endif
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
 #include "KernelParameter.h"
 
diff --git a/src/lbm/CMakeLists.txt b/src/lbm/CMakeLists.txt
index afa90bdd3f95bb71cf7f1eda6407f9b38766072a..52ab3d78710c8551475307463334c9d1d0baf36f 100644
--- a/src/lbm/CMakeLists.txt
+++ b/src/lbm/CMakeLists.txt
@@ -1,12 +1,12 @@
-if(BUILD_VF_CPU)
-    project(lbm LANGUAGES CXX)
 
-    vf_add_library(NAME lbm PUBLIC_LINK basics)
-    target_link_libraries(lbm PRIVATE project_warnings)
+vf_add_library(PUBLIC_LINK basics)
 
-    vf_add_tests()
-endif()
+if(BUILD_VF_GPU)
+    set_target_properties(lbm PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON)
 
-if(BUILD_VF_GPU OR BUILD_VF_GKS)
-    add_subdirectory(cuda)
+    set_source_files_properties(KernelParameter.cpp PROPERTIES LANGUAGE CUDA)
+    set_source_files_properties(CumulantChimera.cpp PROPERTIES LANGUAGE CUDA)
+    set_source_files_properties(BGK.cpp PROPERTIES LANGUAGE CUDA)
 endif()
+
+vf_add_tests()
diff --git a/src/lbm/Chimera.h b/src/lbm/Chimera.h
index 6ffa0918aac4e6303efe4db82aa98ee645dc63e8..8e54d47a42e164a89060f5009c112f5a69afd257 100644
--- a/src/lbm/Chimera.h
+++ b/src/lbm/Chimera.h
@@ -8,16 +8,14 @@
 #define __device__
 #endif
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
-#include <lbm/constants/NumericConstants.h>
+#include <basics/constants/NumericConstants.h>
 
-namespace vf
-{
-namespace lbm
-{
+namespace vf::lbm
+{
 
-using namespace constant;
+using namespace vf::basics::constant; // kept inside vf::lbm: a header must not leak a using-directive into the global namespace
 
 ////////////////////////////////////////////////////////////////////////////////
 //! \brief forward chimera transformation \ref forwardInverseChimeraWithK 
@@ -116,6 +114,5 @@ inline __host__ __device__ void backwardChimeraWithK(real &mfa, real &mfb, real
     mfb = m1;
 }
 
-}
 }
 #endif
diff --git a/src/lbm/CumulantChimera.cpp b/src/lbm/CumulantChimera.cpp
index e1c27f90b6611640d8e5db47c9432268f5f58f15..f8e11d468471efe96b3d9e17c356b8c1a3d59ff8 100644
--- a/src/lbm/CumulantChimera.cpp
+++ b/src/lbm/CumulantChimera.cpp
@@ -2,8 +2,7 @@
 
 #include <cmath>
 
-#include <basics/Core/DataTypes.h>
-#include <basics/Core/RealConstants.h>
+#include <basics/DataTypes.h>
 
 #include "constants/NumericConstants.h"
 #include "constants/D3Q27.h"
@@ -11,12 +10,10 @@
 #include "Chimera.h"
 #include "MacroscopicQuantities.h"
 
-namespace vf
-{
-namespace lbm
+namespace vf::lbm
 {
 
-using namespace constant;
+using namespace vf::basics::constant;
 
 
 ////////////////////////////////////////////////////////////////////////////////////
@@ -449,5 +446,4 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa
 
 
 }
-}
 
diff --git a/src/lbm/CumulantChimera.h b/src/lbm/CumulantChimera.h
index e8740c7d3f5b988a6fdc5c3b16ab6a90e0a28b83..c30a0c07912953cff45e3734c0b60fa7a03acd53 100644
--- a/src/lbm/CumulantChimera.h
+++ b/src/lbm/CumulantChimera.h
@@ -8,7 +8,7 @@
 #define __device__
 #endif
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
 #include "KernelParameter.h"
 
diff --git a/src/lbm/KernelParameter.cpp b/src/lbm/KernelParameter.cpp
index e039214d218ef19f35e8adf927f36d3a6f1aa355..7bf5a369d0e5d4e673d79dcb30bc22fc2c330e68 100644
--- a/src/lbm/KernelParameter.cpp
+++ b/src/lbm/KernelParameter.cpp
@@ -5,11 +5,8 @@
 #include "MacroscopicQuantities.h"
 
 
-namespace vf
+namespace vf::lbm
 {
-namespace lbm
-{
-
 
 
 inline __host__ __device__ real Distribution27::getDensity_() const
@@ -17,8 +14,6 @@ inline __host__ __device__ real Distribution27::getDensity_() const
     return getDensity(f);
 }
 
-
-
 __host__ __device__ real abs_internal(real value)
 {
 #ifdef __CUDA_ARCH__
@@ -30,4 +25,3 @@ __host__ __device__ real abs_internal(real value)
 
 
 }
-}
diff --git a/src/lbm/KernelParameter.h b/src/lbm/KernelParameter.h
index 95226628110637f3794c8a1f7e6f6c1f6dda937b..9c07524226a40aaa9e2c65e7ab028b07aec62ddc 100644
--- a/src/lbm/KernelParameter.h
+++ b/src/lbm/KernelParameter.h
@@ -8,12 +8,10 @@
 #define __device__
 #endif
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
 
-namespace vf
-{
-namespace lbm
+namespace vf::lbm
 {
 
 struct Distribution27
@@ -35,9 +33,6 @@ struct KernelParameter
 };
 
 
-
-
-}
 }
 
 #endif
diff --git a/src/lbm/MacroscopicQuantities.h b/src/lbm/MacroscopicQuantities.h
index 8789f65195ee38b1399a42a0c24511dfcea3d6d0..751c874f1c240f5502ccaef5f6e540be1e5d28ff 100644
--- a/src/lbm/MacroscopicQuantities.h
+++ b/src/lbm/MacroscopicQuantities.h
@@ -8,16 +8,15 @@
 #define __device__
 #endif
 
-#include <basics/Core/DataTypes.h>
+#include <basics/DataTypes.h>
 
 #include "constants/NumericConstants.h"
 #include "constants/D3Q27.h"
 
-namespace vf
-{
-namespace lbm
+
+namespace vf::lbm
 {
-    
+
 ////////////////////////////////////////////////////////////////////////////////////
 //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
 //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa  2015.05.001 ]</b></a>
@@ -61,19 +60,19 @@ inline __host__ __device__ real getIncompressibleVelocityX3(const real *const &f
 */
 inline __host__ __device__ real getCompressibleVelocityX1(const real *const &f27, const real& rho)
 {
-    return getIncompressibleVelocityX1(f27) / (rho + constant::c1o1);
+    return getIncompressibleVelocityX1(f27) / (rho + basics::constant::c1o1);
 }
 
 
 inline __host__ __device__ real getCompressibleVelocityX2(const real *const &f27, const real& rho)
 {
-    return getIncompressibleVelocityX2(f27) / (rho + constant::c1o1);
+    return getIncompressibleVelocityX2(f27) / (rho + basics::constant::c1o1);
 }
 
 
 inline __host__ __device__ real getCompressibleVelocityX3(const real *const &f27, const real& rho)
 {
-    return getIncompressibleVelocityX3(f27) / (rho + constant::c1o1);
+    return getIncompressibleVelocityX3(f27) / (rho + basics::constant::c1o1);
 }
 
 /*
@@ -82,17 +81,17 @@ inline __host__ __device__ real getCompressibleVelocityX3(const real *const &f27
 inline __host__ __device__ real getPressure(const real *const &f27, const real& rho, const real& vx, const real& vy, const real& vz)
 {
     return (f27[dir::DIR_P00] + f27[dir::DIR_M00] + f27[dir::DIR_0P0] + f27[dir::DIR_0M0] + f27[dir::DIR_00P] + f27[dir::DIR_00M] + 
-    constant::c2o1 * (f27[dir::DIR_PP0] + f27[dir::DIR_MM0] + f27[dir::DIR_PM0] + f27[dir::DIR_MP0] + f27[dir::DIR_P0P] + 
+    basics::constant::c2o1 * (f27[dir::DIR_PP0] + f27[dir::DIR_MM0] + f27[dir::DIR_PM0] + f27[dir::DIR_MP0] + f27[dir::DIR_P0P] + 
                       f27[dir::DIR_M0M] + f27[dir::DIR_P0M] + f27[dir::DIR_M0P] + f27[dir::DIR_0PP] + f27[dir::DIR_0MM] + 
                       f27[dir::DIR_0PM] + f27[dir::DIR_0MP]) + 
-    constant::c3o1 * (f27[dir::DIR_PPP] + f27[dir::DIR_MMP] + f27[dir::DIR_PMP] + f27[dir::DIR_MPP] + 
+    basics::constant::c3o1 * (f27[dir::DIR_PPP] + f27[dir::DIR_MMP] + f27[dir::DIR_PMP] + f27[dir::DIR_MPP] + 
                       f27[dir::DIR_PPM] + f27[dir::DIR_MMM] + f27[dir::DIR_PMM] + f27[dir::DIR_MPM]) -
-    rho - (vx * vx + vy * vy + vz * vz) * (constant::c1o1 + rho)) * 
-    constant::c1o2 + rho; // times zero for incompressible case                 
+    rho - (vx * vx + vy * vy + vz * vz) * (basics::constant::c1o1 + rho)) * 
+    basics::constant::c1o2 + rho; // times zero for incompressible case                 
                           // Attention: op defined directly to op = 1 ; ^^^^(1.0/op-0.5)=0.5
 }
 
-}
+
 }
 
 #endif
diff --git a/src/lbm/constants/D3Q27.h b/src/lbm/constants/D3Q27.h
index b9c9d34f7004d1be2f90b6115f005fb2c8d0cbac..b6c05eae921ae66b43999ff01977f7a674ce505f 100644
--- a/src/lbm/constants/D3Q27.h
+++ b/src/lbm/constants/D3Q27.h
@@ -2,96 +2,96 @@
 #define LBM_D3Q27_H
 
 #include <map>
-#include "basics/Core/DataTypes.h"
+#include "basics/DataTypes.h"
 
 namespace vf::lbm::dir
 {
 
-static constexpr int STARTDIR = 0;
-static constexpr int ENDDIR   = 26;
+static constexpr size_t STARTDIR = 0;
+static constexpr size_t ENDDIR = 26;
 
 // used in the CPU and the GPU version
-static constexpr int DIR_000 = 0;
-static constexpr int DIR_P00 = 1;
-static constexpr int DIR_M00 = 2;
-static constexpr int DIR_0P0 = 3;
-static constexpr int DIR_0M0 = 4;
-static constexpr int DIR_00P = 5;
-static constexpr int DIR_00M = 6;
-static constexpr int DIR_PP0 = 7;
-static constexpr int DIR_MM0 = 8;
-static constexpr int DIR_PM0 = 9;
-static constexpr int DIR_MP0 = 10;
-static constexpr int DIR_P0P = 11;
-static constexpr int DIR_M0M = 12;
-static constexpr int DIR_P0M = 13;
-static constexpr int DIR_M0P = 14;
-static constexpr int DIR_0PP = 15;
-static constexpr int DIR_0MM = 16;
-static constexpr int DIR_0PM = 17;
-static constexpr int DIR_0MP = 18;
-static constexpr int DIR_PPP = 19;
-static constexpr int DIR_MPP = 20;
-static constexpr int DIR_PMP = 21;
-static constexpr int DIR_MMP = 22;
-static constexpr int DIR_PPM = 23;
-static constexpr int DIR_MPM = 24;
-static constexpr int DIR_PMM = 25;
-static constexpr int DIR_MMM = 26;
-
-static constexpr int INV_P00 = DIR_M00;
-static constexpr int INV_M00 = DIR_P00;
-static constexpr int INV_0P0 = DIR_0M0;
-static constexpr int INV_0M0 = DIR_0P0;
-static constexpr int INV_00P = DIR_00M;
-static constexpr int INV_00M = DIR_00P;
-static constexpr int INV_PP0 = DIR_MM0;
-static constexpr int INV_MM0 = DIR_PP0;
-static constexpr int INV_PM0 = DIR_MP0;
-static constexpr int INV_MP0 = DIR_PM0;
-static constexpr int INV_P0P = DIR_M0M;
-static constexpr int INV_M0M = DIR_P0P;
-static constexpr int INV_P0M = DIR_M0P;
-static constexpr int INV_M0P = DIR_P0M;
-static constexpr int INV_0PP = DIR_0MM;
-static constexpr int INV_0MM = DIR_0PP;
-static constexpr int INV_0PM = DIR_0MP;
-static constexpr int INV_0MP = DIR_0PM;
-static constexpr int INV_PPP = DIR_MMM;
-static constexpr int INV_MPP = DIR_PMM;
-static constexpr int INV_PMP = DIR_MPM;
-static constexpr int INV_MMP = DIR_PPM;
-static constexpr int INV_PPM = DIR_MMP;
-static constexpr int INV_MPM = DIR_PMP;
-static constexpr int INV_PMM = DIR_MPP;
-static constexpr int INV_MMM = DIR_PPP;
-
-static constexpr int SGD_P00 = 0;
-static constexpr int SGD_M00 = 1;
-static constexpr int SGD_0P0 = 2;
-static constexpr int SGD_0M0 = 3;
-static constexpr int SGD_00P = 4;
-static constexpr int SGD_00M = 5;
-static constexpr int SGD_PP0 = 6;
-static constexpr int SGD_MM0 = 7;
-static constexpr int SGD_PM0 = 8;
-static constexpr int SGD_MP0 = 9;
-static constexpr int SGD_P0P = 10;
-static constexpr int SGD_M0M = 11;
-static constexpr int SGD_P0M = 12;
-static constexpr int SGD_M0P = 13;
-static constexpr int SGD_0PP = 14;
-static constexpr int SGD_0MM = 15;
-static constexpr int SGD_0PM = 16;
-static constexpr int SGD_0MP = 17;
-static constexpr int SGD_PPP = 18;
-static constexpr int SGD_MPP = 19;
-static constexpr int SGD_PMP = 20;
-static constexpr int SGD_MMP = 21;
-static constexpr int SGD_PPM = 22;
-static constexpr int SGD_MPM = 23;
-static constexpr int SGD_PMM = 24;
-static constexpr int SGD_MMM = 25;
+static constexpr size_t DIR_000 = 0;
+static constexpr size_t DIR_P00 = 1;
+static constexpr size_t DIR_M00 = 2;
+static constexpr size_t DIR_0P0 = 3;
+static constexpr size_t DIR_0M0 = 4;
+static constexpr size_t DIR_00P = 5;
+static constexpr size_t DIR_00M = 6;
+static constexpr size_t DIR_PP0 = 7;
+static constexpr size_t DIR_MM0 = 8;
+static constexpr size_t DIR_PM0 = 9;
+static constexpr size_t DIR_MP0 = 10;
+static constexpr size_t DIR_P0P = 11;
+static constexpr size_t DIR_M0M = 12;
+static constexpr size_t DIR_P0M = 13;
+static constexpr size_t DIR_M0P = 14;
+static constexpr size_t DIR_0PP = 15;
+static constexpr size_t DIR_0MM = 16;
+static constexpr size_t DIR_0PM = 17;
+static constexpr size_t DIR_0MP = 18;
+static constexpr size_t DIR_PPP = 19;
+static constexpr size_t DIR_MPP = 20;
+static constexpr size_t DIR_PMP = 21;
+static constexpr size_t DIR_MMP = 22;
+static constexpr size_t DIR_PPM = 23;
+static constexpr size_t DIR_MPM = 24;
+static constexpr size_t DIR_PMM = 25;
+static constexpr size_t DIR_MMM = 26;
+
+static constexpr size_t INV_P00 = DIR_M00;
+static constexpr size_t INV_M00 = DIR_P00;
+static constexpr size_t INV_0P0 = DIR_0M0;
+static constexpr size_t INV_0M0 = DIR_0P0;
+static constexpr size_t INV_00P = DIR_00M;
+static constexpr size_t INV_00M = DIR_00P;
+static constexpr size_t INV_PP0 = DIR_MM0;
+static constexpr size_t INV_MM0 = DIR_PP0;
+static constexpr size_t INV_PM0 = DIR_MP0;
+static constexpr size_t INV_MP0 = DIR_PM0;
+static constexpr size_t INV_P0P = DIR_M0M;
+static constexpr size_t INV_M0M = DIR_P0P;
+static constexpr size_t INV_P0M = DIR_M0P;
+static constexpr size_t INV_M0P = DIR_P0M;
+static constexpr size_t INV_0PP = DIR_0MM;
+static constexpr size_t INV_0MM = DIR_0PP;
+static constexpr size_t INV_0PM = DIR_0MP;
+static constexpr size_t INV_0MP = DIR_0PM;
+static constexpr size_t INV_PPP = DIR_MMM;
+static constexpr size_t INV_MPP = DIR_PMM;
+static constexpr size_t INV_PMP = DIR_MPM;
+static constexpr size_t INV_MMP = DIR_PPM;
+static constexpr size_t INV_PPM = DIR_MMP;
+static constexpr size_t INV_MPM = DIR_PMP;
+static constexpr size_t INV_PMM = DIR_MPP;
+static constexpr size_t INV_MMM = DIR_PPP;
+
+static constexpr size_t SGD_P00 = 0;
+static constexpr size_t SGD_M00 = 1;
+static constexpr size_t SGD_0P0 = 2;
+static constexpr size_t SGD_0M0 = 3;
+static constexpr size_t SGD_00P = 4;
+static constexpr size_t SGD_00M = 5;
+static constexpr size_t SGD_PP0 = 6;
+static constexpr size_t SGD_MM0 = 7;
+static constexpr size_t SGD_PM0 = 8;
+static constexpr size_t SGD_MP0 = 9;
+static constexpr size_t SGD_P0P = 10;
+static constexpr size_t SGD_M0M = 11;
+static constexpr size_t SGD_P0M = 12;
+static constexpr size_t SGD_M0P = 13;
+static constexpr size_t SGD_0PP = 14;
+static constexpr size_t SGD_0MM = 15;
+static constexpr size_t SGD_0PM = 16;
+static constexpr size_t SGD_0MP = 17;
+static constexpr size_t SGD_PPP = 18;
+static constexpr size_t SGD_MPP = 19;
+static constexpr size_t SGD_PMP = 20;
+static constexpr size_t SGD_MMP = 21;
+static constexpr size_t SGD_PPM = 22;
+static constexpr size_t SGD_MPM = 23;
+static constexpr size_t SGD_PMM = 24;
+static constexpr size_t SGD_MMM = 25;
 
 struct countersForPointerChasing{
     uint counterInverse;
@@ -100,7 +100,7 @@ struct countersForPointerChasing{
     uint counterZ;
 };
 
-const std::map<const int, const countersForPointerChasing> mapForPointerChasing = 
+const std::map<const size_t, const countersForPointerChasing> mapForPointerChasing = 
 {
     {DIR_000, countersForPointerChasing{0, 0, 0, 0}},
     {DIR_P00, countersForPointerChasing{0, 1, 0, 0}},
diff --git a/src/lbm/constants/NumericConstants.h b/src/lbm/constants/NumericConstants.h
deleted file mode 100644
index b61e994e726e0b5cd85d8130bd6548ccebdc27f6..0000000000000000000000000000000000000000
--- a/src/lbm/constants/NumericConstants.h
+++ /dev/null
@@ -1,244 +0,0 @@
-#ifndef REAL_CONSTANT_H
-#define REAL_CONSTANT_H
-
-
-namespace vf::lbm::constant
-{
-
-#ifdef VF_DOUBLE_ACCURACY
-static constexpr double c1o2 = 0.5;
-static constexpr double c3o2 = 1.5;
-static constexpr double c1o3 = 0.333333333333333;
-static constexpr double c2o3 = 0.666666666666667;
-static constexpr double c1o4 = 0.25;
-static constexpr double c3o4 = 0.75;
-static constexpr double c1o6 = 0.166666666666667;
-static constexpr double c1o7 = 0.142857142857143;
-static constexpr double c1o8 = 0.125;
-static constexpr double c1o9 = 0.111111111111111;
-static constexpr double c2o9 = 0.222222222222222;
-static constexpr double c4o9 = 0.444444444444444;
-static constexpr double c4o10 = 0.4;
-static constexpr double c1o10 = 0.1;
-static constexpr double c1o12 = 0.083333333333333;
-static constexpr double c1o16 = 0.0625;
-static constexpr double c3o16 = 0.1875;
-static constexpr double c9o16 = 0.5625;
-static constexpr double c1o18 = 0.055555555555556;
-static constexpr double c1o20 = 0.05;
-static constexpr double c19o20 = 0.95;
-static constexpr double c21o20 = 1.05;
-static constexpr double c1o24 = 0.041666666666667;
-static constexpr double c1o27 = 0.037037037037037;
-static constexpr double c3o32 = 0.09375;
-static constexpr double c4o32 = 0.125;
-static constexpr double c1o36 = 0.027777777777778;
-static constexpr double c1o48 = 0.020833333333333;
-static constexpr double c1o64 = 0.015625;
-static constexpr double c3o64 = 0.046875;
-static constexpr double c9o64 = 0.140625;
-static constexpr double c27o64 = 0.421875;
-static constexpr double c1o66 = 0.015151515151515;
-static constexpr double c1o72 = 0.013888888888889;
-static constexpr double c1o264 = 0.003787878787879;
-static constexpr double c8o27 = 0.296296296296296;
-static constexpr double c2o27 = 0.074074074074074;
-static constexpr double c1o54 = 0.018518518518519;
-static constexpr double c1o100 = 0.01;
-static constexpr double c99o100 = 0.99;
-static constexpr double c1o126 = 0.007936507936508;
-static constexpr double c1o216 = 0.004629629629630;
-static constexpr double c5o4 = 1.25;
-static constexpr double c4o3 = 1.333333333333333;
-static constexpr double c9o4 = 2.25;
-static constexpr double c5o2 = 2.5;
-static constexpr double c9o2 = 4.5;
-
-static constexpr double c0o1 = 0.;
-static constexpr double c1o1 = 1.;
-static constexpr double c2o1 = 2.;
-static constexpr double c3o1 = 3.;
-static constexpr double c4o1 = 4.;
-static constexpr double c5o1 = 5.;
-static constexpr double c6o1 = 6.;
-static constexpr double c7o1 = 7.;
-static constexpr double c8o1 = 8.;
-static constexpr double c9o1 = 9.;
-static constexpr double c10o1 = 10.;
-static constexpr double c11o1 = 11.;
-static constexpr double c12o1 = 12.;
-static constexpr double c13o1 = 13.;
-static constexpr double c14o1 = 14.;
-static constexpr double c15o1 = 15.;
-static constexpr double c16o1 = 16.;
-static constexpr double c17o1 = 17.;
-static constexpr double c18o1 = 18.;
-static constexpr double c21o1 = 21.;
-static constexpr double c24o1 = 24.;
-static constexpr double c25o1 = 25.;
-static constexpr double c26o1 = 26.;
-static constexpr double c27o1 = 27.;
-static constexpr double c28o1 = 28.;
-static constexpr double c29o1 = 29.;
-static constexpr double c30o1 = 30.;
-static constexpr double c32o1 = 32.;
-static constexpr double c33o1 = 33.;
-static constexpr double c34o1 = 34.;
-static constexpr double c36o1 = 36.;
-static constexpr double c40o1 = 40.;
-static constexpr double c42o1 = 42.;
-static constexpr double c46o1 = 46.;
-static constexpr double c48o1 = 48.;
-static constexpr double c50o1 = 50.;
-static constexpr double c52o1 = 52.;
-static constexpr double c54o1 = 54.;
-static constexpr double c56o1 = 56.;
-static constexpr double c64o1 = 64.;
-static constexpr double c66o1 = 66.;
-static constexpr double c68o1 = 68.;
-static constexpr double c69o1 = 69.;
-static constexpr double c72o1 = 72.;
-static constexpr double c84o1 = 84.;
-static constexpr double c88o1 = 88.;
-static constexpr double c96o1 = 96.;
-static constexpr double c100o1 = 100.;
-static constexpr double c130o1 = 130.;
-static constexpr double c152o1 = 152.;
-static constexpr double c166o1 = 166.;
-static constexpr double c195o1 = 195.;
-static constexpr double c216o1 = 216.;
-static constexpr double c264o1 = 264.;
-static constexpr double c290o1 = 290.;
-static constexpr double c367o1 = 367.;
-
-static constexpr double Op0000002 = 0.0000002;
-static constexpr double c10eM30 = 1e-30;
-static constexpr double c10eM10 = 1e-10;
-static constexpr double smallSingle = 0.0000000002;
-
-static constexpr double cPi = 3.1415926535;
-static constexpr double c2Pi = 6.28318530717;
-static constexpr double cPio180 = 1.74532925199e-2;
-static constexpr double c180oPi = 57.2957795131;
-
-#else
-static constexpr float c1o2 = 0.5f;
-static constexpr float c3o2 = 1.5f;
-static constexpr float c1o3 = (1.0f / 3.0f);
-static constexpr float c2o3 = (2.0f / 3.0f);
-static constexpr float c1o4 = 0.25f;
-static constexpr float c3o4 = 0.75f;
-static constexpr float c1o6 = (1.0f / 6.0f);
-static constexpr float c1o7 = (1.0f / 7.0f);
-static constexpr float c1o8 = 0.125f;
-static constexpr float c1o9 = (1.0f / 9.0f);
-static constexpr float c2o9 = (2.0f / 9.0f);
-static constexpr float c4o9 = (4.0f / 9.0f);
-static constexpr float c4o10 = 0.4f;
-static constexpr float c1o10 = 0.1f;
-static constexpr float c1o12 = (1.0f / 12.0f);
-static constexpr float c1o16 = 0.0625f;
-static constexpr float c3o16 = 0.1875f;
-static constexpr float c9o16 = 0.5625f;
-static constexpr float c1o18 = (1.0f / 18.0f);
-static constexpr float c1o20 = 0.05f;
-static constexpr float c19o20 = 0.95f;
-static constexpr float c21o20 = 1.05f;
-static constexpr float c1o24 = (1.0f / 24.0f);
-static constexpr float c1o27 = (1.0f / 27.0f);
-static constexpr float c3o32 = 0.09375f;
-static constexpr float c4o32 = 0.125f;
-static constexpr float c1o36 = (1.0f / 36.0f);
-static constexpr float c1o48 = (1.0f / 48.0f);
-static constexpr float c1o64 = 0.015625f;
-static constexpr float c3o64 = 0.046875f;
-static constexpr float c9o64 = 0.140625f;
-static constexpr float c27o64 = 0.421875f;
-static constexpr float c1o66 = (1.0f / 66.0f);
-static constexpr float c1o72 = (1.0f / 72.0f);
-static constexpr float c1o264 = (1.0f / 264.0f);
-static constexpr float c8o27 = (8.0f / 27.0f);
-static constexpr float c2o27 = (2.0f / 27.0f);
-static constexpr float c1o54 = (1.0f / 54.0f);
-static constexpr float c1o100 = 0.01f;
-static constexpr float c99o100 = 0.99f;
-static constexpr float c1o126 = (1.0f / 126.0f);
-static constexpr float c1o216 = (1.0f / 216.0f);
-static constexpr float c5o4 = 1.25f;
-static constexpr float c4o3 = (4.0f / 3.0f);
-static constexpr float c9o4 = 2.25f;
-static constexpr float c5o2 = 2.5f;
-static constexpr float c9o2 = 4.5f;
-
-static constexpr float c0o1 = 0.f;
-static constexpr float c1o1 = 1.f;
-static constexpr float c2o1 = 2.f;
-static constexpr float c3o1 = 3.f;
-static constexpr float c4o1 = 4.f;
-static constexpr float c5o1 = 5.f;
-static constexpr float c6o1 = 6.f;
-static constexpr float c7o1 = 7.f;
-static constexpr float c8o1 = 8.f;
-static constexpr float c9o1 = 9.f;
-static constexpr float c10o1 = 10.f;
-static constexpr float c11o1 = 11.f;
-static constexpr float c12o1 = 12.f;
-static constexpr float c13o1 = 13.f;
-static constexpr float c14o1 = 14.f;
-static constexpr float c15o1 = 15.f;
-static constexpr float c16o1 = 16.f;
-static constexpr float c17o1 = 17.f;
-static constexpr float c18o1 = 18.f;
-static constexpr float c21o1 = 21.f;
-static constexpr float c24o1 = 24.f;
-static constexpr float c25o1 = 25.f;
-static constexpr float c26o1 = 26.f;
-static constexpr float c27o1 = 27.f;
-static constexpr float c28o1 = 28.f;
-static constexpr float c29o1 = 29.f;
-static constexpr float c30o1 = 30.f;
-static constexpr float c32o1 = 32.f;
-static constexpr float c33o1 = 33.f;
-static constexpr float c34o1 = 34.f;
-static constexpr float c36o1 = 36.f;
-static constexpr float c40o1 = 40.f;
-static constexpr float c42o1 = 42.f;
-static constexpr float c46o1 = 46.f;
-static constexpr float c48o1 = 48.f;
-static constexpr float c50o1 = 50.f;
-static constexpr float c52o1 = 52.f;
-static constexpr float c54o1 = 54.f;
-static constexpr float c56o1 = 56.f;
-static constexpr float c64o1 = 64.f;
-static constexpr float c66o1 = 66.f;
-static constexpr float c68o1 = 68.f;
-static constexpr float c69o1 = 69.f;
-static constexpr float c72o1 = 72.f;
-static constexpr float c84o1 = 84.f;
-static constexpr float c88o1 = 88.f;
-static constexpr float c96o1 = 96.f;
-static constexpr float c100o1 = 100.0f;
-static constexpr float c130o1 = 130.0f;
-static constexpr float c152o1 = 152.0f;
-static constexpr float c166o1 = 166.0f;
-static constexpr float c195o1 = 195.0f;
-static constexpr float c216o1 = 216.0f;
-static constexpr float c264o1 = 264.0f;
-static constexpr float c290o1 = 290.0f;
-static constexpr float c367o1 = 367.0f;
-
-static constexpr float Op0000002 = 0.0000002f;
-static constexpr float c10eM30 = 1e-30f;
-static constexpr float c10eM10 = 1e-10f;
-static constexpr float smallSingle = 0.0000000002f;
-
-static constexpr float cPi = 3.1415926535f;
-static constexpr double c2Pi = 6.2831853071f;
-static constexpr float cPio180 = 1.74532925199e-2f;
-static constexpr float c180oPi = 57.2957795131f;
-
-#endif
-
-}
-
-#endif
diff --git a/src/lbm/cuda/CMakeLists.txt b/src/lbm/cuda/CMakeLists.txt
deleted file mode 100644
index 4142b7c3b1c46275c3257e3dfd657cc6b30c841d..0000000000000000000000000000000000000000
--- a/src/lbm/cuda/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-project(lbmCuda LANGUAGES CUDA CXX)
-
-
-vf_add_library(NAME lbmCuda BUILDTYPE static PUBLIC_LINK basics FOLDER ../../lbm)
-
-
-set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON)
-
-
-set_source_files_properties(../KernelParameter.cpp PROPERTIES LANGUAGE CUDA)
-set_source_files_properties(../CumulantChimera.cpp PROPERTIES LANGUAGE CUDA)
-set_source_files_properties(../BGK.cpp PROPERTIES LANGUAGE CUDA)
diff --git a/src/logger/Logger.cpp b/src/logger/Logger.cpp
index 708e359c8430380dd57c404ed9b3c41f53dcb714..d6b79e110c0827a8e91640ad77d4cb6675f23ce4 100644
--- a/src/logger/Logger.cpp
+++ b/src/logger/Logger.cpp
@@ -10,7 +10,7 @@ namespace vf::logging
 
     std::string Logger::logPath = {"logs/"};
 
-    void Logger::initalizeLogger() 
+    void Logger::initializeLogger() 
     {
         updateDefaultLogger();
 
diff --git a/src/logger/Logger.h b/src/logger/Logger.h
index adb7796135a989843ef8de1f778c9901f3ad17c8..3a25fea02eb7d5ea1ab9bffebea08bfc9f512b04 100644
--- a/src/logger/Logger.h
+++ b/src/logger/Logger.h
@@ -33,7 +33,7 @@
 
 // VirtualFluids is using the spdlog logger https://github.com/gabime/spdlog
 #include <spdlog/spdlog.h>
-// To initialize spdlog initalizeLogger() must be called.
+// To initialize spdlog initializeLogger() must be called.
 // spdlog supports 5 log level, which can be changed at runtime e.g.:
 // spdlog::set_level(spdlog::level::debug)
 // The default log level is set to trace. Supported levels: trace < debug < info < warning < critical
@@ -58,7 +58,7 @@ namespace vf::logging
     {
     public:
         // initalizing the above named logger
-        static void initalizeLogger();
+        static void initializeLogger();
 
         // changing the path of the log files
         static void changeLogPath(const std::string& path);
diff --git a/utilities/ci-regression-tests/generate-ci.py b/utilities/ci-regression-tests/generate-ci.py
new file mode 100644
index 0000000000000000000000000000000000000000..834728b5b22d7cf566019483a3c405e75c6fc837
--- /dev/null
+++ b/utilities/ci-regression-tests/generate-ci.py
@@ -0,0 +1,23 @@
+from jinja2 import Template
+from pathlib import Path
+
+TEMPLATES_DIR = Path(__file__).parent  # directory containing this script
+REGRESSION_CI_TEMPLATE = TEMPLATES_DIR / "regression-tests-ci.yml.j2"  # Jinja2 template rendered into the CI file
+GENERATED_DIR = Path("generated")  # relative path: resolved against the current working directory
+REGRESSION_CI_FILE = GENERATED_DIR / "regression-tests-ci.yml"  # output file written by main()
+TEST_FILE_DIR = Path("regression-tests")  # relative path: searched by main() for "*_test.sh" scripts
+
+
+def build_regression_job_string(regression_tests: list[str]) -> str:
+    """Render the regression CI template, passing `regression_tests` to it, and return the resulting text."""
+    return Template(REGRESSION_CI_TEMPLATE.read_text()).render(regression_tests=regression_tests)
+
+
+def main():
+    """Write REGRESSION_CI_FILE with the rendered jobs for every `*_test.sh` script in TEST_FILE_DIR."""
+    test_names = sorted(script.stem for script in TEST_FILE_DIR.glob("*_test.sh"))  # glob order is arbitrary
+    REGRESSION_CI_FILE.write_text(build_regression_job_string(test_names))
+
+if __name__ == "__main__":
+    GENERATED_DIR.mkdir(parents=True, exist_ok=True)  # main() writes into this directory, so create it first
+    main()
\ No newline at end of file
diff --git a/utilities/ci-regression-tests/regression-tests-ci.yml.j2 b/utilities/ci-regression-tests/regression-tests-ci.yml.j2
new file mode 100644
index 0000000000000000000000000000000000000000..4367ad6c7d5f820ef8975c37dbf843e5342e4dbd
--- /dev/null
+++ b/utilities/ci-regression-tests/regression-tests-ci.yml.j2
@@ -0,0 +1,26 @@
+image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.5
+
+stages:
+  - test
+
+.regression-test:  # shared settings; every generated job below pulls them in via `extends`
+  stage: test
+
+  needs: []
+
+  tags:
+    - gpu
+    - linux
+
+  before_script:
+    - chmod +x ./regression-tests/*  # make the test scripts executable before a job runs one
+    - pip install fieldcompare
+
+{% for regression_test in regression_tests %}
+run-regression-test-{{ regression_test }}:  # one job per entry of `regression_tests`
+  extends: .regression-test
+
+  script:
+    - ./regression-tests/{{ regression_test }}.sh
+
+{% endfor %}
diff --git a/utilities/ci-regression-tests/requirements.txt b/utilities/ci-regression-tests/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bb0794f9960967b2c8aac7fc4bf1162d0e68c85f
--- /dev/null
+++ b/utilities/ci-regression-tests/requirements.txt
@@ -0,0 +1 @@
+Jinja2==3.1.2
\ No newline at end of file